From 74cad4ee9839669ad920257678ea0bf0a818cd3b Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Wed, 24 Jun 2009 11:49:49 +0800
Subject: ACPI: Make ACPI processor proc I/F depend on the ACPI_PROCFS

Now whether the ACPI processor proc I/F is registered depends on the
CONFIG_PROC. It had better depend on the CONFIG_ACPI_PROCFS.
When the CONFIG_ACPI_PROCFS is unset in kernel configuration, the
ACPI processor proc I/F won't be registered.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 431f8b4..f26db48 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -60,7 +60,11 @@ config ACPI_PROCFS
 	  /proc/acpi/fadt (/sys/firmware/acpi/tables/FACP)
 	  /proc/acpi/debug_layer (/sys/module/acpi/parameters/debug_layer)
 	  /proc/acpi/debug_level (/sys/module/acpi/parameters/debug_level)
-
+	  /proc/acpi/processor/*/power (/sys/devices/system/cpu/*/cpuidle/*)
+	  /proc/acpi/processor/*/performance (/sys/devices/system/cpu/*/
+		cpufreq/*)
+	  /proc/acpi/processor/*/throttling (/sys/class/thermal/
+		cooling_device*/*)
 	  This option has no effect on /proc/acpi/ files
 	  and functions which do not yet exist in /sys.
 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 23f0fb8..1b166c1 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -81,7 +81,9 @@ MODULE_LICENSE("GPL");
 static int acpi_processor_add(struct acpi_device *device);
 static int acpi_processor_start(struct acpi_device *device);
 static int acpi_processor_remove(struct acpi_device *device, int type);
+#ifdef CONFIG_ACPI_PROCFS
 static int acpi_processor_info_open_fs(struct inode *inode, struct file *file);
+#endif
 static void acpi_processor_notify(struct acpi_device *device, u32 event);
 static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu);
 static int acpi_processor_handle_eject(struct acpi_processor *pr);
@@ -110,7 +112,7 @@ static struct acpi_driver acpi_processor_driver = {
 
 #define INSTALL_NOTIFY_HANDLER		1
 #define UNINSTALL_NOTIFY_HANDLER	2
-
+#ifdef CONFIG_ACPI_PROCFS
 static const struct file_operations acpi_processor_info_fops = {
 	.owner = THIS_MODULE,
 	.open = acpi_processor_info_open_fs,
@@ -118,6 +120,7 @@ static const struct file_operations acpi_processor_info_fops = {
 	.llseek = seq_lseek,
 	.release = single_release,
 };
+#endif
 
 DEFINE_PER_CPU(struct acpi_processor *, processors);
 struct acpi_processor_errata errata __read_mostly;
@@ -316,6 +319,7 @@ static int acpi_processor_set_pdc(struct acpi_processor *pr)
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
 
+#ifdef CONFIG_ACPI_PROCFS
 static struct proc_dir_entry *acpi_processor_dir = NULL;
 
 static int acpi_processor_info_seq_show(struct seq_file *seq, void *offset)
@@ -388,7 +392,6 @@ static int acpi_processor_add_fs(struct acpi_device *device)
 		return -EIO;
 	return 0;
 }
-
 static int acpi_processor_remove_fs(struct acpi_device *device)
 {
 
@@ -405,6 +408,16 @@ static int acpi_processor_remove_fs(struct acpi_device *device)
 
 	return 0;
 }
+#else
+static inline int acpi_processor_add_fs(struct acpi_device *device)
+{
+	return 0;
+}
+static inline int acpi_processor_remove_fs(struct acpi_device *device)
+{
+	return 0;
+}
+#endif
 
 /* Use the acpiid in MADT to map cpus in case of SMP */
 
@@ -1147,11 +1160,11 @@ static int __init acpi_processor_init(void)
 				(struct acpi_table_header **)&madt)))
 		madt = NULL;
 #endif
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_processor_dir = proc_mkdir(ACPI_PROCESSOR_CLASS, acpi_root_dir);
 	if (!acpi_processor_dir)
 		return -ENOMEM;
-
+#endif
 	/*
 	 * Check whether the system is DMI table. If yes, OSPM
 	 * should not use mwait for CPU-states.
@@ -1179,7 +1192,9 @@ out_cpuidle:
 	cpuidle_unregister_driver(&acpi_idle_driver);
 
 out_proc:
+#ifdef CONFIG_ACPI_PROCFS
 	remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
+#endif
 
 	return result;
 }
@@ -1196,7 +1211,9 @@ static void __exit acpi_processor_exit(void)
 
 	cpuidle_unregister_driver(&acpi_idle_driver);
 
+#ifdef CONFIG_ACPI_PROCFS
 	remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
+#endif
 
 	return;
 }
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 10a2d91..67b2fa1 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -679,6 +679,7 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 	return 0;
 }
 
+#ifdef CONFIG_ACPI_PROCFS
 static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_processor *pr = seq->private;
@@ -758,7 +759,7 @@ static const struct file_operations acpi_processor_power_fops = {
 	.llseek = seq_lseek,
 	.release = single_release,
 };
-
+#endif
 
 /**
  * acpi_idle_bm_check - checks if bus master activity was detected
@@ -1216,7 +1217,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 				       pr->power.states[i].type);
 		printk(")\n");
 	}
-
+#ifdef CONFIG_ACPI_PROCFS
 	/* 'power' [R] */
 	entry = proc_create_data(ACPI_PROCESSOR_FILE_POWER,
 				 S_IRUGO, acpi_device_dir(device),
@@ -1224,6 +1225,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 				 acpi_driver_data(device));
 	if (!entry)
 		return -EIO;
+#endif
 	return 0;
 }
 
@@ -1236,9 +1238,11 @@ int acpi_processor_power_exit(struct acpi_processor *pr,
 	cpuidle_unregister_device(&pr->power.dev);
 	pr->flags.power_setup_done = 0;
 
+#ifdef CONFIG_ACPI_PROCFS
 	if (acpi_device_dir(device))
 		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
 				  acpi_device_dir(device));
+#endif
 
 	return 0;
 }
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 39838c6..07e2614 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -438,7 +438,7 @@ struct thermal_cooling_device_ops processor_cooling_ops = {
 };
 
 /* /proc interface */
-
+#ifdef CONFIG_ACPI_PROCFS
 static int acpi_processor_limit_seq_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_processor *pr = (struct acpi_processor *)seq->private;
@@ -517,3 +517,4 @@ const struct file_operations acpi_processor_limit_fops = {
 	.llseek = seq_lseek,
 	.release = single_release,
 };
+#endif
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index 2275437..1656001 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -1214,7 +1214,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
 }
 
 /* proc interface */
-
+#ifdef CONFIG_ACPI_PROCFS
 static int acpi_processor_throttling_seq_show(struct seq_file *seq,
 					      void *offset)
 {
@@ -1322,3 +1322,4 @@ const struct file_operations acpi_processor_throttling_fops = {
 	.llseek = seq_lseek,
 	.release = single_release,
 };
+#endif
-- 
cgit v0.10.2


From b188e4ce3b7965ecc8d45191042cc9d25f6b90ee Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 24 Jun 2009 01:48:32 -0400
Subject: ACPI: fix CONFIG_ACPI_PROCFS=n build warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/acpi/processor_idle.c:1162: warning: unused variable ‘entry’

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 67b2fa1..b85d9f0 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1160,7 +1160,9 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 {
 	acpi_status status = 0;
 	static int first_run;
+#ifdef CONFIG_ACPI_PROCFS
 	struct proc_dir_entry *entry = NULL;
+#endif
 	unsigned int i;
 
 	if (boot_option_idle_override)
-- 
cgit v0.10.2


From c1815e074079838d36d89e45e92b7ee317190700 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:04 +0000
Subject: ACPI: processor: remove KOBJ_ONLINE/KOBJ_OFFLINE events

This patch removes the KOBJ_ONLINE/KOBJ_OFFLINE events the driver used
to generate for CPU hotplug.  As far as I know, nobody consumes these.
The driver core still generates KOBJ_ADD and KOBJ_REMOVE, of course.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
CC: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
CC: Zhao Yakui <yakui.zhao@intel.com>
CC: Matthew Garrett <mjg@redhat.com>
CC: Thomas Renninger <trenn@suse.de>
CC: Dave Jones <davej@codemonkey.org.uk>
CC: Kay Sievers <kay.sievers@vrfy.org>
CC: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 84e0f3c..c6ec68d 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -963,9 +963,6 @@ int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device)
 	if (!pr)
 		return -ENODEV;
 
-	if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) {
-		kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE);
-	}
 	return 0;
 }
 
@@ -1002,18 +999,10 @@ static void __ref acpi_processor_hotplug_notify(acpi_handle handle,
 			break;
 		}
 
-		if (pr->id >= 0 && (pr->id < nr_cpu_ids)) {
-			kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
-			break;
-		}
-
 		result = acpi_processor_start(device);
-		if ((!result) && ((pr->id >= 0) && (pr->id < nr_cpu_ids))) {
-			kobject_uevent(&device->dev.kobj, KOBJ_ONLINE);
-		} else {
+		if (result)
 			printk(KERN_ERR PREFIX "Device [%s] failed to start\n",
 				    acpi_device_bid(device));
-		}
 		break;
 	case ACPI_NOTIFY_EJECT_REQUEST:
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
@@ -1030,9 +1019,6 @@ static void __ref acpi_processor_hotplug_notify(acpi_handle handle,
 				    "Driver data is NULL, dropping EJECT\n");
 			return;
 		}
-
-		if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id)))
-			kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
 		break;
 	default:
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-- 
cgit v0.10.2


From d4e0526184199e23ac1460fe59b8a3741b17a8b5 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:09 +0000
Subject: ACPI: processor: clean up in acpi_processor_start() error exits

We used to leave crud around if things failed in acpi_processor_start().
This patch cleans up as much as we can before returning.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
CC: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
CC: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index c6ec68d..a496a86 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -731,11 +731,13 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 
 	result = acpi_processor_add_fs(device);
 	if (result)
-		goto end;
+		return result;
 
 	sysdev = get_cpu_sysdev(pr->id);
-	if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev"))
-		return -EFAULT;
+	if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
+		result = -EFAULT;
+		goto err_remove_fs;
+	}
 
 	/* _PDC call should be done before doing anything else (if reqd.). */
 	arch_acpi_processor_init_pdc(pr);
@@ -755,7 +757,7 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 						&processor_cooling_ops);
 	if (IS_ERR(pr->cdev)) {
 		result = PTR_ERR(pr->cdev);
-		goto end;
+		goto err_power_exit;
 	}
 
 	dev_info(&device->dev, "registered as cooling_device%d\n",
@@ -764,13 +766,17 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 	result = sysfs_create_link(&device->dev.kobj,
 				   &pr->cdev->device.kobj,
 				   "thermal_cooling");
-	if (result)
+	if (result) {
 		printk(KERN_ERR PREFIX "Create sysfs link\n");
+		goto err_thermal_unregister;
+	}
 	result = sysfs_create_link(&pr->cdev->device.kobj,
 				   &device->dev.kobj,
 				   "device");
-	if (result)
+	if (result) {
 		printk(KERN_ERR PREFIX "Create sysfs link\n");
+		goto err_remove_sysfs;
+	}
 
 	if (pr->flags.throttling) {
 		printk(KERN_INFO PREFIX "%s [%s] (supports",
@@ -779,7 +785,16 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 		printk(")\n");
 	}
 
-      end:
+	return 0;
+
+err_remove_sysfs:
+	sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
+err_thermal_unregister:
+	thermal_cooling_device_unregister(pr->cdev);
+err_power_exit:
+	acpi_processor_power_exit(pr, device);
+err_remove_fs:
+	acpi_processor_remove_fs(device);
 
 	return result;
 }
-- 
cgit v0.10.2


From ddcd62d89e8c919cc75aeffd2ca37c986141b0f0 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:14 +0000
Subject: ACPI: processor: move acpi_processor_start() after
 acpi_processor_add()

Move acpi_processor_start() to just after acpi_processor_add().
A subsequent patch will merge them.

Code movement only; no functional change.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
CC: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
CC: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index a496a86..53de55e 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -698,6 +698,90 @@ static int acpi_processor_get_info(struct acpi_device *device)
 
 static DEFINE_PER_CPU(void *, processor_device_array);
 
+static void acpi_processor_notify(struct acpi_device *device, u32 event)
+{
+	struct acpi_processor *pr = acpi_driver_data(device);
+	int saved;
+
+	if (!pr)
+		return;
+
+	switch (event) {
+	case ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
+		saved = pr->performance_platform_limit;
+		acpi_processor_ppc_has_changed(pr);
+		if (saved == pr->performance_platform_limit)
+			break;
+		acpi_bus_generate_proc_event(device, event,
+					pr->performance_platform_limit);
+		acpi_bus_generate_netlink_event(device->pnp.device_class,
+						  dev_name(&device->dev), event,
+						  pr->performance_platform_limit);
+		break;
+	case ACPI_PROCESSOR_NOTIFY_POWER:
+		acpi_processor_cst_has_changed(pr);
+		acpi_bus_generate_proc_event(device, event, 0);
+		acpi_bus_generate_netlink_event(device->pnp.device_class,
+						  dev_name(&device->dev), event, 0);
+		break;
+	case ACPI_PROCESSOR_NOTIFY_THROTTLING:
+		acpi_processor_tstate_has_changed(pr);
+		acpi_bus_generate_proc_event(device, event, 0);
+		acpi_bus_generate_netlink_event(device->pnp.device_class,
+						  dev_name(&device->dev), event, 0);
+	default:
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+				  "Unsupported event [0x%x]\n", event));
+		break;
+	}
+
+	return;
+}
+
+static int acpi_cpu_soft_notify(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct acpi_processor *pr = per_cpu(processors, cpu);
+
+	if (action == CPU_ONLINE && pr) {
+		acpi_processor_ppc_has_changed(pr);
+		acpi_processor_cst_has_changed(pr);
+		acpi_processor_tstate_has_changed(pr);
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block acpi_cpu_notifier =
+{
+	    .notifier_call = acpi_cpu_soft_notify,
+};
+
+static int acpi_processor_add(struct acpi_device *device)
+{
+	struct acpi_processor *pr = NULL;
+
+
+	if (!device)
+		return -EINVAL;
+
+	pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
+	if (!pr)
+		return -ENOMEM;
+
+	if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
+		kfree(pr);
+		return -ENOMEM;
+	}
+
+	pr->handle = device->handle;
+	strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
+	device->driver_data = pr;
+
+	return 0;
+}
+
 static int __cpuinit acpi_processor_start(struct acpi_device *device)
 {
 	int result = 0;
@@ -799,90 +883,6 @@ err_remove_fs:
 	return result;
 }
 
-static void acpi_processor_notify(struct acpi_device *device, u32 event)
-{
-	struct acpi_processor *pr = acpi_driver_data(device);
-	int saved;
-
-	if (!pr)
-		return;
-
-	switch (event) {
-	case ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
-		saved = pr->performance_platform_limit;
-		acpi_processor_ppc_has_changed(pr);
-		if (saved == pr->performance_platform_limit)
-			break;
-		acpi_bus_generate_proc_event(device, event,
-					pr->performance_platform_limit);
-		acpi_bus_generate_netlink_event(device->pnp.device_class,
-						  dev_name(&device->dev), event,
-						  pr->performance_platform_limit);
-		break;
-	case ACPI_PROCESSOR_NOTIFY_POWER:
-		acpi_processor_cst_has_changed(pr);
-		acpi_bus_generate_proc_event(device, event, 0);
-		acpi_bus_generate_netlink_event(device->pnp.device_class,
-						  dev_name(&device->dev), event, 0);
-		break;
-	case ACPI_PROCESSOR_NOTIFY_THROTTLING:
-		acpi_processor_tstate_has_changed(pr);
-		acpi_bus_generate_proc_event(device, event, 0);
-		acpi_bus_generate_netlink_event(device->pnp.device_class,
-						  dev_name(&device->dev), event, 0);
-	default:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Unsupported event [0x%x]\n", event));
-		break;
-	}
-
-	return;
-}
-
-static int acpi_cpu_soft_notify(struct notifier_block *nfb,
-		unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	struct acpi_processor *pr = per_cpu(processors, cpu);
-
-	if (action == CPU_ONLINE && pr) {
-		acpi_processor_ppc_has_changed(pr);
-		acpi_processor_cst_has_changed(pr);
-		acpi_processor_tstate_has_changed(pr);
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block acpi_cpu_notifier =
-{
-	    .notifier_call = acpi_cpu_soft_notify,
-};
-
-static int acpi_processor_add(struct acpi_device *device)
-{
-	struct acpi_processor *pr = NULL;
-
-
-	if (!device)
-		return -EINVAL;
-
-	pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
-	if (!pr)
-		return -ENOMEM;
-
-	if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
-		kfree(pr);
-		return -ENOMEM;
-	}
-
-	pr->handle = device->handle;
-	strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME);
-	strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
-	device->driver_data = pr;
-
-	return 0;
-}
-
 static int acpi_processor_remove(struct acpi_device *device, int type)
 {
 	struct acpi_processor *pr = NULL;
-- 
cgit v0.10.2


From 970b04929a68134acca17878b1d93e115e58c12a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:19 +0000
Subject: ACPI: processor: remove .start() method

This patch folds the .start() method into .add().

acpi_processor_start() is always called immediately after
acpi_processor_add(), so there's really no point in having them be
separate methods.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
CC: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
CC: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 53de55e..8014e2a 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -79,7 +79,6 @@ MODULE_DESCRIPTION("ACPI Processor Driver");
 MODULE_LICENSE("GPL");
 
 static int acpi_processor_add(struct acpi_device *device);
-static int acpi_processor_start(struct acpi_device *device);
 static int acpi_processor_remove(struct acpi_device *device, int type);
 static int acpi_processor_info_open_fs(struct inode *inode, struct file *file);
 static void acpi_processor_notify(struct acpi_device *device, u32 event);
@@ -101,7 +100,6 @@ static struct acpi_driver acpi_processor_driver = {
 	.ops = {
 		.add = acpi_processor_add,
 		.remove = acpi_processor_remove,
-		.start = acpi_processor_start,
 		.suspend = acpi_processor_suspend,
 		.resume = acpi_processor_resume,
 		.notify = acpi_processor_notify,
@@ -760,10 +758,8 @@ static struct notifier_block acpi_cpu_notifier =
 static int acpi_processor_add(struct acpi_device *device)
 {
 	struct acpi_processor *pr = NULL;
-
-
-	if (!device)
-		return -EINVAL;
+	int result = 0;
+	struct sys_device *sysdev;
 
 	pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
 	if (!pr)
@@ -779,17 +775,6 @@ static int acpi_processor_add(struct acpi_device *device)
 	strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
 	device->driver_data = pr;
 
-	return 0;
-}
-
-static int __cpuinit acpi_processor_start(struct acpi_device *device)
-{
-	int result = 0;
-	struct acpi_processor *pr;
-	struct sys_device *sysdev;
-
-	pr = acpi_driver_data(device);
-
 	result = acpi_processor_get_info(device);
 	if (result) {
 		/* Processor is physically not present */
@@ -807,7 +792,8 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 	    per_cpu(processor_device_array, pr->id) != device) {
 		printk(KERN_WARNING "BIOS reported wrong ACPI id "
 			"for the processor\n");
-		return -ENODEV;
+		result = -ENODEV;
+		goto err_free_cpumask;
 	}
 	per_cpu(processor_device_array, pr->id) = device;
 
@@ -815,7 +801,7 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 
 	result = acpi_processor_add_fs(device);
 	if (result)
-		return result;
+		goto err_free_cpumask;
 
 	sysdev = get_cpu_sysdev(pr->id);
 	if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
@@ -879,6 +865,8 @@ err_power_exit:
 	acpi_processor_power_exit(pr, device);
 err_remove_fs:
 	acpi_processor_remove_fs(device);
+err_free_cpumask:
+	free_cpumask_var(pr->throttling.shared_cpu_map);
 
 	return result;
 }
@@ -957,7 +945,6 @@ int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device)
 {
 	acpi_handle phandle;
 	struct acpi_device *pdev;
-	struct acpi_processor *pr;
 
 
 	if (acpi_get_parent(handle, &phandle)) {
@@ -972,12 +959,6 @@ int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device)
 		return -ENODEV;
 	}
 
-	acpi_bus_start(*device);
-
-	pr = acpi_driver_data(*device);
-	if (!pr)
-		return -ENODEV;
-
 	return 0;
 }
 
@@ -1007,17 +988,6 @@ static void __ref acpi_processor_hotplug_notify(acpi_handle handle,
 					    "Unable to add the device\n");
 			break;
 		}
-
-		pr = acpi_driver_data(device);
-		if (!pr) {
-			printk(KERN_ERR PREFIX "Driver data is NULL\n");
-			break;
-		}
-
-		result = acpi_processor_start(device);
-		if (result)
-			printk(KERN_ERR PREFIX "Device [%s] failed to start\n",
-				    acpi_device_bid(device));
 		break;
 	case ACPI_NOTIFY_EJECT_REQUEST:
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-- 
cgit v0.10.2


From 80f20fef6a2381402e59b169eb51b989cc175ab7 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:25 +0000
Subject: ACPI: memory hotplug: remove .start() method

This patch folds the .start() method into .add().

The .start() method is called in two paths: boot-time device enumeration
and run-time node addition, currently via container_device_add().  In both
cases, .start() is called immediately after .add(), so there's no reason
to make them separate methods.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
CC: Yasunori Goto <y-goto@jp.fujitsu.com>
CC: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 7a0f4aa..a8d9d8f 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -50,7 +50,6 @@ MODULE_LICENSE("GPL");
 
 static int acpi_memory_device_add(struct acpi_device *device);
 static int acpi_memory_device_remove(struct acpi_device *device, int type);
-static int acpi_memory_device_start(struct acpi_device *device);
 
 static const struct acpi_device_id memory_device_ids[] = {
 	{ACPI_MEMORY_DEVICE_HID, 0},
@@ -65,7 +64,6 @@ static struct acpi_driver acpi_memory_device_driver = {
 	.ops = {
 		.add = acpi_memory_device_add,
 		.remove = acpi_memory_device_remove,
-		.start = acpi_memory_device_start,
 		},
 };
 
@@ -415,28 +413,6 @@ static int acpi_memory_device_add(struct acpi_device *device)
 
 	printk(KERN_DEBUG "%s \n", acpi_device_name(device));
 
-	return result;
-}
-
-static int acpi_memory_device_remove(struct acpi_device *device, int type)
-{
-	struct acpi_memory_device *mem_device = NULL;
-
-
-	if (!device || !acpi_driver_data(device))
-		return -EINVAL;
-
-	mem_device = acpi_driver_data(device);
-	kfree(mem_device);
-
-	return 0;
-}
-
-static int acpi_memory_device_start (struct acpi_device *device)
-{
-	struct acpi_memory_device *mem_device;
-	int result = 0;
-
 	/*
 	 * Early boot code has recognized memory area by EFI/E820.
 	 * If DSDT shows these memory devices on boot, hotplug is not necessary
@@ -446,8 +422,6 @@ static int acpi_memory_device_start (struct acpi_device *device)
 	if (!acpi_hotmem_initialized)
 		return 0;
 
-	mem_device = acpi_driver_data(device);
-
 	if (!acpi_memory_check_device(mem_device)) {
 		/* call add_memory func */
 		result = acpi_memory_enable_device(mem_device);
@@ -458,6 +432,20 @@ static int acpi_memory_device_start (struct acpi_device *device)
 	return result;
 }
 
+static int acpi_memory_device_remove(struct acpi_device *device, int type)
+{
+	struct acpi_memory_device *mem_device = NULL;
+
+
+	if (!device || !acpi_driver_data(device))
+		return -EINVAL;
+
+	mem_device = acpi_driver_data(device);
+	kfree(mem_device);
+
+	return 0;
+}
+
 /*
  * Helper function to check for memory device
  */
-- 
cgit v0.10.2


From 5efc5476184173996dfcce780c2bb5e727df674e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:30 +0000
Subject: ACPI: EC: move acpi_ec_start() after acpi_ec_add()

This patch rearranges ec_install_handlers() and acpi_ec_start() so
acpi_ec_start() ends up just after acpi_ec_add().  A subsequent patch
will merge them.

Code movement only; no functional change.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
CC: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 391f331..8b387a4 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -788,6 +788,42 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
 	return AE_CTRL_TERMINATE;
 }
 
+static int ec_install_handlers(struct acpi_ec *ec)
+{
+	acpi_status status;
+	if (test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
+		return 0;
+	status = acpi_install_gpe_handler(NULL, ec->gpe,
+				  ACPI_GPE_EDGE_TRIGGERED,
+				  &acpi_ec_gpe_handler, ec);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+	acpi_set_gpe_type(NULL, ec->gpe, ACPI_GPE_TYPE_RUNTIME);
+	acpi_enable_gpe(NULL, ec->gpe);
+	status = acpi_install_address_space_handler(ec->handle,
+						    ACPI_ADR_SPACE_EC,
+						    &acpi_ec_space_handler,
+						    NULL, ec);
+	if (ACPI_FAILURE(status)) {
+		if (status == AE_NOT_FOUND) {
+			/*
+			 * Maybe OS fails in evaluating the _REG object.
+			 * The AE_NOT_FOUND error will be ignored and OS
+			 * continue to initialize EC.
+			 */
+			printk(KERN_ERR "Fail in evaluating the _REG object"
+				" of EC device. Broken bios is suspected.\n");
+		} else {
+			acpi_remove_gpe_handler(NULL, ec->gpe,
+				&acpi_ec_gpe_handler);
+			return -ENODEV;
+		}
+	}
+
+	set_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags);
+	return 0;
+}
+
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
 	if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
@@ -842,6 +878,26 @@ static int acpi_ec_add(struct acpi_device *device)
 	return 0;
 }
 
+static int acpi_ec_start(struct acpi_device *device)
+{
+	struct acpi_ec *ec;
+	int ret = 0;
+
+	if (!device)
+		return -EINVAL;
+
+	ec = acpi_driver_data(device);
+
+	if (!ec)
+		return -EINVAL;
+
+	ret = ec_install_handlers(ec);
+
+	/* EC is fully operational, allow queries */
+	clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags);
+	return ret;
+}
+
 static int acpi_ec_remove(struct acpi_device *device, int type)
 {
 	struct acpi_ec *ec;
@@ -888,62 +944,6 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context)
 	return AE_OK;
 }
 
-static int ec_install_handlers(struct acpi_ec *ec)
-{
-	acpi_status status;
-	if (test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
-		return 0;
-	status = acpi_install_gpe_handler(NULL, ec->gpe,
-				  ACPI_GPE_EDGE_TRIGGERED,
-				  &acpi_ec_gpe_handler, ec);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-	acpi_set_gpe_type(NULL, ec->gpe, ACPI_GPE_TYPE_RUNTIME);
-	acpi_enable_gpe(NULL, ec->gpe);
-	status = acpi_install_address_space_handler(ec->handle,
-						    ACPI_ADR_SPACE_EC,
-						    &acpi_ec_space_handler,
-						    NULL, ec);
-	if (ACPI_FAILURE(status)) {
-		if (status == AE_NOT_FOUND) {
-			/*
-			 * Maybe OS fails in evaluating the _REG object.
-			 * The AE_NOT_FOUND error will be ignored and OS
-			 * continue to initialize EC.
-			 */
-			printk(KERN_ERR "Fail in evaluating the _REG object"
-				" of EC device. Broken bios is suspected.\n");
-		} else {
-			acpi_remove_gpe_handler(NULL, ec->gpe,
-				&acpi_ec_gpe_handler);
-			return -ENODEV;
-		}
-	}
-
-	set_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags);
-	return 0;
-}
-
-static int acpi_ec_start(struct acpi_device *device)
-{
-	struct acpi_ec *ec;
-	int ret = 0;
-
-	if (!device)
-		return -EINVAL;
-
-	ec = acpi_driver_data(device);
-
-	if (!ec)
-		return -EINVAL;
-
-	ret = ec_install_handlers(ec);
-
-	/* EC is fully operational, allow queries */
-	clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags);
-	return ret;
-}
-
 static int acpi_ec_stop(struct acpi_device *device, int type)
 {
 	struct acpi_ec *ec;
-- 
cgit v0.10.2


From d02be04707b8ff5375a76c027327e8708877da39 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:35 +0000
Subject: ACPI: EC: remove .start() method

This patch folds the .start() method into .add().

acpi_ec_start() is always called immediately after acpi_ec_add(),
so there's no need to have it be a separate method.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
CC: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 8b387a4..1feedce 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -838,9 +838,8 @@ static void ec_remove_handlers(struct acpi_ec *ec)
 static int acpi_ec_add(struct acpi_device *device)
 {
 	struct acpi_ec *ec = NULL;
+	int ret;
 
-	if (!device)
-		return -EINVAL;
 	strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_EC_CLASS);
 
@@ -875,21 +874,6 @@ static int acpi_ec_add(struct acpi_device *device)
 			  ec->gpe, ec->command_addr, ec->data_addr);
 	pr_info(PREFIX "driver started in %s mode\n",
 		(test_bit(EC_FLAGS_GPE_MODE, &ec->flags))?"interrupt":"poll");
-	return 0;
-}
-
-static int acpi_ec_start(struct acpi_device *device)
-{
-	struct acpi_ec *ec;
-	int ret = 0;
-
-	if (!device)
-		return -EINVAL;
-
-	ec = acpi_driver_data(device);
-
-	if (!ec)
-		return -EINVAL;
 
 	ret = ec_install_handlers(ec);
 
@@ -1077,7 +1061,6 @@ static struct acpi_driver acpi_ec_driver = {
 	.ops = {
 		.add = acpi_ec_add,
 		.remove = acpi_ec_remove,
-		.start = acpi_ec_start,
 		.stop = acpi_ec_stop,
 		.suspend = acpi_ec_suspend,
 		.resume = acpi_ec_resume,
-- 
cgit v0.10.2


From cf745ec7a1222a661b2c5f0e8c2c4be81300d2a4 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:40 +0000
Subject: ACPI: EC: remove .stop() method

This patch folds the .stop() method into .remove().

acpi_ec_stop() is only called via acpi_device_probe() and
acpi_device_remove(), and in both cases it is called immediately before
acpi_ec_remove(), so there's no need to have it be a separate method.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
CC: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 1feedce..d6bf057 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -891,6 +891,7 @@ static int acpi_ec_remove(struct acpi_device *device, int type)
 		return -EINVAL;
 
 	ec = acpi_driver_data(device);
+	ec_remove_handlers(ec);
 	mutex_lock(&ec->lock);
 	list_for_each_entry_safe(handler, tmp, &ec->list, node) {
 		list_del(&handler->node);
@@ -928,19 +929,6 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context)
 	return AE_OK;
 }
 
-static int acpi_ec_stop(struct acpi_device *device, int type)
-{
-	struct acpi_ec *ec;
-	if (!device)
-		return -EINVAL;
-	ec = acpi_driver_data(device);
-	if (!ec)
-		return -EINVAL;
-	ec_remove_handlers(ec);
-
-	return 0;
-}
-
 int __init acpi_boot_ec_enable(void)
 {
 	if (!boot_ec || test_bit(EC_FLAGS_HANDLERS_INSTALLED, &boot_ec->flags))
@@ -1061,7 +1049,6 @@ static struct acpi_driver acpi_ec_driver = {
 	.ops = {
 		.add = acpi_ec_add,
 		.remove = acpi_ec_remove,
-		.stop = acpi_ec_stop,
 		.suspend = acpi_ec_suspend,
 		.resume = acpi_ec_resume,
 		},
-- 
cgit v0.10.2


From dcf52fb71d988ba945054308f661bddf9b2455fb Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 22 Jun 2009 20:41:45 +0000
Subject: ACPI: remove unused acpi_device_ops .stop method

No drivers use the .stop method, so remove it.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 781435d..4a89f08 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -426,9 +426,6 @@ static int acpi_device_probe(struct device * dev)
 		if (acpi_drv->ops.notify) {
 			ret = acpi_device_install_notify_handler(acpi_dev);
 			if (ret) {
-				if (acpi_drv->ops.stop)
-					acpi_drv->ops.stop(acpi_dev,
-						   acpi_dev->removal_type);
 				if (acpi_drv->ops.remove)
 					acpi_drv->ops.remove(acpi_dev,
 						     acpi_dev->removal_type);
@@ -452,8 +449,6 @@ static int acpi_device_remove(struct device * dev)
 	if (acpi_drv) {
 		if (acpi_drv->ops.notify)
 			acpi_device_remove_notify_handler(acpi_dev);
-		if (acpi_drv->ops.stop)
-			acpi_drv->ops.stop(acpi_dev, acpi_dev->removal_type);
 		if (acpi_drv->ops.remove)
 			acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type);
 	}
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c65e4ce..79a6c5e 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -89,7 +89,6 @@ struct acpi_device;
 typedef int (*acpi_op_add) (struct acpi_device * device);
 typedef int (*acpi_op_remove) (struct acpi_device * device, int type);
 typedef int (*acpi_op_start) (struct acpi_device * device);
-typedef int (*acpi_op_stop) (struct acpi_device * device, int type);
 typedef int (*acpi_op_suspend) (struct acpi_device * device,
 				pm_message_t state);
 typedef int (*acpi_op_resume) (struct acpi_device * device);
@@ -106,7 +105,6 @@ struct acpi_device_ops {
 	acpi_op_add add;
 	acpi_op_remove remove;
 	acpi_op_start start;
-	acpi_op_stop stop;
 	acpi_op_suspend suspend;
 	acpi_op_resume resume;
 	acpi_op_bind bind;
-- 
cgit v0.10.2


From b294a290d24d1196d68399cc3a9b8c50bfb55abd Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Tue, 30 Jun 2009 02:13:01 -0400
Subject: Revert "power: remove POWER_SUPPLY_PROP_CAPACITY_LEVEL"

This reverts commit 8efe444038a205e79b38b7ad03878824901849a8 and
4cbc76eadf56399cd11fb736b33c53aec9caab8c.

Richard@laptop.org was apparently using CAPACITY_LEVEL for debugging
battery/EC problems, and was upset that it was removed.  This readds it.

Conflicts:

	Documentation/power_supply_class.txt

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index c6cd495..709d955 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -108,6 +108,8 @@ relative, time-based measurements.
 ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
 
 CAPACITY - capacity in percents.
+CAPACITY_LEVEL - capacity level. This corresponds to
+POWER_SUPPLY_CAPACITY_LEVEL_*.
 
 TEMP - temperature of the power supply.
 TEMP_AMBIENT - ambient temperature.
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index 58e4192..3a589df0 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -276,6 +276,14 @@ static int olpc_bat_get_property(struct power_supply *psy,
 			return ret;
 		val->intval = ec_byte;
 		break;
+	case POWER_SUPPLY_PROP_CAPACITY_LEVEL:
+		if (ec_byte & BAT_STAT_FULL)
+			val->intval = POWER_SUPPLY_CAPACITY_LEVEL_FULL;
+		else if (ec_byte & BAT_STAT_LOW)
+			val->intval = POWER_SUPPLY_CAPACITY_LEVEL_LOW;
+		else
+			val->intval = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL;
+		break;
 	case POWER_SUPPLY_PROP_TEMP:
 		ret = olpc_ec_cmd(EC_BAT_TEMP, NULL, 0, (void *)&ec_word, 2);
 		if (ret)
@@ -321,6 +329,7 @@ static enum power_supply_property olpc_bat_props[] = {
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
 	POWER_SUPPLY_PROP_CURRENT_AVG,
 	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CAPACITY_LEVEL,
 	POWER_SUPPLY_PROP_TEMP,
 	POWER_SUPPLY_PROP_TEMP_AMBIENT,
 	POWER_SUPPLY_PROP_MANUFACTURER,
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index da73591..9deabbd 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -51,6 +51,9 @@ static ssize_t power_supply_show_property(struct device *dev,
 		"Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd",
 		"LiMn"
 	};
+	static char *capacity_level_text[] = {
+		"Unknown", "Critical", "Low", "Normal", "High", "Full"
+	};
 	ssize_t ret;
 	struct power_supply *psy = dev_get_drvdata(dev);
 	const ptrdiff_t off = attr - power_supply_attrs;
@@ -71,6 +74,8 @@ static ssize_t power_supply_show_property(struct device *dev,
 		return sprintf(buf, "%s\n", health_text[value.intval]);
 	else if (off == POWER_SUPPLY_PROP_TECHNOLOGY)
 		return sprintf(buf, "%s\n", technology_text[value.intval]);
+	else if (off == POWER_SUPPLY_PROP_CAPACITY_LEVEL)
+		return sprintf(buf, "%s\n", capacity_level_text[value.intval]);
 	else if (off >= POWER_SUPPLY_PROP_MODEL_NAME)
 		return sprintf(buf, "%s\n", value.strval);
 
@@ -109,6 +114,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(energy_now),
 	POWER_SUPPLY_ATTR(energy_avg),
 	POWER_SUPPLY_ATTR(capacity),
+	POWER_SUPPLY_ATTR(capacity_level),
 	POWER_SUPPLY_ATTR(temp),
 	POWER_SUPPLY_ATTR(temp_ambient),
 	POWER_SUPPLY_ATTR(time_to_empty_now),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 594c494..0ab6aa1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -58,6 +58,15 @@ enum {
 	POWER_SUPPLY_TECHNOLOGY_LiMn,
 };
 
+enum {
+	POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN = 0,
+	POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL,
+	POWER_SUPPLY_CAPACITY_LEVEL_LOW,
+	POWER_SUPPLY_CAPACITY_LEVEL_NORMAL,
+	POWER_SUPPLY_CAPACITY_LEVEL_HIGH,
+	POWER_SUPPLY_CAPACITY_LEVEL_FULL,
+};
+
 enum power_supply_property {
 	/* Properties of type `int' */
 	POWER_SUPPLY_PROP_STATUS = 0,
@@ -89,6 +98,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_ENERGY_NOW,
 	POWER_SUPPLY_PROP_ENERGY_AVG,
 	POWER_SUPPLY_PROP_CAPACITY, /* in percents! */
+	POWER_SUPPLY_PROP_CAPACITY_LEVEL,
 	POWER_SUPPLY_PROP_TEMP,
 	POWER_SUPPLY_PROP_TEMP_AMBIENT,
 	POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
-- 
cgit v0.10.2


From 144bbeaedc53290eab21da82ce1cb5faefd14374 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Tue, 30 Jun 2009 02:15:26 -0400
Subject: olpc_battery: Add an 'error' sysfs device that displays raw errors

Grab the error code from EC_BAT_ERRCODE and let the user see it (rather
than attempting to decode it as we do with PROP_HEALTH) with a separate
error sysfs file.

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index 3a589df0..602bbd0 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -10,7 +10,9 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/types.h>
 #include <linux/err.h>
+#include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/jiffies.h>
@@ -379,6 +381,29 @@ static struct bin_attribute olpc_bat_eeprom = {
 	.read = olpc_bat_eeprom_read,
 };
 
+/* Allow userspace to see the specific error value pulled from the EC */
+
+static ssize_t olpc_bat_error_read(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	uint8_t ec_byte;
+	ssize_t ret;
+
+	ret = olpc_ec_cmd(EC_BAT_ERRCODE, NULL, 0, &ec_byte, 1);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", ec_byte);
+}
+
+static struct device_attribute olpc_bat_error = {
+	.attr = {
+		.name = "error",
+		.mode = S_IRUGO,
+	},
+	.show = olpc_bat_error_read,
+};
+
 /*********************************************************************
  *		Initialisation
  *********************************************************************/
@@ -442,8 +467,14 @@ static int __init olpc_bat_init(void)
 	if (ret)
 		goto eeprom_failed;
 
+	ret = device_create_file(olpc_bat.dev, &olpc_bat_error);
+	if (ret)
+		goto error_failed;
+
 	goto success;
 
+error_failed:
+	device_remove_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
 eeprom_failed:
 	power_supply_unregister(&olpc_bat);
 battery_failed:
@@ -456,6 +487,7 @@ success:
 
 static void __exit olpc_bat_exit(void)
 {
+	device_remove_file(olpc_bat.dev, &olpc_bat_error);
 	device_remove_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
 	power_supply_unregister(&olpc_bat);
 	power_supply_unregister(&olpc_ac);
-- 
cgit v0.10.2


From a4a874a906ae69c35df4b712fadbc35b15665355 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Thu, 18 Jun 2009 07:05:46 +0800
Subject: kmemcheck: remove duplicated #include

Remove duplicated #include in arch/x86/mm/kmemcheck/shadow.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>

diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index e773b6b..3f66b82 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -1,7 +1,6 @@
 #include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
-- 
cgit v0.10.2


From 414f3251aa1b4cbd1e070866971eabc004a7dc20 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Mon, 22 Jun 2009 14:31:53 +0200
Subject: kmemcheck: remove useless check

This check is a left-over from ancient times. We now have the equivalent
check much earlier in both the page fault handler and the debug trap
handler (the calls to kmemcheck_active()).

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>

diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 2c55ed0..5b99004 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -225,9 +225,6 @@ void kmemcheck_hide(struct pt_regs *regs)
 
 	BUG_ON(!irqs_disabled());
 
-	if (data->balance == 0)
-		return;
-
 	if (unlikely(data->balance != 1)) {
 		kmemcheck_show_all();
 		kmemcheck_error_save_bug(regs);
-- 
cgit v0.10.2


From d33c9a491bc87fb87fd0298d0425f0ff320d20a2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Sun, 28 Jun 2009 13:10:19 +0200
Subject: kmemcheck: depend on HAVE_ARCH_KMEMCHECK

to make it selectable if it is available.

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>

diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck
index 603c81b..846e039 100644
--- a/lib/Kconfig.kmemcheck
+++ b/lib/Kconfig.kmemcheck
@@ -1,6 +1,8 @@
 config HAVE_ARCH_KMEMCHECK
 	bool
 
+if HAVE_ARCH_KMEMCHECK
+
 menuconfig KMEMCHECK
 	bool "kmemcheck: trap use of uninitialized memory"
 	depends on DEBUG_KERNEL
@@ -89,3 +91,4 @@ config KMEMCHECK_BITOPS_OK
 	  accesses where not all the bits are initialized at the same time.
 	  This may also hide some real bugs.
 
+endif
-- 
cgit v0.10.2


From e3c6c4a8af9e3c4588235444774e66b6483b10ad Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Wed, 1 Jul 2009 22:36:22 +0200
Subject: kmemcheck: update documentation

The download instructions are no longer needed since kmemcheck was
included in mainline.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>

diff --git a/Documentation/kmemcheck.txt b/Documentation/kmemcheck.txt
index 36304460..c28f828 100644
--- a/Documentation/kmemcheck.txt
+++ b/Documentation/kmemcheck.txt
@@ -43,26 +43,7 @@ feature.
 1. Downloading
 ==============
 
-kmemcheck can only be downloaded using git. If you want to write patches
-against the current code, you should use the kmemcheck development branch of
-the tip tree. It is also possible to use the linux-next tree, which also
-includes the latest version of kmemcheck.
-
-Assuming that you've already cloned the linux-2.6.git repository, all you
-have to do is add the -tip tree as a remote, like this:
-
-	$ git remote add tip git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git
-
-To actually download the tree, fetch the remote:
-
-	$ git fetch tip
-
-And to check out a new local branch with the kmemcheck code:
-
-	$ git checkout -b kmemcheck tip/kmemcheck
-
-General instructions for the -tip tree can be found here:
-http://people.redhat.com/mingo/tip.git/readme.txt
+As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel.
 
 
 2. Configuring and compiling
-- 
cgit v0.10.2


From ee8076ed3e1cdd0cd1e61318386932669c90b92f Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Thu, 2 Jul 2009 09:45:18 -0400
Subject: power_supply: Add a charge_type property, and use it for olpc driver

This adds a new sysfs file called 'charge_type' which displays the
type of charging (unknown, n/a, trickle charge, or fast charging).

This allows things like battery diagnostics to determine what the
battery/EC is doing without resorting to changing the 'status' sysfs
output.

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 709d955..9f16c51 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -76,6 +76,11 @@ STATUS - this attribute represents operating status (charging, full,
 discharging (i.e. powering a load), etc.). This corresponds to
 BATTERY_STATUS_* values, as defined in battery.h.
 
+CHARGE_TYPE - batteries can typically charge at different rates.
+This defines trickle and fast charges.  For batteries that
+are already charged or discharging, 'n/a' can be displayed (or
+'unknown', if the status is not known).
+
 HEALTH - represents health of the battery, values corresponds to
 POWER_SUPPLY_HEALTH_*, defined in battery.h.
 
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index 602bbd0..8fefe5a 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -233,6 +233,14 @@ static int olpc_bat_get_property(struct power_supply *psy,
 		if (ret)
 			return ret;
 		break;
+	case POWER_SUPPLY_PROP_CHARGE_TYPE:
+		if (ec_byte & BAT_STAT_TRICKLE)
+			val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+		else if (ec_byte & BAT_STAT_CHARGING)
+			val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST;
+		else
+			val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE;
+		break;
 	case POWER_SUPPLY_PROP_PRESENT:
 		val->intval = !!(ec_byte & (BAT_STAT_PRESENT |
 					    BAT_STAT_TRICKLE));
@@ -325,6 +333,7 @@ static int olpc_bat_get_property(struct power_supply *psy,
 
 static enum power_supply_property olpc_bat_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_CHARGE_TYPE,
 	POWER_SUPPLY_PROP_PRESENT,
 	POWER_SUPPLY_PROP_HEALTH,
 	POWER_SUPPLY_PROP_TECHNOLOGY,
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 9deabbd..0814439 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -43,6 +43,9 @@ static ssize_t power_supply_show_property(struct device *dev,
 	static char *status_text[] = {
 		"Unknown", "Charging", "Discharging", "Not charging", "Full"
 	};
+	static char *charge_type[] = {
+		"Unknown", "N/A", "Trickle", "Fast"
+	};
 	static char *health_text[] = {
 		"Unknown", "Good", "Overheat", "Dead", "Over voltage",
 		"Unspecified failure", "Cold",
@@ -70,6 +73,8 @@ static ssize_t power_supply_show_property(struct device *dev,
 
 	if (off == POWER_SUPPLY_PROP_STATUS)
 		return sprintf(buf, "%s\n", status_text[value.intval]);
+	else if (off == POWER_SUPPLY_PROP_CHARGE_TYPE)
+		return sprintf(buf, "%s\n", charge_type[value.intval]);
 	else if (off == POWER_SUPPLY_PROP_HEALTH)
 		return sprintf(buf, "%s\n", health_text[value.intval]);
 	else if (off == POWER_SUPPLY_PROP_TECHNOLOGY)
@@ -86,6 +91,7 @@ static ssize_t power_supply_show_property(struct device *dev,
 static struct device_attribute power_supply_attrs[] = {
 	/* Properties of type `int' */
 	POWER_SUPPLY_ATTR(status),
+	POWER_SUPPLY_ATTR(charge_type),
 	POWER_SUPPLY_ATTR(health),
 	POWER_SUPPLY_ATTR(present),
 	POWER_SUPPLY_ATTR(online),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 0ab6aa1..4c7c6fc 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -39,6 +39,13 @@ enum {
 };
 
 enum {
+	POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0,
+	POWER_SUPPLY_CHARGE_TYPE_NONE,
+	POWER_SUPPLY_CHARGE_TYPE_TRICKLE,
+	POWER_SUPPLY_CHARGE_TYPE_FAST,
+};
+
+enum {
 	POWER_SUPPLY_HEALTH_UNKNOWN = 0,
 	POWER_SUPPLY_HEALTH_GOOD,
 	POWER_SUPPLY_HEALTH_OVERHEAT,
@@ -70,6 +77,7 @@ enum {
 enum power_supply_property {
 	/* Properties of type `int' */
 	POWER_SUPPLY_PROP_STATUS = 0,
+	POWER_SUPPLY_PROP_CHARGE_TYPE,
 	POWER_SUPPLY_PROP_HEALTH,
 	POWER_SUPPLY_PROP_PRESENT,
 	POWER_SUPPLY_PROP_ONLINE,
-- 
cgit v0.10.2


From 846d8e7cc82a6205d5c0a905a4940abd0f565741 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Thu, 25 Jun 2009 16:35:44 +0800
Subject: svcrdma: fix error handling of rdma_alloc_frmr()

ib_alloc_fast_reg_mr() and ib_alloc_fast_reg_page_list() returns
ERR_PTR() and not NULL. Compile tested only.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5151f9f..0cf5e8c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -730,12 +730,12 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
 		goto err;
 
 	mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
-	if (!mr)
+	if (IS_ERR(mr))
 		goto err_free_frmr;
 
 	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
 					 RPCSVC_MAXPAGES);
-	if (!pl)
+	if (IS_ERR(pl))
 		goto err_free_mr;
 
 	frmr->mr = mr;
-- 
cgit v0.10.2


From 147202aa772329a02c6e80bc2b7a6b8dd3deac0b Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Tue, 7 Jul 2009 19:43:20 +0100
Subject: intel-iommu: Speed up map routines by using cached domain ASAP

We did before, in the end -- but it was at the bottom of a long stack of
functions. Add an inline wrapper get_valid_domain_for_dev() which will
use the cached one _first_ and only make the out-of-line call if it's
not already set.

This takes the average time taken for a 1-page intel_map_sg() from 5961
cycles to 4812 cycles on my Lenovo x200s test box -- a modest 20%.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 360fb67..c5f7c73 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2455,8 +2455,7 @@ static struct iova *intel_alloc_iova(struct device *dev,
 	return iova;
 }
 
-static struct dmar_domain *
-get_valid_domain_for_dev(struct pci_dev *pdev)
+static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
 {
 	struct dmar_domain *domain;
 	int ret;
@@ -2484,6 +2483,18 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
 	return domain;
 }
 
+static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
+{
+	struct device_domain_info *info;
+
+	/* No lock here, assumes no domain exit in normal case */
+	info = dev->dev.archdata.iommu;
+	if (likely(info))
+		return info->domain;
+
+	return __get_valid_domain_for_dev(dev);
+}
+
 static int iommu_dummy(struct pci_dev *pdev)
 {
 	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
-- 
cgit v0.10.2


From 5a421ce3c062a87db0a9e7f2a0a7ee0a5b869aab Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Fri, 10 Jul 2009 12:37:40 +0300
Subject: nfsd41: gather and report statistics also for v4.1 ops

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index bd2eba5..aff924a 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -234,7 +234,7 @@ enum nfs_opnum4 {
 Needs to be updated if more operations are defined in future.*/
 
 #define FIRST_NFS4_OP	OP_ACCESS
-#define LAST_NFS4_OP 	OP_RELEASE_LOCKOWNER
+#define LAST_NFS4_OP 	OP_RECLAIM_COMPLETE
 
 enum nfsstat4 {
 	NFS4_OK = 0,
-- 
cgit v0.10.2


From 4aed03ae58946c716c8e3f7060f8b500b8a8e30f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 10 Jul 2009 16:28:33 +0100
Subject: wm8350_power: Implement charge type property

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index 1b16bf3..28b0299 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -321,6 +321,24 @@ static int wm8350_bat_check_health(struct wm8350 *wm8350)
 	return POWER_SUPPLY_HEALTH_GOOD;
 }
 
+static int wm8350_bat_get_charge_type(struct wm8350 *wm8350)
+{
+	int state;
+
+	state = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2) &
+	    WM8350_CHG_STS_MASK;
+	switch (state) {
+	case WM8350_CHG_STS_OFF:
+		return POWER_SUPPLY_CHARGE_TYPE_NONE;
+	case WM8350_CHG_STS_TRICKLE:
+		return POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+	case WM8350_CHG_STS_FAST:
+		return POWER_SUPPLY_CHARGE_TYPE_FAST;
+	default:
+		return POWER_SUPPLY_CHARGE_TYPE_UNKNOWN;
+	}
+}
+
 static int wm8350_bat_get_property(struct power_supply *psy,
 				   enum power_supply_property psp,
 				   union power_supply_propval *val)
@@ -342,6 +360,9 @@ static int wm8350_bat_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_HEALTH:
 		val->intval = wm8350_bat_check_health(wm8350);
 		break;
+	case POWER_SUPPLY_PROP_CHARGE_TYPE:
+		val->intval = wm8350_bat_get_charge_type(wm8350);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -355,6 +376,7 @@ static enum power_supply_property wm8350_bat_props[] = {
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_CHARGE_TYPE,
 };
 
 /*********************************************************************
-- 
cgit v0.10.2


From 9208faf297dddfa97a86d7224b6bf94f2e346dd9 Mon Sep 17 00:00:00 2001
From: Yu Zhiguo <yuzg@cn.fujitsu.com>
Date: Mon, 6 Jul 2009 17:24:16 +0800
Subject: NFSv4: ACL in operations 'open' and 'create' should be used

ACL in operations 'open' and 'create' is decoded but never be used.
It should be set as the initial ACL for the object according to RFC3530.
If error occurs when setting the ACL, just clear the ACL bit in the
returned attr bitmap.

Signed-off-by: Yu Zhiguo <yuzg@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7c88017..d781658 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -123,6 +123,35 @@ nfsd4_check_open_attributes(struct svc_rqst *rqstp,
 	return status;
 }
 
+static int
+is_create_with_attrs(struct nfsd4_open *open)
+{
+	return open->op_create == NFS4_OPEN_CREATE
+		&& (open->op_createmode == NFS4_CREATE_UNCHECKED
+		    || open->op_createmode == NFS4_CREATE_GUARDED
+		    || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
+}
+
+/*
+ * if error occurs when setting the acl, just clear the acl bit
+ * in the returned attr bitmap.
+ */
+static void
+do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct nfs4_acl *acl, u32 *bmval)
+{
+	__be32 status;
+
+	status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
+	if (status)
+		/*
+		 * We should probably fail the whole open at this point,
+		 * but we've already created the file, so it's too late;
+		 * So this seems the least of evils:
+		 */
+		bmval[0] &= ~FATTR4_WORD0_ACL;
+}
+
 static inline void
 fh_dup2(struct svc_fh *dst, struct svc_fh *src)
 {
@@ -206,6 +235,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 	if (status)
 		goto out;
 
+	if (is_create_with_attrs(open) && open->op_acl != NULL)
+		do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval);
+
 	set_change_info(&open->op_cinfo, current_fh);
 	fh_dup2(current_fh, &resfh);
 
@@ -536,12 +568,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfserr_badtype;
 	}
 
-	if (!status) {
-		fh_unlock(&cstate->current_fh);
-		set_change_info(&create->cr_cinfo, &cstate->current_fh);
-		fh_dup2(&cstate->current_fh, &resfh);
-	}
+	if (status)
+		goto out;
 
+	if (create->cr_acl != NULL)
+		do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
+				create->cr_bmval);
+
+	fh_unlock(&cstate->current_fh);
+	set_change_info(&create->cr_cinfo, &cstate->current_fh);
+	fh_dup2(&cstate->current_fh, &resfh);
+out:
 	fh_put(&resfh);
 	return status;
 }
-- 
cgit v0.10.2


From 7702ce40bc84a02e88aa20f95333df8cff5f9d37 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 13 Jul 2009 10:54:26 -0400
Subject: SUNRPC: handle IPv6 PKTINFO when extracting destination address

PKTINFO is needed to scrape the caller's IP address off the socket so
RPC datagram replies are routed correctly.  Fill in missing pieces in
the kernel RPC server's UDP receive path to request IPv6 PKTINFO and
correctly parse the IPv6 cmsg header.

Without this patch, kernel RPC services drop all incoming requests on
UDP on IPv6.

Related commit: 7a37f5787e76bf1765c1add3a9a7163f841a28bb

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 23128ee..99a826d 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -432,29 +432,49 @@ static void svc_tcp_write_space(struct sock *sk)
 }
 
 /*
+ * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
+ */
+static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
+				     struct cmsghdr *cmh)
+{
+	struct in_pktinfo *pki = CMSG_DATA(cmh);
+	if (cmh->cmsg_type != IP_PKTINFO)
+		return 0;
+	rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
+	return 1;
+}
+
+/*
+ * See net/ipv6/datagram.c : datagram_recv_ctl
+ */
+static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
+				     struct cmsghdr *cmh)
+{
+	struct in6_pktinfo *pki = CMSG_DATA(cmh);
+	if (cmh->cmsg_type != IPV6_PKTINFO)
+		return 0;
+	ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
+	return 1;
+}
+
+/*
  * Copy the UDP datagram's destination address to the rqstp structure.
  * The 'destination' address in this case is the address to which the
  * peer sent the datagram, i.e. our local address. For multihomed
  * hosts, this can change from msg to msg. Note that only the IP
  * address changes, the port number should remain the same.
  */
-static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
-				     struct cmsghdr *cmh)
+static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
+				    struct cmsghdr *cmh)
 {
-	struct svc_sock *svsk =
-		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-	switch (svsk->sk_sk->sk_family) {
-	case AF_INET: {
-		struct in_pktinfo *pki = CMSG_DATA(cmh);
-		rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
-		break;
-		}
-	case AF_INET6: {
-		struct in6_pktinfo *pki = CMSG_DATA(cmh);
-		ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
-		break;
-		}
+	switch (cmh->cmsg_level) {
+	case SOL_IP:
+		return svc_udp_get_dest_address4(rqstp, cmh);
+	case SOL_IPV6:
+		return svc_udp_get_dest_address6(rqstp, cmh);
 	}
+
+	return 0;
 }
 
 /*
@@ -531,16 +551,15 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 
 	rqstp->rq_prot = IPPROTO_UDP;
 
-	if (cmh->cmsg_level != IPPROTO_IP ||
-	    cmh->cmsg_type != IP_PKTINFO) {
+	if (!svc_udp_get_dest_address(rqstp, cmh)) {
 		if (net_ratelimit())
-			printk("rpcsvc: received unknown control message:"
-			       "%d/%d\n",
-			       cmh->cmsg_level, cmh->cmsg_type);
+			printk(KERN_WARNING
+				"svc: received unknown control message %d/%d; "
+				"dropping RPC reply datagram\n",
+					cmh->cmsg_level, cmh->cmsg_type);
 		skb_free_datagram(svsk->sk_sk, skb);
 		return 0;
 	}
-	svc_udp_get_dest_address(rqstp, cmh);
 
 	if (skb_is_nonlinear(skb)) {
 		/* we have to copy */
@@ -651,8 +670,7 @@ static struct svc_xprt_class svc_udp_class = {
 
 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
-	int one = 1;
-	mm_segment_t oldfs;
+	int err, level, optname, one = 1;
 
 	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
 	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -671,12 +689,22 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 	set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
 	/* make sure we get destination address info */
-	svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
-				       (char __user *)&one, sizeof(one));
-	set_fs(oldfs);
+	switch (svsk->sk_sk->sk_family) {
+	case AF_INET:
+		level = SOL_IP;
+		optname = IP_PKTINFO;
+		break;
+	case AF_INET6:
+		level = SOL_IPV6;
+		optname = IPV6_RECVPKTINFO;
+		break;
+	default:
+		BUG();
+	}
+	err = kernel_setsockopt(svsk->sk_sock, level, optname,
+					(char *)&one, sizeof(one));
+	dprintk("svc: kernel_setsockopt returned %d\n", err);
 }
 
 /*
-- 
cgit v0.10.2


From 4bd9b0f4afc76cf972578c702e1bc1b6f2d10ba5 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 24 Jun 2009 15:37:45 -0400
Subject: nfsd41: use globals for DRC limits

The version 4.1 DRC memory limit and tracking variables are server wide and
session specific. Replace struct svc_serv fields with globals.
Stop using the svc_serv sv_lock.

Add a spinlock to serialize access to the DRC limit management variables which
change on session creation and deletion (usage counter) or (future)
administrative action to adjust the total DRC memory limit.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 980a216..2e6a44e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -430,11 +430,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
 	else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
 		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
 
-	spin_lock(&nfsd_serv->sv_lock);
-	if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages)
-		np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used;
-	nfsd_serv->sv_drc_pages_used += np;
-	spin_unlock(&nfsd_serv->sv_lock);
+	spin_lock(&nfsd_drc_lock);
+	if (np + nfsd_drc_pages_used > nfsd_drc_max_pages)
+		np = nfsd_drc_max_pages - nfsd_drc_pages_used;
+	nfsd_drc_pages_used += np;
+	spin_unlock(&nfsd_drc_lock);
 
 	if (np <= 0) {
 		status = nfserr_resource;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d4c9884..78d8fcd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -67,6 +67,16 @@ struct timeval			nfssvc_boot;
 DEFINE_MUTEX(nfsd_mutex);
 struct svc_serv 		*nfsd_serv;
 
+/*
+ * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
+ * nfsd_drc_max_pages limits the total amount of memory available for
+ * version 4.1 DRC caches.
+ * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
+ */
+spinlock_t	nfsd_drc_lock;
+unsigned int	nfsd_drc_max_pages;
+unsigned int	nfsd_drc_pages_used;
+
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
 static struct svc_version *	nfsd_acl_version[] = {
@@ -238,11 +248,12 @@ static void set_max_drc(void)
 {
 	/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */
 	#define NFSD_DRC_SIZE_SHIFT	7
-	nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages()
+	nfsd_drc_max_pages = nr_free_buffer_pages()
 						>> NFSD_DRC_SIZE_SHIFT;
-	nfsd_serv->sv_drc_pages_used = 0;
-	dprintk("%s svc_drc_max_pages %u\n", __func__,
-		nfsd_serv->sv_drc_max_pages);
+	nfsd_drc_pages_used = 0;
+	spin_lock_init(&nfsd_drc_lock);
+	dprintk("%s nfsd_drc_max_pages %u\n", __func__,
+		nfsd_drc_max_pages);
 }
 
 int nfsd_create_serv(void)
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 2b49d67..2571f85 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -56,6 +56,9 @@ extern struct svc_version	nfsd_version2, nfsd_version3,
 extern u32			nfsd_supported_minorversion;
 extern struct mutex		nfsd_mutex;
 extern struct svc_serv		*nfsd_serv;
+extern spinlock_t		nfsd_drc_lock;
+extern unsigned int		nfsd_drc_max_pages;
+extern unsigned int		nfsd_drc_pages_used;
 
 extern struct seq_operations nfs_exports_op;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index ea80096..52e8cb0 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -94,8 +94,6 @@ struct svc_serv {
 	struct module *		sv_module;	/* optional module to count when
 						 * adding threads */
 	svc_thread_fn		sv_function;	/* main function for threads */
-	unsigned int		sv_drc_max_pages; /* Total pages for DRC */
-	unsigned int		sv_drc_pages_used;/* DRC pages used */
 #if defined(CONFIG_NFS_V4_1)
 	struct list_head	sv_cb_list;	/* queue for callback requests
 						 * that arrive over the same
-- 
cgit v0.10.2


From 3d39cecc4841e8d4c4abdb401d10180f5faaded0 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Wed, 8 Jul 2009 15:23:30 +0100
Subject: intel-iommu: Remove superfluous iova_alloc_lock from IOVA code

We only ever obtain this lock immediately before the iova_rbtree_lock,
and release it immediately after the iova_rbtree_lock. So ditch it and
just use iova_rbtree_lock.

[v2: Remove the lockdep bits this time too]
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c5f7c73..d6a8573 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1309,7 +1309,6 @@ static void iommu_detach_domain(struct dmar_domain *domain,
 }
 
 static struct iova_domain reserved_iova_list;
-static struct lock_class_key reserved_alloc_key;
 static struct lock_class_key reserved_rbtree_key;
 
 static void dmar_init_reserved_ranges(void)
@@ -1320,8 +1319,6 @@ static void dmar_init_reserved_ranges(void)
 
 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
 
-	lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
-		&reserved_alloc_key);
 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
 		&reserved_rbtree_key);
 
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 46dd440..7914951 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -22,7 +22,6 @@
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
 {
-	spin_lock_init(&iovad->iova_alloc_lock);
 	spin_lock_init(&iovad->iova_rbtree_lock);
 	iovad->rbroot = RB_ROOT;
 	iovad->cached32_node = NULL;
@@ -205,7 +204,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 	unsigned long limit_pfn,
 	bool size_aligned)
 {
-	unsigned long flags;
 	struct iova *new_iova;
 	int ret;
 
@@ -219,11 +217,9 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 	if (size_aligned)
 		size = __roundup_pow_of_two(size);
 
-	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
 	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn,
 			new_iova, size_aligned);
 
-	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
 	if (ret) {
 		free_iova_mem(new_iova);
 		return NULL;
@@ -381,8 +377,7 @@ reserve_iova(struct iova_domain *iovad,
 	struct iova *iova;
 	unsigned int overlap = 0;
 
-	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
-	spin_lock(&iovad->iova_rbtree_lock);
+	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
 		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
 			iova = container_of(node, struct iova, node);
@@ -402,8 +397,7 @@ reserve_iova(struct iova_domain *iovad,
 	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
 finish:
 
-	spin_unlock(&iovad->iova_rbtree_lock);
-	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 	return iova;
 }
 
@@ -420,8 +414,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
 	unsigned long flags;
 	struct rb_node *node;
 
-	spin_lock_irqsave(&from->iova_alloc_lock, flags);
-	spin_lock(&from->iova_rbtree_lock);
+	spin_lock_irqsave(&from->iova_rbtree_lock, flags);
 	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
 		struct iova *iova = container_of(node, struct iova, node);
 		struct iova *new_iova;
@@ -430,6 +423,5 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
 			printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
 				iova->pfn_lo, iova->pfn_lo);
 	}
-	spin_unlock(&from->iova_rbtree_lock);
-	spin_unlock_irqrestore(&from->iova_alloc_lock, flags);
+	spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
 }
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 228f6c9..76a0759 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -28,7 +28,6 @@ struct iova {
 
 /* holds all the iova translations for a domain */
 struct iova_domain {
-	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
 	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
 	struct rb_root	rbroot;		/* iova domain rbtree root */
 	struct rb_node	*cached32_node; /* Save last alloced node */
-- 
cgit v0.10.2


From acea0018a24b794e32afea4f3be4230c58f2f8e3 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Tue, 14 Jul 2009 01:55:11 +0100
Subject: intel-iommu: Defer the iotlb flush and iova free for intel_unmap_sg()
 too.

I see no reason why we did this _only_ in intel_unmap_page().

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index d6a8573..ee48fd0 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2815,11 +2815,18 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	/* free page tables */
 	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
 
-	iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
-			      (last_pfn - start_pfn + 1));
-
-	/* free iova */
-	__free_iova(&domain->iovad, iova);
+	if (intel_iommu_strict) {
+		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
+				      last_pfn - start_pfn + 1);
+		/* free iova */
+		__free_iova(&domain->iovad, iova);
+	} else {
+		add_unmap(domain, iova);
+		/*
+		 * queue up the release of the unmap to save the 1/6th of the
+		 * cpu used up by the iotlb flush operation...
+		 */
+	}
 }
 
 static int intel_nontranslate_map_sg(struct device *hddev,
-- 
cgit v0.10.2


From 0db9b7aebb6a1c2bba2d0636ae0b1f9ef729c827 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Tue, 14 Jul 2009 02:01:57 +0100
Subject: intel-iommu: Kill pointless intel_unmap_single() function

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index ee48fd0..86a8394 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2741,12 +2741,6 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 	}
 }
 
-static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
-			       int dir)
-{
-	intel_unmap_page(dev, dev_addr, size, dir, NULL);
-}
-
 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags)
 {
@@ -2779,7 +2773,7 @@ static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	size = PAGE_ALIGN(size);
 	order = get_order(size);
 
-	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
+	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
 	free_pages((unsigned long)vaddr, order);
 }
 
-- 
cgit v0.10.2


From 86f4d0123b1fddb47d35b9a893f8c0b94bf89abe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sun, 19 Jul 2009 14:47:45 +0300
Subject: intel-iommu: double kfree()

g_iommus is freed after we "goto error;".

Found by smatch (http://repo.or.cz/w/smatch.git).

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 86a8394..097d5da 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2224,7 +2224,6 @@ int __init init_dmars(void)
 	deferred_flush = kzalloc(g_num_of_iommus *
 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
 	if (!deferred_flush) {
-		kfree(g_iommus);
 		ret = -ENOMEM;
 		goto error;
 	}
-- 
cgit v0.10.2


From 2522a776c1b9b5c93383d07717f895cc1a56a87a Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <eric.sesterhenn@focus-voip.de>
Date: Tue, 28 Jul 2009 14:32:08 +0200
Subject: Fix memory leak in write_pool_threads

kmemleak produces the following warning

unreferenced object 0xc9ec02a0 (size 8):
  comm "cat", pid 19048, jiffies 730243
  backtrace:
    [<c01bf970>] create_object+0x100/0x240
    [<c01bfadb>] kmemleak_alloc+0x2b/0x60
    [<c01bcd4b>] __kmalloc+0x14b/0x270
    [<c02fd027>] write_pool_threads+0x87/0x1d0
    [<c02fcc08>] nfsctl_transaction_write+0x58/0x70
    [<c02fcc6f>] nfsctl_transaction_read+0x4f/0x60
    [<c01c2574>] vfs_read+0x94/0x150
    [<c01c297d>] sys_read+0x3d/0x70
    [<c0102d6b>] sysenter_do_call+0x12/0x32
    [<ffffffff>] 0xffffffff

write_pool_threads() only frees nthreads on error paths, in the success case
we leak it.

Signed-off-by: Eric Sesterhenn <eric.sesterhenn@lsexperts.de>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1250fb9..48da164 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -785,6 +785,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 		mesg += len;
 	}
 
+	kfree(nthreads);
 	mutex_unlock(&nfsd_mutex);
 	return (mesg-buf);
 
-- 
cgit v0.10.2


From 413d63d7106b914a4a004ac08698f10c618e4616 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 28 Jul 2009 11:37:25 -0400
Subject: nfsd: minor write_pool_threads exit cleanup

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 48da164..b51e7ae 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -784,11 +784,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 		size -= len;
 		mesg += len;
 	}
-
-	kfree(nthreads);
-	mutex_unlock(&nfsd_mutex);
-	return (mesg-buf);
-
+	rv = mesg - buf;
 out_free:
 	kfree(nthreads);
 	mutex_unlock(&nfsd_mutex);
-- 
cgit v0.10.2


From be98d1bbd1b872a10d64cdef0af10b9afcc48092 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Mon, 27 Jul 2009 18:49:05 -0400
Subject: nfsd41: reclaim DRC memory on session free

This fixes a leak which would eventually lock out new clients.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e6a44e..69bd37e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -585,6 +585,9 @@ free_session(struct kref *kref)
 		struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
 		nfsd4_release_respages(e->ce_respages, e->ce_resused);
 	}
+	spin_lock(&nfsd_drc_lock);
+	nfsd_drc_pages_used -= ses->se_fchannel.maxreqs * NFSD_PAGES_PER_SLOT;
+	spin_unlock(&nfsd_drc_lock);
 	kfree(ses);
 }
 
-- 
cgit v0.10.2


From b101ebbc39f50f8af4657e517954ca874b13b364 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Mon, 27 Jul 2009 18:40:09 -0400
Subject: nfsd41: minor set_forechannel_maxreqs cleanup

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 69bd37e..70cba3f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -423,26 +423,25 @@ gen_sessionid(struct nfsd4_session *ses)
  */
 static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
 {
-	int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;
+	int np;
 
 	if (fchan->maxreqs < 1)
 		return nfserr_inval;
 	else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
 		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
 
+	np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;
+
 	spin_lock(&nfsd_drc_lock);
 	if (np + nfsd_drc_pages_used > nfsd_drc_max_pages)
 		np = nfsd_drc_max_pages - nfsd_drc_pages_used;
 	nfsd_drc_pages_used += np;
 	spin_unlock(&nfsd_drc_lock);
 
-	if (np <= 0) {
-		status = nfserr_resource;
-		fchan->maxreqs = 0;
-	} else
-		fchan->maxreqs = np / NFSD_PAGES_PER_SLOT;
-
-	return status;
+	fchan->maxreqs = np / NFSD_PAGES_PER_SLOT;
+	if (fchan->maxreqs == 0)
+		return nfserr_resource;
+	return 0;
 }
 
 /*
-- 
cgit v0.10.2


From 6a14dd1a4fe1bd00e02a96c97015cedfddda58ed Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Mon, 27 Jul 2009 19:06:45 -0400
Subject: nfsd41: reserve less memory for DRC

Also remove a slightly misleading comment.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 78d8fcd..9be2a19 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -246,8 +246,7 @@ void nfsd_reset_versions(void)
  */
 static void set_max_drc(void)
 {
-	/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */
-	#define NFSD_DRC_SIZE_SHIFT	7
+	#define NFSD_DRC_SIZE_SHIFT	10
 	nfsd_drc_max_pages = nr_free_buffer_pages()
 						>> NFSD_DRC_SIZE_SHIFT;
 	nfsd_drc_pages_used = 0;
-- 
cgit v0.10.2


From 0c193054a4c1cf190d2f23e5e91bd14402e43912 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Mon, 27 Jul 2009 19:09:19 -0400
Subject: nfsd41: hange from page to memory based drc limits

NFSD_SLOT_CACHE_SIZE is the size of all encoded operation responses
(excluding the sequence operation) that we want to cache.

For now, keep NFSD_SLOT_CACHE_SIZE at PAGE_SIZE. It will be reduced
when the DRC is changed from page based to memory based.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 70cba3f..e2b11b1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -414,31 +414,31 @@ gen_sessionid(struct nfsd4_session *ses)
 
 /*
  * Give the client the number of slots it requests bound by
- * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages.
+ * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem.
  *
- * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we
- * should (up to a point) re-negotiate active sessions and reduce their
- * slot usage to make rooom for new connections. For now we just fail the
- * create session.
+ * If we run out of reserved DRC memory we should (up to a point) re-negotiate
+ * active sessions and reduce their slot usage to make rooom for new
+ * connections. For now we just fail the create session.
  */
 static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
 {
-	int np;
+	int mem;
 
 	if (fchan->maxreqs < 1)
 		return nfserr_inval;
 	else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
 		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
 
-	np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;
+	mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE;
 
 	spin_lock(&nfsd_drc_lock);
-	if (np + nfsd_drc_pages_used > nfsd_drc_max_pages)
-		np = nfsd_drc_max_pages - nfsd_drc_pages_used;
-	nfsd_drc_pages_used += np;
+	if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem)
+		mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) /
+				NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE;
+	nfsd_drc_mem_used += mem;
 	spin_unlock(&nfsd_drc_lock);
 
-	fchan->maxreqs = np / NFSD_PAGES_PER_SLOT;
+	fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE;
 	if (fchan->maxreqs == 0)
 		return nfserr_resource;
 	return 0;
@@ -465,9 +465,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 		fchan->maxresp_sz = maxcount;
 	session_fchan->maxresp_sz = fchan->maxresp_sz;
 
-	/* Set the max response cached size our default which is
-	 * a multiple of PAGE_SIZE and small */
-	session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
+	session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE;
 	fchan->maxresp_cached = session_fchan->maxresp_cached;
 
 	/* Use the client's maxops if possible */
@@ -585,7 +583,7 @@ free_session(struct kref *kref)
 		nfsd4_release_respages(e->ce_respages, e->ce_resused);
 	}
 	spin_lock(&nfsd_drc_lock);
-	nfsd_drc_pages_used -= ses->se_fchannel.maxreqs * NFSD_PAGES_PER_SLOT;
+	nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE;
 	spin_unlock(&nfsd_drc_lock);
 	kfree(ses);
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9be2a19..5a280a9 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -74,8 +74,8 @@ struct svc_serv 		*nfsd_serv;
  * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
  */
 spinlock_t	nfsd_drc_lock;
-unsigned int	nfsd_drc_max_pages;
-unsigned int	nfsd_drc_pages_used;
+unsigned int	nfsd_drc_max_mem;
+unsigned int	nfsd_drc_mem_used;
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
@@ -247,12 +247,11 @@ void nfsd_reset_versions(void)
 static void set_max_drc(void)
 {
 	#define NFSD_DRC_SIZE_SHIFT	10
-	nfsd_drc_max_pages = nr_free_buffer_pages()
-						>> NFSD_DRC_SIZE_SHIFT;
-	nfsd_drc_pages_used = 0;
+	nfsd_drc_max_mem = (nr_free_buffer_pages()
+					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
+	nfsd_drc_mem_used = 0;
 	spin_lock_init(&nfsd_drc_lock);
-	dprintk("%s nfsd_drc_max_pages %u\n", __func__,
-		nfsd_drc_max_pages);
+	dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
 }
 
 int nfsd_create_serv(void)
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 2571f85..2812ed5 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -57,8 +57,8 @@ extern u32			nfsd_supported_minorversion;
 extern struct mutex		nfsd_mutex;
 extern struct svc_serv		*nfsd_serv;
 extern spinlock_t		nfsd_drc_lock;
-extern unsigned int		nfsd_drc_max_pages;
-extern unsigned int		nfsd_drc_pages_used;
+extern unsigned int		nfsd_drc_max_mem;
+extern unsigned int		nfsd_drc_mem_used;
 
 extern struct seq_operations nfs_exports_op;
 
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 57ab2ed..a6c87d6 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -96,6 +96,7 @@ struct nfs4_cb_conn {
 #define NFSD_MAX_SLOTS_PER_SESSION	128
 /* Maximum number of pages per slot cache entry */
 #define NFSD_PAGES_PER_SLOT	1
+#define NFSD_SLOT_CACHE_SIZE		PAGE_SIZE
 /* Maximum number of operations per session compound */
 #define NFSD_MAX_OPS_PER_COMPOUND	16
 
-- 
cgit v0.10.2


From 5261dcf8eb3d098545a676030910cf2c05a00e6c Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 23 Jul 2009 19:02:14 -0400
Subject: nfsd41: remove redundant forechannel max requests check

This check is done in set_forechannel_maxreqs.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e2b11b1..0be417e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -473,10 +473,6 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 		fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
 	session_fchan->maxops = fchan->maxops;
 
-	/* try to use the client requested number of slots */
-	if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
-		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
-
 	/* FIXME: Error means no more DRC pages so the server should
 	 * recover pages from existing sessions. For now fail session
 	 * creation.
-- 
cgit v0.10.2


From 88e588d56a2f0226a34386b94a03fda97d2b8e67 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 23 Jul 2009 19:02:15 -0400
Subject: nfsd41: change check_slot_seqid parameters

For separation of session slot and clientid slot processing.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0be417e..99df8e7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1309,26 +1309,26 @@ error:
 }
 
 static int
-check_slot_seqid(u32 seqid, struct nfsd4_slot *slot)
+check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
 {
-	dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid,
-		slot->sl_seqid);
+	dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
+		slot_seqid);
 
 	/* The slot is in use, and no response has been sent. */
-	if (slot->sl_inuse) {
-		if (seqid == slot->sl_seqid)
+	if (slot_inuse) {
+		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
 			return nfserr_seq_misordered;
 	}
 	/* Normal */
-	if (likely(seqid == slot->sl_seqid + 1))
+	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
 	/* Replay */
-	if (seqid == slot->sl_seqid)
+	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	/* Wraparound */
-	if (seqid == 1 && (slot->sl_seqid + 1) == 0)
+	if (seqid == 1 && (slot_seqid + 1) == 0)
 		return nfs_ok;
 	/* Misordered replay or misordered new request */
 	return nfserr_seq_misordered;
@@ -1351,7 +1351,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 
 	if (conf) {
 		slot = &conf->cl_slot;
-		status = check_slot_seqid(cr_ses->seqid, slot);
+		status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid,
+					  slot->sl_inuse);
 		if (status == nfserr_replay_cache) {
 			dprintk("Got a create_session replay! seqid= %d\n",
 				slot->sl_seqid);
@@ -1376,7 +1377,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		}
 
 		slot = &unconf->cl_slot;
-		status = check_slot_seqid(cr_ses->seqid, slot);
+		status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid,
+					  slot->sl_inuse);
 		if (status) {
 			/* an unconfirmed replay returns misordered */
 			status = nfserr_seq_misordered;
@@ -1477,7 +1479,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	slot = &session->se_slots[seq->slotid];
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
-	status = check_slot_seqid(seq->seqid, slot);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
 	if (status == nfserr_replay_cache) {
 		cstate->slot = slot;
 		cstate->session = session;
-- 
cgit v0.10.2


From 49557cc74c7bdf6a984be227ead9a84b3a26f053 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 23 Jul 2009 19:02:16 -0400
Subject: nfsd41: Use separate DRC for setclientid

Instead of trying to share the generic 4.1 reply cache code for the
CREATE_SESSION reply cache, it's simpler to handle CREATE_SESSION
separately.

The nfs41 single slot clientid DRC holds the results of create session
processing.  CREATE_SESSION can be preceeded by a SEQUENCE operation
(an embedded CREATE_SESSION) and the create session single slot cache must be
maintained.  nfsd4_replay_cache_entry() and nfsd4_store_cache_entry() do not
implement the replay of an embedded CREATE_SESSION.

The clientid DRC slot does not need the inuse, cachethis or other fields that
the multiple slot session cache uses.  Replace the clientid DRC cache struct
nfs4_slot cache with a new nfsd4_clid_slot cache.  Save the xdr struct
nfsd4_create_session into the cache at the end of processing, and on a replay,
replace the struct for the replay request with the cached version all while
under the state lock.

nfsd4_proc_compound will handle both the solo and embedded CREATE_SESSION case
via the normal use of encode_operation.

Errors that do not change the create session cache:
A create session NFS4ERR_STALE_CLIENTID error means that a client record
(and associated create session slot) could not be found and therefore can't
be changed.  NFSERR_SEQ_MISORDERED errors do not change the slot cache.

All other errors get cached.

Remove the clientid DRC specific check in nfs4svc_encode_compoundres to
put the session only if cstate.session is set which will now always be true.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d781658..d606c6a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1120,7 +1120,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			BUG_ON(op->status == nfs_ok);
 
 encode_op:
-		/* Only from SEQUENCE or CREATE_SESSION */
+		/* Only from SEQUENCE */
 		if (resp->cstate.status == nfserr_replay_cache) {
 			dprintk("%s NFS4.1 replay from cache\n", __func__);
 			if (nfsd4_not_cached(resp))
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 99df8e7..7729d09 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -653,8 +653,6 @@ static inline void
 free_client(struct nfs4_client *clp)
 {
 	shutdown_callback_client(clp);
-	nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages,
-			     clp->cl_slot.sl_cache_entry.ce_resused);
 	if (clp->cl_cred.cr_group_info)
 		put_group_info(clp->cl_cred.cr_group_info);
 	kfree(clp->cl_principal);
@@ -1293,12 +1291,11 @@ out_copy:
 	exid->clientid.cl_boot = new->cl_clientid.cl_boot;
 	exid->clientid.cl_id = new->cl_clientid.cl_id;
 
-	new->cl_slot.sl_seqid = 0;
 	exid->seqid = 1;
 	nfsd4_set_ex_flags(new, exid);
 
 	dprintk("nfsd4_exchange_id seqid %d flags %x\n",
-		new->cl_slot.sl_seqid, new->cl_exchange_flags);
+		new->cl_cs_slot.sl_seqid, new->cl_exchange_flags);
 	status = nfs_ok;
 
 out:
@@ -1334,15 +1331,35 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
 	return nfserr_seq_misordered;
 }
 
+/*
+ * Cache the create session result into the create session single DRC
+ * slot cache by saving the xdr structure. sl_seqid has been set.
+ * Do this for solo or embedded create session operations.
+ */
+static void
+nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
+			   struct nfsd4_clid_slot *slot, int nfserr)
+{
+	slot->sl_status = nfserr;
+	memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
+}
+
+static __be32
+nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
+			    struct nfsd4_clid_slot *slot)
+{
+	memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
+	return slot->sl_status;
+}
+
 __be32
 nfsd4_create_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
 		     struct nfsd4_create_session *cr_ses)
 {
 	u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
-	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfs4_client *conf, *unconf;
-	struct nfsd4_slot *slot = NULL;
+	struct nfsd4_clid_slot *cs_slot = NULL;
 	int status = 0;
 
 	nfs4_lock_state();
@@ -1350,25 +1367,22 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	conf = find_confirmed_client(&cr_ses->clientid);
 
 	if (conf) {
-		slot = &conf->cl_slot;
-		status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid,
-					  slot->sl_inuse);
+		cs_slot = &conf->cl_cs_slot;
+		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
 		if (status == nfserr_replay_cache) {
 			dprintk("Got a create_session replay! seqid= %d\n",
-				slot->sl_seqid);
-			cstate->slot = slot;
-			cstate->status = status;
+				cs_slot->sl_seqid);
 			/* Return the cached reply status */
-			status = nfsd4_replay_cache_entry(resp, NULL);
+			status = nfsd4_replay_create_session(cr_ses, cs_slot);
 			goto out;
-		} else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) {
+		} else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
 			status = nfserr_seq_misordered;
 			dprintk("Sequence misordered!\n");
 			dprintk("Expected seqid= %d but got seqid= %d\n",
-				slot->sl_seqid, cr_ses->seqid);
+				cs_slot->sl_seqid, cr_ses->seqid);
 			goto out;
 		}
-		conf->cl_slot.sl_seqid++;
+		cs_slot->sl_seqid++;
 	} else if (unconf) {
 		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
 		    (ip_addr != unconf->cl_addr)) {
@@ -1376,16 +1390,15 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			goto out;
 		}
 
-		slot = &unconf->cl_slot;
-		status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid,
-					  slot->sl_inuse);
+		cs_slot = &unconf->cl_cs_slot;
+		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
 		if (status) {
 			/* an unconfirmed replay returns misordered */
 			status = nfserr_seq_misordered;
-			goto out;
+			goto out_cache;
 		}
 
-		slot->sl_seqid++; /* from 0 to 1 */
+		cs_slot->sl_seqid++; /* from 0 to 1 */
 		move_to_confirmed(unconf);
 
 		/*
@@ -1406,12 +1419,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 
 	memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
 	       NFS4_MAX_SESSIONID_LEN);
-	cr_ses->seqid = slot->sl_seqid;
+	cr_ses->seqid = cs_slot->sl_seqid;
 
-	slot->sl_inuse = true;
-	cstate->slot = slot;
-	/* Ensure a page is used for the cache */
-	slot->sl_cache_entry.ce_cachethis = 1;
+out_cache:
+	/* cache solo and embedded create sessions under the state lock */
+	nfsd4_cache_create_session(cr_ses, cs_slot, status);
 out:
 	nfs4_unlock_state();
 	dprintk("%s returns %d\n", __func__, ntohl(status));
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2dcc7fe..fdf632b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3313,8 +3313,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 			dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
 			resp->cstate.slot->sl_inuse = 0;
 		}
-		if (resp->cstate.session)
-			nfsd4_put_session(resp->cstate.session);
+		nfsd4_put_session(resp->cstate.session);
 	}
 	return 1;
 }
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index a6c87d6..58bb197 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -127,6 +127,25 @@ struct nfsd4_channel_attrs {
 	u32		rdma_attrs;
 };
 
+struct nfsd4_create_session {
+	clientid_t			clientid;
+	struct nfs4_sessionid		sessionid;
+	u32				seqid;
+	u32				flags;
+	struct nfsd4_channel_attrs	fore_channel;
+	struct nfsd4_channel_attrs	back_channel;
+	u32				callback_prog;
+	u32				uid;
+	u32				gid;
+};
+
+/* The single slot clientid cache structure */
+struct nfsd4_clid_slot {
+	u32				sl_seqid;
+	__be32				sl_status;
+	struct nfsd4_create_session	sl_cr_ses;
+};
+
 struct nfsd4_session {
 	struct kref		se_ref;
 	struct list_head	se_hash;	/* hash by sessionid */
@@ -193,7 +212,7 @@ struct nfs4_client {
 
 	/* for nfs41 */
 	struct list_head	cl_sessions;
-	struct nfsd4_slot	cl_slot;	/* create_session slot */
+	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
 	u32			cl_exchange_flags;
 	struct nfs4_sessionid	cl_sessionid;
 };
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 2bacf75..5e4beb0 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -366,18 +366,6 @@ struct nfsd4_exchange_id {
 	int		spa_how;
 };
 
-struct nfsd4_create_session {
-	clientid_t		clientid;
-	struct nfs4_sessionid	sessionid;
-	u32			seqid;
-	u32			flags;
-	struct nfsd4_channel_attrs fore_channel;
-	struct nfsd4_channel_attrs back_channel;
-	u32			callback_prog;
-	u32			uid;
-	u32			gid;
-};
-
 struct nfsd4_sequence {
 	struct nfs4_sessionid	sessionid;		/* request/response */
 	u32			seqid;			/* request/response */
-- 
cgit v0.10.2


From c8647947f8c13ee2647505debae284ab1c859e65 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 23 Jul 2009 19:02:17 -0400
Subject: nfsd41: rename nfsd4_enc_uncached_replay

This function is only used for SEQUENCE replay.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d606c6a..23cd738 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -989,7 +989,7 @@ static const char *nfsd4_op_name(unsigned opnum);
  * encode the uncache rep error on the next operation.
  */
 static __be32
-nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args,
+nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
 			 struct nfsd4_compoundres *resp)
 {
 	struct nfsd4_op *op;
@@ -1124,7 +1124,7 @@ encode_op:
 		if (resp->cstate.status == nfserr_replay_cache) {
 			dprintk("%s NFS4.1 replay from cache\n", __func__);
 			if (nfsd4_not_cached(resp))
-				status = nfsd4_enc_uncached_replay(args, resp);
+				status = nfsd4_enc_sequence_replay(args, resp);
 			else
 				status = op->status;
 			goto out;
-- 
cgit v0.10.2


From abfabf8cafa60e7876a7193fb344f739f690071d Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 23 Jul 2009 19:02:18 -0400
Subject: nfsd41: encode replay sequence from the slot values

The sequence operation is not cached; always encode the sequence operation on
a replay from the slot table and session values. This simplifies the sessions
replay logic in nfsd4_proc_compound.

If this is a replay of a compound that was specified not to be cached, return
NFS4ERR_RETRY_UNCACHED_REP.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 23cd738..6fde431 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -984,34 +984,6 @@ static struct nfsd4_operation nfsd4_ops[];
 static const char *nfsd4_op_name(unsigned opnum);
 
 /*
- * This is a replay of a compound for which no cache entry pages
- * were used. Encode the sequence operation, and if cachethis is FALSE
- * encode the uncache rep error on the next operation.
- */
-static __be32
-nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
-			 struct nfsd4_compoundres *resp)
-{
-	struct nfsd4_op *op;
-
-	dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__,
-		resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
-
-	/* Encode the replayed sequence operation */
-	BUG_ON(resp->opcnt != 1);
-	op = &args->ops[resp->opcnt - 1];
-	nfsd4_encode_operation(resp, op);
-
-	/*return nfserr_retry_uncached_rep in next operation. */
-	if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) {
-		op = &args->ops[resp->opcnt++];
-		op->status = nfserr_retry_uncached_rep;
-		nfsd4_encode_operation(resp, op);
-	}
-	return op->status;
-}
-
-/*
  * Enforce NFSv4.1 COMPOUND ordering rules.
  *
  * TODO:
@@ -1123,10 +1095,7 @@ encode_op:
 		/* Only from SEQUENCE */
 		if (resp->cstate.status == nfserr_replay_cache) {
 			dprintk("%s NFS4.1 replay from cache\n", __func__);
-			if (nfsd4_not_cached(resp))
-				status = nfsd4_enc_sequence_replay(args, resp);
-			else
-				status = op->status;
+			status = op->status;
 			goto out;
 		}
 		if (op->status == nfserr_replay_me) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7729d09..9295c4b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1109,6 +1109,36 @@ nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
 }
 
 /*
+ * Encode the replay sequence operation from the slot values.
+ * If cachethis is FALSE encode the uncached rep error on the next
+ * operation which sets resp->p and increments resp->opcnt for
+ * nfs4svc_encode_compoundres.
+ *
+ */
+static __be32
+nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
+			  struct nfsd4_compoundres *resp)
+{
+	struct nfsd4_op *op;
+	struct nfsd4_slot *slot = resp->cstate.slot;
+
+	dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
+		resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
+
+	/* Encode the replayed sequence operation */
+	op = &args->ops[resp->opcnt - 1];
+	nfsd4_encode_operation(resp, op);
+
+	/* Return nfserr_retry_uncached_rep in next operation. */
+	if (args->opcnt > 1 && slot->sl_cache_entry.ce_cachethis == 0) {
+		op = &args->ops[resp->opcnt++];
+		op->status = nfserr_retry_uncached_rep;
+		nfsd4_encode_operation(resp, op);
+	}
+	return op->status;
+}
+
+/*
  * Keep the first page of the replay. Copy the NFSv4.1 data from the first
  * cached page.  Replace any futher replay pages from the cache.
  */
@@ -1131,10 +1161,12 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 	 * session inactivity timer fires and a solo sequence operation
 	 * is sent (lease renewal).
 	 */
-	if (seq && nfsd4_not_cached(resp)) {
-		seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
-		return nfs_ok;
-	}
+	seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
+
+	/* Either returns 0 or nfserr_retry_uncached */
+	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
+	if (status == nfserr_retry_uncached_rep)
+		return status;
 
 	if (!nfsd41_copy_replay_data(resp, entry)) {
 		/*
-- 
cgit v0.10.2


From 2e83a5c5d2317c386de2880eb43ef0bef8eb1fa9 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 15 Jul 2009 18:20:38 +0200
Subject: ds2760_battery: delay power supply registration

This fixes a race condition I recently introduced with the PMOD feature
addition (cef437e3: "w1: ds2760_battery: add support for sleep mode
feature").

Postpone the call to power_supply_register() to fix it.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>
Acked-by: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 520b5c4..cf07c43 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -381,12 +381,6 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 
 	di->charge_status = POWER_SUPPLY_STATUS_UNKNOWN;
 
-	retval = power_supply_register(&pdev->dev, &di->bat);
-	if (retval) {
-		dev_err(di->dev, "failed to register battery\n");
-		goto batt_failed;
-	}
-
 	/* enable sleep mode feature */
 	ds2760_battery_read_status(di);
 	status = di->raw[DS2760_STATUS_REG];
@@ -397,6 +391,12 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 
 	ds2760_battery_write_status(di, status);
 
+	retval = power_supply_register(&pdev->dev, &di->bat);
+	if (retval) {
+		dev_err(di->dev, "failed to register battery\n");
+		goto batt_failed;
+	}
+
 	INIT_DELAYED_WORK(&di->monitor_work, ds2760_battery_work);
 	di->monitor_wqueue = create_singlethread_workqueue(dev_name(&pdev->dev));
 	if (!di->monitor_wqueue) {
-- 
cgit v0.10.2


From 5c6e9bf2c96e746237516bc8897add67682ee452 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 15 Jul 2009 18:20:39 +0200
Subject: ds2760_battery: export more features

Export POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW and POWER_SUPPLY_PROP_CAPACITY
features to the power supply core.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>
Acked-by: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index cf07c43..f439071 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -337,6 +337,12 @@ static int ds2760_battery_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_TEMP:
 		val->intval = di->temp_C;
 		break;
+	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
+		val->intval = di->life_sec;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = di->rem_capacity;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -353,6 +359,8 @@ static enum power_supply_property ds2760_battery_props[] = {
 	POWER_SUPPLY_PROP_CHARGE_EMPTY,
 	POWER_SUPPLY_PROP_CHARGE_NOW,
 	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
 };
 
 static int ds2760_battery_probe(struct platform_device *pdev)
-- 
cgit v0.10.2


From c1e72193ea3fa02e96bf3aa66006e18d107d0266 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 15 Jul 2009 18:20:40 +0200
Subject: ds2760_battery: add rated_capacity module parameter

For systems where the ds2760 is soldered directly on the PCB, the 'rated
capacity' register is not set to anything useful.

In order to allow users to bootstrap this value, introduce a new module
parameter 'rated_capacity' and use it to write the internal EEPROM in
case the value differes from what's been given.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>
Acked-by: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index f439071..ed0ea5e 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -66,6 +66,10 @@ static unsigned int pmod_enabled;
 module_param(pmod_enabled, bool, 0644);
 MODULE_PARM_DESC(pmod_enabled, "PMOD enable bit");
 
+static unsigned int rated_capacity;
+module_param(rated_capacity, uint, 0644);
+MODULE_PARM_DESC(rated_capacity, "rated battery capacity, 10*mAh or index");
+
 /* Some batteries have their rated capacity stored a N * 10 mAh, while
  * others use an index into this table. */
 static int rated_capacities[] = {
@@ -274,6 +278,17 @@ static void ds2760_battery_write_status(struct ds2760_device_info *di,
 	w1_ds2760_recall_eeprom(di->w1_dev, DS2760_EEPROM_BLOCK1);
 }
 
+static void ds2760_battery_write_rated_capacity(struct ds2760_device_info *di,
+						unsigned char rated_capacity)
+{
+	if (rated_capacity == di->raw[DS2760_RATED_CAPACITY])
+		return;
+
+	w1_ds2760_write(di->w1_dev, &rated_capacity, DS2760_RATED_CAPACITY, 1);
+	w1_ds2760_store_eeprom(di->w1_dev, DS2760_EEPROM_BLOCK1);
+	w1_ds2760_recall_eeprom(di->w1_dev, DS2760_EEPROM_BLOCK1);
+}
+
 static void ds2760_battery_work(struct work_struct *work)
 {
 	struct ds2760_device_info *di = container_of(work,
@@ -399,6 +414,10 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 
 	ds2760_battery_write_status(di, status);
 
+	/* set rated capacity from module param */
+	if (rated_capacity)
+		ds2760_battery_write_rated_capacity(di, rated_capacity);
+
 	retval = power_supply_register(&pdev->dev, &di->bat);
 	if (retval) {
 		dev_err(di->dev, "failed to register battery\n");
-- 
cgit v0.10.2


From 25f2bfa62ae77820a8185734c4a2ab7f3971a2fc Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 15 Jul 2009 18:20:41 +0200
Subject: ds2760_battery: handle full_active_uAh == 0 case correctly

In systems where the battery monitor is not part of the battery pack and
is hence not bootstrapped with sane values, the full_active_uAh is
likely to be zero.

Handle that case by defaulting to the rated_capacity information which
can be passed to the driver using the new module parameter.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>
Acked-by: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index ed0ea5e..2d0e5ed 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -172,8 +172,13 @@ static int ds2760_battery_read_status(struct ds2760_device_info *di)
 	di->full_active_uAh = di->raw[DS2760_ACTIVE_FULL] << 8 |
 			      di->raw[DS2760_ACTIVE_FULL + 1];
 
-	scale[0] = di->raw[DS2760_ACTIVE_FULL] << 8 |
-		   di->raw[DS2760_ACTIVE_FULL + 1];
+	/* If the full_active_uAh value is not given, fall back to the rated
+	 * capacity. This is likely to happen when chips are not part of the
+	 * battery pack and is therefore not bootstrapped. */
+	if (di->full_active_uAh == 0)
+		di->full_active_uAh = di->rated_capacity / 1000L;
+
+	scale[0] = di->full_active_uAh;
 	for (i = 1; i < 5; i++)
 		scale[i] = scale[i - 1] + di->raw[DS2760_ACTIVE_FULL + 2 + i];
 
-- 
cgit v0.10.2


From 02d0d2758821c38b2601d34dac544140af09e651 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 15 Jul 2009 22:57:16 +0200
Subject: ds2760_battery: add current_accum module parameter

When connecting a ds2760 to a partly loaded battery the first time,
there must be a way to bootstrap the current_accum value. Without that,
the current capactity value is bogus until the battery is fully charged
for the first time.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>
Cc: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 2d0e5ed..f4a9258 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -70,6 +70,10 @@ static unsigned int rated_capacity;
 module_param(rated_capacity, uint, 0644);
 MODULE_PARM_DESC(rated_capacity, "rated battery capacity, 10*mAh or index");
 
+static unsigned int current_accum;
+module_param(current_accum, uint, 0644);
+MODULE_PARM_DESC(current_accum, "current accumulator value");
+
 /* Some batteries have their rated capacity stored a N * 10 mAh, while
  * others use an index into this table. */
 static int rated_capacities[] = {
@@ -215,6 +219,22 @@ static int ds2760_battery_read_status(struct ds2760_device_info *di)
 	return 0;
 }
 
+static void ds2760_battery_set_current_accum(struct ds2760_device_info *di,
+					     unsigned int acr_val)
+{
+	unsigned char acr[2];
+
+	/* acr is in units of 0.25 mAh */
+	acr_val *= 4L;
+	acr_val /= 1000;
+
+	acr[0] = acr_val >> 8;
+	acr[1] = acr_val & 0xff;
+
+	if (w1_ds2760_write(di->w1_dev, acr, DS2760_CURRENT_ACCUM_MSB, 2) < 2)
+		dev_warn(di->dev, "ACR write failed\n");
+}
+
 static void ds2760_battery_update_status(struct ds2760_device_info *di)
 {
 	int old_charge_status = di->charge_status;
@@ -246,21 +266,9 @@ static void ds2760_battery_update_status(struct ds2760_device_info *di)
 			if (di->full_counter < 2) {
 				di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 			} else {
-				unsigned char acr[2];
-				int acr_val;
-
-				/* acr is in units of 0.25 mAh */
-				acr_val = di->full_active_uAh * 4L / 1000;
-
-				acr[0] = acr_val >> 8;
-				acr[1] = acr_val & 0xff;
-
-				if (w1_ds2760_write(di->w1_dev, acr,
-				    DS2760_CURRENT_ACCUM_MSB, 2) < 2)
-					dev_warn(di->dev,
-						 "ACR reset failed\n");
-
 				di->charge_status = POWER_SUPPLY_STATUS_FULL;
+				ds2760_battery_set_current_accum(di,
+						di->full_active_uAh);
 			}
 		}
 	} else {
@@ -423,6 +431,11 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 	if (rated_capacity)
 		ds2760_battery_write_rated_capacity(di, rated_capacity);
 
+	/* set current accumulator if given as parameter.
+	 * this should only be done for bootstrapping the value */
+	if (current_accum)
+		ds2760_battery_set_current_accum(di, current_accum);
+
 	retval = power_supply_register(&pdev->dev, &di->bat);
 	if (retval) {
 		dev_err(di->dev, "failed to register battery\n");
-- 
cgit v0.10.2


From ff3417e7effe57cc002a8882a48bcb8e1a7e7267 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 30 Jul 2009 17:42:31 +0400
Subject: power_supply: EXPORT_SYMBOL cleanups

While I'm at it, cleanup the power supply code so that EXPORT_SYMBOL_GPL
appears directly after the symbole declaration. checkpatch.pl wants it
that way.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Ian Molton <spyro@f2s.com>
Cc: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 5520040..12cd6e3 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -18,7 +18,9 @@
 #include <linux/power_supply.h>
 #include "power_supply.h"
 
+/* exported for the APM Power driver, APM emulation */
 struct class *power_supply_class;
+EXPORT_SYMBOL_GPL(power_supply_class);
 
 static int __power_supply_changed_work(struct device *dev, void *data)
 {
@@ -55,6 +57,7 @@ void power_supply_changed(struct power_supply *psy)
 
 	schedule_work(&psy->changed_work);
 }
+EXPORT_SYMBOL_GPL(power_supply_changed);
 
 static int __power_supply_am_i_supplied(struct device *dev, void *data)
 {
@@ -86,6 +89,7 @@ int power_supply_am_i_supplied(struct power_supply *psy)
 
 	return error;
 }
+EXPORT_SYMBOL_GPL(power_supply_am_i_supplied);
 
 static int __power_supply_is_system_supplied(struct device *dev, void *data)
 {
@@ -110,6 +114,7 @@ int power_supply_is_system_supplied(void)
 
 	return error;
 }
+EXPORT_SYMBOL_GPL(power_supply_is_system_supplied);
 
 int power_supply_register(struct device *parent, struct power_supply *psy)
 {
@@ -144,6 +149,7 @@ dev_create_failed:
 success:
 	return rc;
 }
+EXPORT_SYMBOL_GPL(power_supply_register);
 
 void power_supply_unregister(struct power_supply *psy)
 {
@@ -152,6 +158,7 @@ void power_supply_unregister(struct power_supply *psy)
 	power_supply_remove_attrs(psy);
 	device_unregister(psy->dev);
 }
+EXPORT_SYMBOL_GPL(power_supply_unregister);
 
 static int __init power_supply_class_init(void)
 {
@@ -170,15 +177,6 @@ static void __exit power_supply_class_exit(void)
 	class_destroy(power_supply_class);
 }
 
-EXPORT_SYMBOL_GPL(power_supply_changed);
-EXPORT_SYMBOL_GPL(power_supply_am_i_supplied);
-EXPORT_SYMBOL_GPL(power_supply_is_system_supplied);
-EXPORT_SYMBOL_GPL(power_supply_register);
-EXPORT_SYMBOL_GPL(power_supply_unregister);
-
-/* exported for the APM Power driver, APM emulation */
-EXPORT_SYMBOL_GPL(power_supply_class);
-
 subsys_initcall(power_supply_class_init);
 module_exit(power_supply_class_exit);
 
-- 
cgit v0.10.2


From e5f5ccb646bc6009572b5c23201b5e81638ff150 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 23 Jul 2009 20:35:53 +0200
Subject: power_supply: get_by_name and set_charged functionality

This adds a function that indicates that a battery is fully charged.
It also includes functions to get a power_supply device from the class
of registered devices by name reference. These can be used to find a
specific battery to call power_supply_set_battery_charged() on.

Some battery drivers might need this information to calibrate
themselves.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Ian Molton <spyro@f2s.com>
Cc: Anton Vorontsov <cbou@mail.ru>
Cc: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 12cd6e3..cce75b4 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -116,6 +116,34 @@ int power_supply_is_system_supplied(void)
 }
 EXPORT_SYMBOL_GPL(power_supply_is_system_supplied);
 
+int power_supply_set_battery_charged(struct power_supply *psy)
+{
+	if (psy->type == POWER_SUPPLY_TYPE_BATTERY && psy->set_charged) {
+		psy->set_charged(psy);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(power_supply_set_battery_charged);
+
+static int power_supply_match_device_by_name(struct device *dev, void *data)
+{
+	const char *name = data;
+	struct power_supply *psy = dev_get_drvdata(dev);
+
+	return strcmp(psy->name, name) == 0;
+}
+
+struct power_supply *power_supply_get_by_name(char *name)
+{
+	struct device *dev = class_find_device(power_supply_class, NULL, name,
+					power_supply_match_device_by_name);
+
+	return dev ? dev_get_drvdata(dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(power_supply_get_by_name);
+
 int power_supply_register(struct device *parent, struct power_supply *psy)
 {
 	int rc = 0;
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 4c7c6fc..b5d096d 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -144,6 +144,7 @@ struct power_supply {
 			    enum power_supply_property psp,
 			    union power_supply_propval *val);
 	void (*external_power_changed)(struct power_supply *psy);
+	void (*set_charged)(struct power_supply *psy);
 
 	/* For APM emulation, think legacy userspace. */
 	int use_for_apm;
@@ -183,8 +184,10 @@ struct power_supply_info {
 	int use_for_apm;
 };
 
+extern struct power_supply *power_supply_get_by_name(char *name);
 extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
+extern int power_supply_set_battery_charged(struct power_supply *psy);
 
 #if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE)
 extern int power_supply_is_system_supplied(void);
-- 
cgit v0.10.2


From 8d631ccff8d90fce77b42f01b3872595c599cbf9 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 23 Jul 2009 20:35:54 +0200
Subject: ds2760_battery: implement set_charged() feature

The ds2760's internal current meter is not reliable enough as it has an
inacurracy of around ~15%. Without any correction for that error, the
current accumulator is couting up all the time, even though the battery
is already fully charged and hence destroys the static information. The
longer it is connected, the worse is the aberration.

Fortunately, this can be corrected by the DS2760_CURRENT_OFFSET_BIAS
register. Using the external power_supply_set_battery_charged()
function, this register is now gauging the measurement.

A delayed work is used to debounce flaky GPIO signals and to let the
current value settle. Also see Maxim's application note AN4188.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>
Cc: Matt Reimer <mreimer@vpop.net>
Cc: Anton Vorontsov <cbou@mail.ru>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index f4a9258..1bb8498 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -56,6 +56,7 @@ struct ds2760_device_info {
 	struct device *w1_dev;
 	struct workqueue_struct *monitor_wqueue;
 	struct delayed_work monitor_work;
+	struct delayed_work set_charged_work;
 };
 
 static unsigned int cache_time = 1000;
@@ -327,6 +328,52 @@ static void ds2760_battery_external_power_changed(struct power_supply *psy)
 	queue_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ/10);
 }
 
+
+static void ds2760_battery_set_charged_work(struct work_struct *work)
+{
+	char bias;
+	struct ds2760_device_info *di = container_of(work,
+		struct ds2760_device_info, set_charged_work.work);
+
+	dev_dbg(di->dev, "%s\n", __func__);
+
+	ds2760_battery_read_status(di);
+
+	/* When we get notified by external circuitry that the battery is
+	 * considered fully charged now, we know that there is no current
+	 * flow any more. However, the ds2760's internal current meter is
+	 * too inaccurate to rely on - spec say something ~15% failure.
+	 * Hence, we use the current offset bias register to compensate
+	 * that error.
+	 */
+
+	if (!power_supply_am_i_supplied(&di->bat))
+		return;
+
+	bias = (signed char) di->current_raw +
+		(signed char) di->raw[DS2760_CURRENT_OFFSET_BIAS];
+
+	dev_dbg(di->dev, "%s: bias = %d\n", __func__, bias);
+
+	w1_ds2760_write(di->w1_dev, &bias, DS2760_CURRENT_OFFSET_BIAS, 1);
+	w1_ds2760_store_eeprom(di->w1_dev, DS2760_EEPROM_BLOCK1);
+	w1_ds2760_recall_eeprom(di->w1_dev, DS2760_EEPROM_BLOCK1);
+
+	/* Write to the di->raw[] buffer directly - the CURRENT_OFFSET_BIAS
+	 * value won't be read back by ds2760_battery_read_status() */
+	di->raw[DS2760_CURRENT_OFFSET_BIAS] = bias;
+}
+
+static void ds2760_battery_set_charged(struct power_supply *psy)
+{
+	struct ds2760_device_info *di = to_ds2760_device_info(psy);
+
+	/* postpone the actual work by 20 secs. This is for debouncing GPIO
+	 * signals and to let the current value settle. See AN4188. */
+	cancel_delayed_work(&di->set_charged_work);
+	queue_delayed_work(di->monitor_wqueue, &di->set_charged_work, HZ * 20);
+}
+
 static int ds2760_battery_get_property(struct power_supply *psy,
 				       enum power_supply_property psp,
 				       union power_supply_propval *val)
@@ -412,6 +459,7 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 	di->bat.properties	= ds2760_battery_props;
 	di->bat.num_properties	= ARRAY_SIZE(ds2760_battery_props);
 	di->bat.get_property	= ds2760_battery_get_property;
+	di->bat.set_charged	= ds2760_battery_set_charged;
 	di->bat.external_power_changed =
 				  ds2760_battery_external_power_changed;
 
@@ -443,6 +491,8 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 	}
 
 	INIT_DELAYED_WORK(&di->monitor_work, ds2760_battery_work);
+	INIT_DELAYED_WORK(&di->set_charged_work,
+			  ds2760_battery_set_charged_work);
 	di->monitor_wqueue = create_singlethread_workqueue(dev_name(&pdev->dev));
 	if (!di->monitor_wqueue) {
 		retval = -ESRCH;
@@ -467,6 +517,8 @@ static int ds2760_battery_remove(struct platform_device *pdev)
 
 	cancel_rearming_delayed_workqueue(di->monitor_wqueue,
 					  &di->monitor_work);
+	cancel_rearming_delayed_workqueue(di->monitor_wqueue,
+					  &di->set_charged_work);
 	destroy_workqueue(di->monitor_wqueue);
 	power_supply_unregister(&di->bat);
 
-- 
cgit v0.10.2


From cfc65dd57967f2e0c7b3a8b73e6d12470b1cf1c1 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@hp.com>
Date: Thu, 30 Jul 2009 16:15:18 -0600
Subject: iommu=pt is a valid early param

This avoids a "Malformed early option 'iommu'" warning on boot when
trying to use pass-through mode.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1a041bc..ae13e34 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -212,10 +212,8 @@ static __init int iommu_setup(char *p)
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
 #endif
-		if (!strncmp(p, "pt", 2)) {
+		if (!strncmp(p, "pt", 2))
 			iommu_pass_through = 1;
-			return 1;
-		}
 
 		gart_parse_options(p);
 
-- 
cgit v0.10.2


From 42c4ab41a176ee784c0f28c0b29025a8fc34f05a Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 29 Jul 2009 12:15:26 +0200
Subject: itimers: Merge ITIMER_VIRT and ITIMER_PROF

Both cpu itimers have same data flow in the few places, this
patch make unification of code related with VIRT and PROF
itimers.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
LKML-Reference: <1248862529-6063-2-git-send-email-sgruszka@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3ab08e4..3b3efad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -470,6 +470,11 @@ struct pacct_struct {
 	unsigned long		ac_minflt, ac_majflt;
 };
 
+struct cpu_itimer {
+	cputime_t expires;
+	cputime_t incr;
+};
+
 /**
  * struct task_cputime - collected CPU time counts
  * @utime:		time spent in user mode, in &cputime_t units
@@ -564,9 +569,12 @@ struct signal_struct {
 	struct pid *leader_pid;
 	ktime_t it_real_incr;
 
-	/* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */
-	cputime_t it_prof_expires, it_virt_expires;
-	cputime_t it_prof_incr, it_virt_incr;
+	/*
+	 * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
+	 * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
+	 * values are defined to 0 and 1 respectively
+	 */
+	struct cpu_itimer it[2];
 
 	/*
 	 * Thread group totals for process CPU timers.
diff --git a/kernel/fork.c b/kernel/fork.c
index 29b532e..893ab0b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -62,6 +62,7 @@
 #include <linux/fs_struct.h>
 #include <linux/magic.h>
 #include <linux/perf_counter.h>
+#include <linux/posix-timers.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -790,10 +791,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	thread_group_cputime_init(sig);
 
 	/* Expiration times and increments. */
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
+	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
+	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
 
 	/* Cached expiration times. */
 	sig->cputime_expires.prof_exp = cputime_zero;
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 58762f7..852c88d 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -41,10 +41,43 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer)
 	return ktime_to_timeval(rem);
 }
 
+static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
+			   struct itimerval *value)
+{
+	cputime_t cval, cinterval;
+	struct cpu_itimer *it = &tsk->signal->it[clock_id];
+
+	spin_lock_irq(&tsk->sighand->siglock);
+
+	cval = it->expires;
+	cinterval = it->incr;
+	if (!cputime_eq(cval, cputime_zero)) {
+		struct task_cputime cputime;
+		cputime_t t;
+
+		thread_group_cputimer(tsk, &cputime);
+		if (clock_id == CPUCLOCK_PROF)
+			t = cputime_add(cputime.utime, cputime.stime);
+		else
+			/* CPUCLOCK_VIRT */
+			t = cputime.utime;
+
+		if (cputime_le(cval, t))
+			/* about to fire */
+			cval = jiffies_to_cputime(1);
+		else
+			cval = cputime_sub(cval, t);
+	}
+
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	cputime_to_timeval(cval, &value->it_value);
+	cputime_to_timeval(cinterval, &value->it_interval);
+}
+
 int do_getitimer(int which, struct itimerval *value)
 {
 	struct task_struct *tsk = current;
-	cputime_t cinterval, cval;
 
 	switch (which) {
 	case ITIMER_REAL:
@@ -55,44 +88,10 @@ int do_getitimer(int which, struct itimerval *value)
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
-		spin_lock_irq(&tsk->sighand->siglock);
-		cval = tsk->signal->it_virt_expires;
-		cinterval = tsk->signal->it_virt_incr;
-		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_cputime cputime;
-			cputime_t utime;
-
-			thread_group_cputimer(tsk, &cputime);
-			utime = cputime.utime;
-			if (cputime_le(cval, utime)) { /* about to fire */
-				cval = jiffies_to_cputime(1);
-			} else {
-				cval = cputime_sub(cval, utime);
-			}
-		}
-		spin_unlock_irq(&tsk->sighand->siglock);
-		cputime_to_timeval(cval, &value->it_value);
-		cputime_to_timeval(cinterval, &value->it_interval);
+		get_cpu_itimer(tsk, CPUCLOCK_VIRT, value);
 		break;
 	case ITIMER_PROF:
-		spin_lock_irq(&tsk->sighand->siglock);
-		cval = tsk->signal->it_prof_expires;
-		cinterval = tsk->signal->it_prof_incr;
-		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_cputime times;
-			cputime_t ptime;
-
-			thread_group_cputimer(tsk, &times);
-			ptime = cputime_add(times.utime, times.stime);
-			if (cputime_le(cval, ptime)) { /* about to fire */
-				cval = jiffies_to_cputime(1);
-			} else {
-				cval = cputime_sub(cval, ptime);
-			}
-		}
-		spin_unlock_irq(&tsk->sighand->siglock);
-		cputime_to_timeval(cval, &value->it_value);
-		cputime_to_timeval(cinterval, &value->it_interval);
+		get_cpu_itimer(tsk, CPUCLOCK_PROF, value);
 		break;
 	default:
 		return(-EINVAL);
@@ -128,6 +127,36 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
+			   struct itimerval *value, struct itimerval *ovalue)
+{
+	cputime_t cval, cinterval, nval, ninterval;
+	struct cpu_itimer *it = &tsk->signal->it[clock_id];
+
+	nval = timeval_to_cputime(&value->it_value);
+	ninterval = timeval_to_cputime(&value->it_interval);
+
+	spin_lock_irq(&tsk->sighand->siglock);
+
+	cval = it->expires;
+	cinterval = it->incr;
+	if (!cputime_eq(cval, cputime_zero) ||
+	    !cputime_eq(nval, cputime_zero)) {
+		if (cputime_gt(nval, cputime_zero))
+			nval = cputime_add(nval, jiffies_to_cputime(1));
+		set_process_cpu_timer(tsk, clock_id, &nval, &cval);
+	}
+	it->expires = nval;
+	it->incr = ninterval;
+
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	if (ovalue) {
+		cputime_to_timeval(cval, &ovalue->it_value);
+		cputime_to_timeval(cinterval, &ovalue->it_interval);
+	}
+}
+
 /*
  * Returns true if the timeval is in canonical form
  */
@@ -139,7 +168,6 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
 	struct task_struct *tsk = current;
 	struct hrtimer *timer;
 	ktime_t expires;
-	cputime_t cval, cinterval, nval, ninterval;
 
 	/*
 	 * Validate the timevals in value.
@@ -174,48 +202,10 @@ again:
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
-		nval = timeval_to_cputime(&value->it_value);
-		ninterval = timeval_to_cputime(&value->it_interval);
-		spin_lock_irq(&tsk->sighand->siglock);
-		cval = tsk->signal->it_virt_expires;
-		cinterval = tsk->signal->it_virt_incr;
-		if (!cputime_eq(cval, cputime_zero) ||
-		    !cputime_eq(nval, cputime_zero)) {
-			if (cputime_gt(nval, cputime_zero))
-				nval = cputime_add(nval,
-						   jiffies_to_cputime(1));
-			set_process_cpu_timer(tsk, CPUCLOCK_VIRT,
-					      &nval, &cval);
-		}
-		tsk->signal->it_virt_expires = nval;
-		tsk->signal->it_virt_incr = ninterval;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		if (ovalue) {
-			cputime_to_timeval(cval, &ovalue->it_value);
-			cputime_to_timeval(cinterval, &ovalue->it_interval);
-		}
+		set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue);
 		break;
 	case ITIMER_PROF:
-		nval = timeval_to_cputime(&value->it_value);
-		ninterval = timeval_to_cputime(&value->it_interval);
-		spin_lock_irq(&tsk->sighand->siglock);
-		cval = tsk->signal->it_prof_expires;
-		cinterval = tsk->signal->it_prof_incr;
-		if (!cputime_eq(cval, cputime_zero) ||
-		    !cputime_eq(nval, cputime_zero)) {
-			if (cputime_gt(nval, cputime_zero))
-				nval = cputime_add(nval,
-						   jiffies_to_cputime(1));
-			set_process_cpu_timer(tsk, CPUCLOCK_PROF,
-					      &nval, &cval);
-		}
-		tsk->signal->it_prof_expires = nval;
-		tsk->signal->it_prof_incr = ninterval;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		if (ovalue) {
-			cputime_to_timeval(cval, &ovalue->it_value);
-			cputime_to_timeval(cinterval, &ovalue->it_interval);
-		}
+		set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue);
 		break;
 	default:
 		return -EINVAL;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bece7c0..9b2d5e4 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -14,11 +14,11 @@
  */
 void update_rlimit_cpu(unsigned long rlim_new)
 {
-	cputime_t cputime;
+	cputime_t cputime = secs_to_cputime(rlim_new);
+	struct signal_struct *const sig = current->signal;
 
-	cputime = secs_to_cputime(rlim_new);
-	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
-	    cputime_gt(current->signal->it_prof_expires, cputime)) {
+	if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
+	    cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
 		spin_lock_irq(&current->sighand->siglock);
 		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
 		spin_unlock_irq(&current->sighand->siglock);
@@ -613,6 +613,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				break;
 			}
 		} else {
+			struct signal_struct *const sig = p->signal;
+			union cpu_time_count *exp = &timer->it.cpu.expires;
+
 			/*
 			 * For a process timer, set the cached expiration time.
 			 */
@@ -620,30 +623,27 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 			default:
 				BUG();
 			case CPUCLOCK_VIRT:
-				if (!cputime_eq(p->signal->it_virt_expires,
+				if (!cputime_eq(sig->it[CPUCLOCK_VIRT].expires,
 						cputime_zero) &&
-				    cputime_lt(p->signal->it_virt_expires,
-					       timer->it.cpu.expires.cpu))
+				    cputime_lt(sig->it[CPUCLOCK_VIRT].expires,
+					       exp->cpu))
 					break;
-				p->signal->cputime_expires.virt_exp =
-					timer->it.cpu.expires.cpu;
+				sig->cputime_expires.virt_exp = exp->cpu;
 				break;
 			case CPUCLOCK_PROF:
-				if (!cputime_eq(p->signal->it_prof_expires,
+				if (!cputime_eq(sig->it[CPUCLOCK_PROF].expires,
 						cputime_zero) &&
-				    cputime_lt(p->signal->it_prof_expires,
-					       timer->it.cpu.expires.cpu))
+				    cputime_lt(sig->it[CPUCLOCK_PROF].expires,
+					       exp->cpu))
 					break;
-				i = p->signal->rlim[RLIMIT_CPU].rlim_cur;
+				i = sig->rlim[RLIMIT_CPU].rlim_cur;
 				if (i != RLIM_INFINITY &&
-				    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
+				    i <= cputime_to_secs(exp->cpu))
 					break;
-				p->signal->cputime_expires.prof_exp =
-					timer->it.cpu.expires.cpu;
+				sig->cputime_expires.prof_exp = exp->cpu;
 				break;
 			case CPUCLOCK_SCHED:
-				p->signal->cputime_expires.sched_exp =
-					timer->it.cpu.expires.sched;
+				sig->cputime_expires.sched_exp = exp->sched;
 				break;
 			}
 		}
@@ -1070,6 +1070,27 @@ static void stop_process_timers(struct task_struct *tsk)
 	spin_unlock_irqrestore(&cputimer->lock, flags);
 }
 
+static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
+			     cputime_t *expires, cputime_t cur_time, int signo)
+{
+	if (cputime_eq(it->expires, cputime_zero))
+		return;
+
+	if (cputime_ge(cur_time, it->expires)) {
+		it->expires = it->incr;
+		if (!cputime_eq(it->expires, cputime_zero))
+			it->expires = cputime_add(it->expires, cur_time);
+
+		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
+	}
+
+	if (!cputime_eq(it->expires, cputime_zero) &&
+	    (cputime_eq(*expires, cputime_zero) ||
+	     cputime_lt(it->expires, *expires))) {
+		*expires = it->expires;
+	}
+}
+
 /*
  * Check for any per-thread CPU timers that have fired and move them
  * off the tsk->*_timers list onto the firing list.  Per-thread timers
@@ -1089,10 +1110,10 @@ static void check_process_timers(struct task_struct *tsk,
 	 * Don't sample the current process CPU clocks if there are no timers.
 	 */
 	if (list_empty(&timers[CPUCLOCK_PROF]) &&
-	    cputime_eq(sig->it_prof_expires, cputime_zero) &&
+	    cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
 	    sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
 	    list_empty(&timers[CPUCLOCK_VIRT]) &&
-	    cputime_eq(sig->it_virt_expires, cputime_zero) &&
+	    cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
 	    list_empty(&timers[CPUCLOCK_SCHED])) {
 		stop_process_timers(tsk);
 		return;
@@ -1152,38 +1173,11 @@ static void check_process_timers(struct task_struct *tsk,
 	/*
 	 * Check for the special case process timers.
 	 */
-	if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
-		if (cputime_ge(ptime, sig->it_prof_expires)) {
-			/* ITIMER_PROF fires and reloads.  */
-			sig->it_prof_expires = sig->it_prof_incr;
-			if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
-				sig->it_prof_expires = cputime_add(
-					sig->it_prof_expires, ptime);
-			}
-			__group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
-		}
-		if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
-		    (cputime_eq(prof_expires, cputime_zero) ||
-		     cputime_lt(sig->it_prof_expires, prof_expires))) {
-			prof_expires = sig->it_prof_expires;
-		}
-	}
-	if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
-		if (cputime_ge(utime, sig->it_virt_expires)) {
-			/* ITIMER_VIRTUAL fires and reloads.  */
-			sig->it_virt_expires = sig->it_virt_incr;
-			if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
-				sig->it_virt_expires = cputime_add(
-					sig->it_virt_expires, utime);
-			}
-			__group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
-		}
-		if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
-		    (cputime_eq(virt_expires, cputime_zero) ||
-		     cputime_lt(sig->it_virt_expires, virt_expires))) {
-			virt_expires = sig->it_virt_expires;
-		}
-	}
+	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
+			 SIGPROF);
+	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
+			 SIGVTALRM);
+
 	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
 		unsigned long psecs = cputime_to_secs(ptime);
 		cputime_t x;
-- 
cgit v0.10.2


From 8356b5f9c424e5831715abbce747197c30d1fd71 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 29 Jul 2009 12:15:27 +0200
Subject: itimers: Fix periodic tics precision

Measure ITIMER_PROF and ITIMER_VIRT timers interval error
between real ticks and requested by user. Take it into account
when scheduling next tick.

This patch introduce possibility where time between two
consecutive tics is smaller then requested interval, it
preserve however dependency that n tick is generated not
earlier than n*interval time - counting from the beginning of
periodic signal generation.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
LKML-Reference: <1248862529-6063-3-git-send-email-sgruszka@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3b3efad..a069e65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -473,6 +473,8 @@ struct pacct_struct {
 struct cpu_itimer {
 	cputime_t expires;
 	cputime_t incr;
+	u32 error;
+	u32 incr_error;
 };
 
 /**
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 852c88d..21adff7 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -42,7 +42,7 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer)
 }
 
 static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
-			   struct itimerval *value)
+			   struct itimerval *const value)
 {
 	cputime_t cval, cinterval;
 	struct cpu_itimer *it = &tsk->signal->it[clock_id];
@@ -127,14 +127,32 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns)
+{
+	struct timespec ts;
+	s64 cpu_ns;
+
+	cputime_to_timespec(ct, &ts);
+	cpu_ns = timespec_to_ns(&ts);
+
+	return (cpu_ns <= real_ns) ? 0 : cpu_ns - real_ns;
+}
+
 static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
-			   struct itimerval *value, struct itimerval *ovalue)
+			   const struct itimerval *const value,
+			   struct itimerval *const ovalue)
 {
-	cputime_t cval, cinterval, nval, ninterval;
+	cputime_t cval, nval, cinterval, ninterval;
+	s64 ns_ninterval, ns_nval;
 	struct cpu_itimer *it = &tsk->signal->it[clock_id];
 
 	nval = timeval_to_cputime(&value->it_value);
+	ns_nval = timeval_to_ns(&value->it_value);
 	ninterval = timeval_to_cputime(&value->it_interval);
+	ns_ninterval = timeval_to_ns(&value->it_interval);
+
+	it->incr_error = cputime_sub_ns(ninterval, ns_ninterval);
+	it->error = cputime_sub_ns(nval, ns_nval);
 
 	spin_lock_irq(&tsk->sighand->siglock);
 
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9b2d5e4..b60d644 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1070,6 +1070,8 @@ static void stop_process_timers(struct task_struct *tsk)
 	spin_unlock_irqrestore(&cputimer->lock, flags);
 }
 
+static u32 onecputick;
+
 static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 			     cputime_t *expires, cputime_t cur_time, int signo)
 {
@@ -1077,9 +1079,16 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 		return;
 
 	if (cputime_ge(cur_time, it->expires)) {
-		it->expires = it->incr;
-		if (!cputime_eq(it->expires, cputime_zero))
-			it->expires = cputime_add(it->expires, cur_time);
+		if (!cputime_eq(it->incr, cputime_zero)) {
+			it->expires = cputime_add(it->expires, it->incr);
+			it->error += it->incr_error;
+			if (it->error >= onecputick) {
+				it->expires = cputime_sub(it->expires,
+							jiffies_to_cputime(1));
+				it->error -= onecputick;
+			}
+		} else
+			it->expires = cputime_zero;
 
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	}
@@ -1696,10 +1705,15 @@ static __init int init_posix_cpu_timers(void)
 		.nsleep = thread_cpu_nsleep,
 		.nsleep_restart = thread_cpu_nsleep_restart,
 	};
+	struct timespec ts;
 
 	register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
 	register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
 
+	cputime_to_timespec(jiffies_to_cputime(1), &ts);
+	onecputick = ts.tv_nsec;
+	WARN_ON(ts.tv_sec != 0);
+
 	return 0;
 }
 __initcall(init_posix_cpu_timers);
-- 
cgit v0.10.2


From d1e3b6d195770bd422e3229b88edfc154b6a27dd Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 29 Jul 2009 12:15:28 +0200
Subject: itimers: Simplify arm_timer() code a bit

Don't update values in expiration cache when new ones are
equal. Add expire_le() and expire_gt() helpers to simplify the
code.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
LKML-Reference: <1248862529-6063-4-git-send-email-sgruszka@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index b60d644..69c9237 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -541,6 +541,17 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
 					     now);
 }
 
+static inline int expires_gt(cputime_t expires, cputime_t new_exp)
+{
+	return cputime_eq(expires, cputime_zero) ||
+	       cputime_gt(expires, new_exp);
+}
+
+static inline int expires_le(cputime_t expires, cputime_t new_exp)
+{
+	return !cputime_eq(expires, cputime_zero) &&
+	       cputime_le(expires, new_exp);
+}
 /*
  * Insert the timer on the appropriate list before any timers that
  * expire later.  This must be called with the tasklist_lock held
@@ -585,31 +596,26 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 		 */
 
 		if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+			union cpu_time_count *exp = &nt->expires;
+
 			switch (CPUCLOCK_WHICH(timer->it_clock)) {
 			default:
 				BUG();
 			case CPUCLOCK_PROF:
-				if (cputime_eq(p->cputime_expires.prof_exp,
-					       cputime_zero) ||
-				    cputime_gt(p->cputime_expires.prof_exp,
-					       nt->expires.cpu))
-					p->cputime_expires.prof_exp =
-						nt->expires.cpu;
+				if (expires_gt(p->cputime_expires.prof_exp,
+					       exp->cpu))
+					p->cputime_expires.prof_exp = exp->cpu;
 				break;
 			case CPUCLOCK_VIRT:
-				if (cputime_eq(p->cputime_expires.virt_exp,
-					       cputime_zero) ||
-				    cputime_gt(p->cputime_expires.virt_exp,
-					       nt->expires.cpu))
-					p->cputime_expires.virt_exp =
-						nt->expires.cpu;
+				if (expires_gt(p->cputime_expires.virt_exp,
+					       exp->cpu))
+					p->cputime_expires.virt_exp = exp->cpu;
 				break;
 			case CPUCLOCK_SCHED:
 				if (p->cputime_expires.sched_exp == 0 ||
-				    p->cputime_expires.sched_exp >
-							nt->expires.sched)
+				    p->cputime_expires.sched_exp > exp->sched)
 					p->cputime_expires.sched_exp =
-						nt->expires.sched;
+								exp->sched;
 				break;
 			}
 		} else {
@@ -623,17 +629,13 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 			default:
 				BUG();
 			case CPUCLOCK_VIRT:
-				if (!cputime_eq(sig->it[CPUCLOCK_VIRT].expires,
-						cputime_zero) &&
-				    cputime_lt(sig->it[CPUCLOCK_VIRT].expires,
+				if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
 					       exp->cpu))
 					break;
 				sig->cputime_expires.virt_exp = exp->cpu;
 				break;
 			case CPUCLOCK_PROF:
-				if (!cputime_eq(sig->it[CPUCLOCK_PROF].expires,
-						cputime_zero) &&
-				    cputime_lt(sig->it[CPUCLOCK_PROF].expires,
+				if (expires_le(sig->it[CPUCLOCK_PROF].expires,
 					       exp->cpu))
 					break;
 				i = sig->rlim[RLIMIT_CPU].rlim_cur;
-- 
cgit v0.10.2


From a42548a18866e87092db93b771e6c5b060d78401 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 29 Jul 2009 12:15:29 +0200
Subject: cputime: Optimize jiffies_to_cputime(1)

For powerpc with CONFIG_VIRT_CPU_ACCOUNTING
jiffies_to_cputime(1) is not compile time constant and run time
calculations are quite expensive. To optimize we use
precomputed value. For all other architectures is is
preprocessor definition.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
LKML-Reference: <1248862529-6063-5-git-send-email-sgruszka@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index d20b998..7fa8a85 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -30,6 +30,7 @@ typedef u64 cputime_t;
 typedef u64 cputime64_t;
 
 #define cputime_zero			((cputime_t)0)
+#define cputime_one_jiffy		jiffies_to_cputime(1)
 #define cputime_max			((~((cputime_t)0) >> 1) - 1)
 #define cputime_add(__a, __b)		((__a) +  (__b))
 #define cputime_sub(__a, __b)		((__a) -  (__b))
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index f42e623..fa19f3f 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -18,6 +18,9 @@
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 #include <asm-generic/cputime.h>
+#ifdef __KERNEL__
+static inline void setup_cputime_one_jiffy(void) { }
+#endif
 #else
 
 #include <linux/types.h>
@@ -49,6 +52,11 @@ typedef u64 cputime64_t;
 #ifdef __KERNEL__
 
 /*
+ * One jiffy in timebase units computed during initialization
+ */
+extern cputime_t cputime_one_jiffy;
+
+/*
  * Convert cputime <-> jiffies
  */
 extern u64 __cputime_jiffies_factor;
@@ -89,6 +97,11 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 	return ct;
 }
 
+static inline void setup_cputime_one_jiffy(void)
+{
+	cputime_one_jiffy = jiffies_to_cputime(1);
+}
+
 static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 {
 	cputime_t ct;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index eae4511..211d7b0 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -193,6 +193,8 @@ EXPORT_SYMBOL(__cputime_clockt_factor);
 DEFINE_PER_CPU(unsigned long, cputime_last_delta);
 DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 
+cputime_t cputime_one_jiffy;
+
 static void calc_cputime_factors(void)
 {
 	struct div_result res;
@@ -500,6 +502,7 @@ static int __init iSeries_tb_recal(void)
 				tb_to_xs = divres.result_low;
 				vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
 				vdso_data->tb_to_xs = tb_to_xs;
+				setup_cputime_one_jiffy();
 			}
 			else {
 				printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
@@ -945,6 +948,7 @@ void __init time_init(void)
 	tb_ticks_per_usec = ppc_tb_freq / 1000000;
 	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
 	calc_cputime_factors();
+	setup_cputime_one_jiffy();
 
 	/*
 	 * Calculate the length of each tick in ns.  It will not be
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 7a3817a..24b1244 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -42,6 +42,7 @@ __div(unsigned long long n, unsigned int base)
 #endif /* __s390x__ */
 
 #define cputime_zero			(0ULL)
+#define cputime_one_jiffy		jiffies_to_cputime(1)
 #define cputime_max			((~0UL >> 1) - 1)
 #define cputime_add(__a, __b)		((__a) +  (__b))
 #define cputime_sub(__a, __b)		((__a) -  (__b))
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 1c1fa42..ca0f239 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -7,6 +7,7 @@
 typedef unsigned long cputime_t;
 
 #define cputime_zero			(0UL)
+#define cputime_one_jiffy		jiffies_to_cputime(1)
 #define cputime_max			((~0UL >> 1) - 1)
 #define cputime_add(__a, __b)		((__a) +  (__b))
 #define cputime_sub(__a, __b)		((__a) -  (__b))
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 21adff7..8078a32 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -64,7 +64,7 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 
 		if (cputime_le(cval, t))
 			/* about to fire */
-			cval = jiffies_to_cputime(1);
+			cval = cputime_one_jiffy;
 		else
 			cval = cputime_sub(cval, t);
 	}
@@ -161,7 +161,7 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 	if (!cputime_eq(cval, cputime_zero) ||
 	    !cputime_eq(nval, cputime_zero)) {
 		if (cputime_gt(nval, cputime_zero))
-			nval = cputime_add(nval, jiffies_to_cputime(1));
+			nval = cputime_add(nval, cputime_one_jiffy);
 		set_process_cpu_timer(tsk, clock_id, &nval, &cval);
 	}
 	it->expires = nval;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 69c9237..18bdde6 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1086,7 +1086,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 			it->error += it->incr_error;
 			if (it->error >= onecputick) {
 				it->expires = cputime_sub(it->expires,
-							jiffies_to_cputime(1));
+							  cputime_one_jiffy);
 				it->error -= onecputick;
 			}
 		} else
@@ -1461,7 +1461,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		if (!cputime_eq(*oldval, cputime_zero)) {
 			if (cputime_le(*oldval, now.cpu)) {
 				/* Just about to fire. */
-				*oldval = jiffies_to_cputime(1);
+				*oldval = cputime_one_jiffy;
 			} else {
 				*oldval = cputime_sub(*oldval, now.cpu);
 			}
@@ -1712,7 +1712,7 @@ static __init int init_posix_cpu_timers(void)
 	register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
 	register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
 
-	cputime_to_timespec(jiffies_to_cputime(1), &ts);
+	cputime_to_timespec(cputime_one_jiffy, &ts);
 	onecputick = ts.tv_nsec;
 	WARN_ON(ts.tv_sec != 0);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 1b59e26..8f977d5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5031,17 +5031,16 @@ void account_idle_time(cputime_t cputime)
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-	cputime_t one_jiffy = jiffies_to_cputime(1);
-	cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
 	if (user_tick)
-		account_user_time(p, one_jiffy, one_jiffy_scaled);
+		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
 				    one_jiffy_scaled);
 	else
-		account_idle_time(one_jiffy);
+		account_idle_time(cputime_one_jiffy);
 }
 
 /*
-- 
cgit v0.10.2


From 0815565adfe3f4c369110c57d8ffe83caefeed68 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Tue, 4 Aug 2009 09:17:20 +0100
Subject: intel-iommu: Cope with broken HP DC7900 BIOS

Yet another reason why trusting this stuff to the BIOS was a bad idea.
The HP DC7900 BIOS reports an iommu at an address which just returns all
ones, when VT-d is disabled in the BIOS.

Fix up the missing iounmap in the error paths while we're at it.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 7b287cb..380b60e 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -632,20 +632,31 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 
+	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
+		/* Promote an attitude of violence to a BIOS engineer today */
+		WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
+		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+		     drhd->reg_base_addr,
+		     dmi_get_system_info(DMI_BIOS_VENDOR),
+		     dmi_get_system_info(DMI_BIOS_VERSION),
+		     dmi_get_system_info(DMI_PRODUCT_VERSION));
+		goto err_unmap;
+	}
+
 #ifdef CONFIG_DMAR
 	agaw = iommu_calculate_agaw(iommu);
 	if (agaw < 0) {
 		printk(KERN_ERR
 		       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
 		       iommu->seq_id);
-		goto error;
+		goto err_unmap;
 	}
 	msagaw = iommu_calculate_max_sagaw(iommu);
 	if (msagaw < 0) {
 		printk(KERN_ERR
 			"Cannot get a valid max agaw for iommu (seq_id = %d)\n",
 			iommu->seq_id);
-		goto error;
+		goto err_unmap;
 	}
 #endif
 	iommu->agaw = agaw;
@@ -665,7 +676,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	}
 
 	ver = readl(iommu->reg + DMAR_VER_REG);
-	pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
+	pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
 		(unsigned long long)drhd->reg_base_addr,
 		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
 		(unsigned long long)iommu->cap,
@@ -675,7 +686,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 
 	drhd->iommu = iommu;
 	return 0;
-error:
+
+ err_unmap:
+	iounmap(iommu->reg);
+ error:
 	kfree(iommu);
 	return -1;
 }
-- 
cgit v0.10.2


From f866a8194f7cbabb9135b98b9ac7d26237b88367 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Aug 2009 15:22:38 +1000
Subject: sunrpc/cache: rename queue_loose to cache_dequeue

'loose' was a mis-spelling of 'lose', and even that wasn't a good
word choice.
So give this function a more useful name.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ff0c230..d19c075 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -101,7 +101,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
 
 
-static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
+static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
 
 static int cache_fresh_locked(struct cache_head *head, time_t expiry)
 {
@@ -117,7 +117,7 @@ static void cache_fresh_unlocked(struct cache_head *head,
 		cache_revisit_request(head);
 	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
 		cache_revisit_request(head);
-		queue_loose(detail, head);
+		cache_dequeue(detail, head);
 	}
 }
 
@@ -457,7 +457,7 @@ static int cache_clean(void)
 				)
 				continue;
 			if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
-				queue_loose(current_detail, ch);
+				cache_dequeue(current_detail, ch);
 
 			if (atomic_read(&ch->ref.refcount) == 1)
 				break;
@@ -920,7 +920,7 @@ static const struct file_operations cache_file_operations = {
 };
 
 
-static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
+static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
 {
 	struct cache_queue *cq;
 	spin_lock(&queue_lock);
-- 
cgit v0.10.2


From 5c4d26390341732a8d614141a4cf4663610a1698 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Aug 2009 15:22:38 +1000
Subject: sunrpc/cache: make sure deferred requests eventually get revisited.

While deferred requests normally get revisited quite quickly,
it is possible for a request to remain in the deferral queue
when the cache item is discarded.  We can easily make sure that
doesn't happen by calling cache_revisit_request just before
the final 'put'.

Also there is a small chance that a race would cause one thread to
defer a request against a cache item while another thread is failing
to queue an upcall for that item.  So when the upcall fails, make
sure to revisit all deferred requests.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index d19c075..44f4516 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -221,6 +221,7 @@ int cache_check(struct cache_detail *detail,
 			switch (cache_make_upcall(detail, h)) {
 			case -EINVAL:
 				clear_bit(CACHE_PENDING, &h->flags);
+				cache_revisit_request(h);
 				if (rv == -EAGAIN) {
 					set_bit(CACHE_NEGATIVE, &h->flags);
 					cache_fresh_unlocked(h, detail,
@@ -473,8 +474,10 @@ static int cache_clean(void)
 		if (!ch)
 			current_index ++;
 		spin_unlock(&cache_list_lock);
-		if (ch)
+		if (ch) {
+			cache_revisit_request(ch);
 			cache_put(ch, d);
+		}
 	} else
 		spin_unlock(&cache_list_lock);
 
-- 
cgit v0.10.2


From 19943b0e30b05d42e494ae6fef78156ebc8c637e Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Tue, 4 Aug 2009 16:19:20 +0100
Subject: intel-iommu: Unify hardware and software passthrough support

This makes the hardware passthrough mode work a lot more like the
software version, so that the behaviour of a kernel with 'iommu=pt'
is the same whether the hardware supports passthrough or not.

In particular:
 - We use a single si_domain for the pass-through devices.
 - 32-bit devices can be taken out of the pass-through domain so that
   they don't have to use swiotlb.
 - Devices will work again after being removed from a KVM guest.
 - A potential oops on OOM (in init_context_pass_through()) is fixed.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 6af96ee..1e66b18 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -71,9 +71,8 @@ void __init pci_swiotlb_init(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
 #ifdef CONFIG_X86_64
-	if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ||
-		iommu_pass_through)
-	       swiotlb = 1;
+	if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
+		swiotlb = 1;
 #endif
 	if (swiotlb_force)
 		swiotlb = 1;
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 097d5da..147b3b9 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -251,7 +251,8 @@ static inline int first_pte_in_page(struct dma_pte *pte)
  * 	2. It maps to each iommu if successful.
  *	3. Each iommu mapps to this domain if successful.
  */
-struct dmar_domain *si_domain;
+static struct dmar_domain *si_domain;
+static int hw_pass_through = 1;
 
 /* devices under the same p2p bridge are owned in one domain */
 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
@@ -1948,14 +1949,24 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
 	struct dmar_domain *domain;
 	int ret;
 
-	printk(KERN_INFO
-	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
-	       pci_name(pdev), start, end);
-
 	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 	if (!domain)
 		return -ENOMEM;
 
+	/* For _hardware_ passthrough, don't bother. But for software
+	   passthrough, we do it anyway -- it may indicate a memory
+	   range which is reserved in E820, so which didn't get set
+	   up to start with in si_domain */
+	if (domain == si_domain && hw_pass_through) {
+		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
+		       pci_name(pdev), start, end);
+		return 0;
+	}
+
+	printk(KERN_INFO
+	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
+	       pci_name(pdev), start, end);
+
 	ret = iommu_domain_identity_map(domain, start, end);
 	if (ret)
 		goto error;
@@ -2006,23 +2017,6 @@ static inline void iommu_prepare_isa(void)
 }
 #endif /* !CONFIG_DMAR_FLPY_WA */
 
-/* Initialize each context entry as pass through.*/
-static int __init init_context_pass_through(void)
-{
-	struct pci_dev *pdev = NULL;
-	struct dmar_domain *domain;
-	int ret;
-
-	for_each_pci_dev(pdev) {
-		domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-		ret = domain_context_mapping(domain, pdev,
-					     CONTEXT_TT_PASS_THROUGH);
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
 
 static int __init si_domain_work_fn(unsigned long start_pfn,
@@ -2037,7 +2031,7 @@ static int __init si_domain_work_fn(unsigned long start_pfn,
 
 }
 
-static int si_domain_init(void)
+static int si_domain_init(int hw)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu;
@@ -2064,6 +2058,9 @@ static int si_domain_init(void)
 
 	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
 
+	if (hw)
+		return 0;
+
 	for_each_online_node(nid) {
 		work_with_active_regions(nid, si_domain_work_fn, &ret);
 		if (ret)
@@ -2155,24 +2152,26 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
 	return 1;
 }
 
-static int iommu_prepare_static_identity_mapping(void)
+static int iommu_prepare_static_identity_mapping(int hw)
 {
 	struct pci_dev *pdev = NULL;
 	int ret;
 
-	ret = si_domain_init();
+	ret = si_domain_init(hw);
 	if (ret)
 		return -EFAULT;
 
 	for_each_pci_dev(pdev) {
 		if (iommu_should_identity_map(pdev, 1)) {
-			printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
-			       pci_name(pdev));
+			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
+			       hw ? "hardware" : "software", pci_name(pdev));
 
 			ret = domain_context_mapping(si_domain, pdev,
+						     hw ? CONTEXT_TT_PASS_THROUGH :
 						     CONTEXT_TT_MULTI_LEVEL);
 			if (ret)
 				return ret;
+
 			ret = domain_add_dev_info(si_domain, pdev);
 			if (ret)
 				return ret;
@@ -2189,14 +2188,6 @@ int __init init_dmars(void)
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
 	int i, ret;
-	int pass_through = 1;
-
-	/*
-	 * In case pass through can not be enabled, iommu tries to use identity
-	 * mapping.
-	 */
-	if (iommu_pass_through)
-		iommu_identity_mapping = 1;
 
 	/*
 	 * for each drhd
@@ -2250,14 +2241,8 @@ int __init init_dmars(void)
 			goto error;
 		}
 		if (!ecap_pass_through(iommu->ecap))
-			pass_through = 0;
+			hw_pass_through = 0;
 	}
-	if (iommu_pass_through)
-		if (!pass_through) {
-			printk(KERN_INFO
-			       "Pass Through is not supported by hardware.\n");
-			iommu_pass_through = 0;
-		}
 
 	/*
 	 * Start from the sane iommu hardware state.
@@ -2312,64 +2297,57 @@ int __init init_dmars(void)
 		}
 	}
 
+	if (iommu_pass_through)
+		iommu_identity_mapping = 1;
+#ifdef CONFIG_DMAR_BROKEN_GFX_WA
+	else
+		iommu_identity_mapping = 2;
+#endif
 	/*
-	 * If pass through is set and enabled, context entries of all pci
-	 * devices are intialized by pass through translation type.
+	 * If pass through is not set or not enabled, setup context entries for
+	 * identity mappings for rmrr, gfx, and isa and may fall back to static
+	 * identity mapping if iommu_identity_mapping is set.
 	 */
-	if (iommu_pass_through) {
-		ret = init_context_pass_through();
+	if (iommu_identity_mapping) {
+		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
 		if (ret) {
-			printk(KERN_ERR "IOMMU: Pass through init failed.\n");
-			iommu_pass_through = 0;
+			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
+			goto error;
 		}
 	}
-
 	/*
-	 * If pass through is not set or not enabled, setup context entries for
-	 * identity mappings for rmrr, gfx, and isa and may fall back to static
-	 * identity mapping if iommu_identity_mapping is set.
+	 * For each rmrr
+	 *   for each dev attached to rmrr
+	 *   do
+	 *     locate drhd for dev, alloc domain for dev
+	 *     allocate free domain
+	 *     allocate page table entries for rmrr
+	 *     if context not allocated for bus
+	 *           allocate and init context
+	 *           set present in root table for this bus
+	 *     init context with domain, translation etc
+	 *    endfor
+	 * endfor
 	 */
-	if (!iommu_pass_through) {
-#ifdef CONFIG_DMAR_BROKEN_GFX_WA
-		if (!iommu_identity_mapping)
-			iommu_identity_mapping = 2;
-#endif
-		if (iommu_identity_mapping)
-			iommu_prepare_static_identity_mapping();
-		/*
-		 * For each rmrr
-		 *   for each dev attached to rmrr
-		 *   do
-		 *     locate drhd for dev, alloc domain for dev
-		 *     allocate free domain
-		 *     allocate page table entries for rmrr
-		 *     if context not allocated for bus
-		 *           allocate and init context
-		 *           set present in root table for this bus
-		 *     init context with domain, translation etc
-		 *    endfor
-		 * endfor
-		 */
-		printk(KERN_INFO "IOMMU: Setting RMRR:\n");
-		for_each_rmrr_units(rmrr) {
-			for (i = 0; i < rmrr->devices_cnt; i++) {
-				pdev = rmrr->devices[i];
-				/*
-				 * some BIOS lists non-exist devices in DMAR
-				 * table.
-				 */
-				if (!pdev)
-					continue;
-				ret = iommu_prepare_rmrr_dev(rmrr, pdev);
-				if (ret)
-					printk(KERN_ERR
-				 "IOMMU: mapping reserved region failed\n");
-			}
+	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
+	for_each_rmrr_units(rmrr) {
+		for (i = 0; i < rmrr->devices_cnt; i++) {
+			pdev = rmrr->devices[i];
+			/*
+			 * some BIOS lists non-exist devices in DMAR
+			 * table.
+			 */
+			if (!pdev)
+				continue;
+			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
+			if (ret)
+				printk(KERN_ERR
+				       "IOMMU: mapping reserved region failed\n");
 		}
-
-		iommu_prepare_isa();
 	}
 
+	iommu_prepare_isa();
+
 	/*
 	 * for each drhd
 	 *   enable fault log
@@ -2536,7 +2514,10 @@ static int iommu_no_mapping(struct device *dev)
 			ret = domain_add_dev_info(si_domain, pdev);
 			if (ret)
 				return 0;
-			ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
+			ret = domain_context_mapping(si_domain, pdev,
+						     hw_pass_through ?
+						     CONTEXT_TT_PASS_THROUGH :
+						     CONTEXT_TT_MULTI_LEVEL);
 			if (!ret) {
 				printk(KERN_INFO "64bit %s uses identity mapping\n",
 				       pci_name(pdev));
@@ -3202,7 +3183,7 @@ int __init intel_iommu_init(void)
 	 * Check the need for DMA-remapping initialization now.
 	 * Above initialization will also be used by Interrupt-remapping.
 	 */
-	if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
+	if (no_iommu || swiotlb || dmar_disabled)
 		return -ENODEV;
 
 	iommu_init_mempool();
@@ -3222,14 +3203,7 @@ int __init intel_iommu_init(void)
 
 	init_timer(&unmap_timer);
 	force_iommu = 1;
-
-	if (!iommu_pass_through) {
-		printk(KERN_INFO
-		       "Multi-level page-table translation for DMAR.\n");
-		dma_ops = &intel_dma_ops;
-	} else
-		printk(KERN_INFO
-		       "DMAR: Pass through translation for DMAR.\n");
+	dma_ops = &intel_dma_ops;
 
 	init_iommu_sysfs();
 
-- 
cgit v0.10.2


From 989a19b9b10635eeb91c08cefe6cf82986bd4ee2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Aug 2009 15:22:38 +1000
Subject: sunrpc/cache: recheck cache validity after cache_defer_req

If cache_defer_req did not leave the request on a queue, then it could
possibly have waited long enough that the cache became valid.  So check the
status after the call.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 44f4516..bbd31f1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -176,6 +176,22 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 EXPORT_SYMBOL_GPL(sunrpc_cache_update);
 
 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
+
+static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
+{
+	if (!test_bit(CACHE_VALID, &h->flags) ||
+	    h->expiry_time < get_seconds())
+		return -EAGAIN;
+	else if (detail->flush_time > h->last_refresh)
+		return -EAGAIN;
+	else {
+		/* entry is valid */
+		if (test_bit(CACHE_NEGATIVE, &h->flags))
+			return -ENOENT;
+		else
+			return 0;
+	}
+}
 /*
  * This is the generic cache management routine for all
  * the authentication caches.
@@ -184,8 +200,10 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
  *
  *
  * Returns 0 if the cache_head can be used, or cache_puts it and returns
- * -EAGAIN if upcall is pending,
- * -ETIMEDOUT if upcall failed and should be retried,
+ * -EAGAIN if upcall is pending and request has been queued
+ * -ETIMEDOUT if upcall failed or request could not be queue or
+ *           upcall completed but item is still invalid (implying that
+ *           the cache item has been replaced with a newer one).
  * -ENOENT if cache entry was negative
  */
 int cache_check(struct cache_detail *detail,
@@ -195,17 +213,7 @@ int cache_check(struct cache_detail *detail,
 	long refresh_age, age;
 
 	/* First decide return status as best we can */
-	if (!test_bit(CACHE_VALID, &h->flags) ||
-	    h->expiry_time < get_seconds())
-		rv = -EAGAIN;
-	else if (detail->flush_time > h->last_refresh)
-		rv = -EAGAIN;
-	else {
-		/* entry is valid */
-		if (test_bit(CACHE_NEGATIVE, &h->flags))
-			rv = -ENOENT;
-		else rv = 0;
-	}
+	rv = cache_is_valid(detail, h);
 
 	/* now see if we want to start an upcall */
 	refresh_age = (h->expiry_time - h->last_refresh);
@@ -238,10 +246,14 @@ int cache_check(struct cache_detail *detail,
 		}
 	}
 
-	if (rv == -EAGAIN)
-		if (cache_defer_req(rqstp, h) != 0)
-			rv = -ETIMEDOUT;
-
+	if (rv == -EAGAIN) {
+		if (cache_defer_req(rqstp, h) == 0) {
+			/* Request is not deferred */
+			rv = cache_is_valid(detail, h);
+			if (rv == -EAGAIN)
+				rv = -ETIMEDOUT;
+		}
+	}
 	if (rv)
 		cache_put(h, detail);
 	return rv;
@@ -560,11 +572,11 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 		 * or continue and drop the oldest below
 		 */
 		if (net_random()&1)
-			return -ETIMEDOUT;
+			return 0;
 	}
 	dreq = req->defer(req);
 	if (dreq == NULL)
-		return -ETIMEDOUT;
+		return 0;
 
 	dreq->item = item;
 
@@ -594,8 +606,9 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 	if (!test_bit(CACHE_PENDING, &item->flags)) {
 		/* must have just been validated... */
 		cache_revisit_request(item);
+		return 0;
 	}
-	return 0;
+	return 1;
 }
 
 static void cache_revisit_request(struct cache_head *item)
-- 
cgit v0.10.2


From 560ab42ef923aaf2e4347315bdfcc74b2708972c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Aug 2009 15:22:39 +1000
Subject: sunrpc: fix memory leak in unix_gid cache.

When we look up an entry in the uid->gidlist cache, we take
a reference to the content but don't drop the reference to the
cache entry.  So it never gets freed.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 5c865e2..799ff6e 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -658,6 +658,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
 	case 0:
 		*gip = ug->gi;
 		get_group_info(*gip);
+		cache_put(&ug->h, &unix_gid_cache);
 		return 0;
 	default:
 		return -EAGAIN;
-- 
cgit v0.10.2


From 5fe60f4e5871b64e687229199fafd4ef13cd0886 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 9 Aug 2009 10:53:41 +0100
Subject: intel-iommu: make domain_add_dev_info() call domain_context_mapping()

All callers of the former were also calling the latter, in one order or
the other, and failing to correctly clean up if the second returned
failure.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 3f256b8..09606e9 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2094,15 +2094,23 @@ static int identity_mapping(struct pci_dev *pdev)
 }
 
 static int domain_add_dev_info(struct dmar_domain *domain,
-				  struct pci_dev *pdev)
+			       struct pci_dev *pdev,
+			       int translation)
 {
 	struct device_domain_info *info;
 	unsigned long flags;
+	int ret;
 
 	info = alloc_devinfo_mem();
 	if (!info)
 		return -ENOMEM;
 
+	ret = domain_context_mapping(domain, pdev, translation);
+	if (ret) {
+		free_devinfo_mem(info);
+		return ret;
+	}
+
 	info->segment = pci_domain_nr(pdev->bus);
 	info->bus = pdev->bus->number;
 	info->devfn = pdev->devfn;
@@ -2173,15 +2181,11 @@ static int iommu_prepare_static_identity_mapping(int hw)
 			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
 			       hw ? "hardware" : "software", pci_name(pdev));
 
-			ret = domain_context_mapping(si_domain, pdev,
+			ret = domain_add_dev_info(si_domain, pdev,
 						     hw ? CONTEXT_TT_PASS_THROUGH :
 						     CONTEXT_TT_MULTI_LEVEL);
 			if (ret)
 				return ret;
-
-			ret = domain_add_dev_info(si_domain, pdev);
-			if (ret)
-				return ret;
 		}
 	}
 
@@ -2510,13 +2514,10 @@ static int iommu_no_mapping(struct device *dev)
 		 */
 		if (iommu_should_identity_map(pdev, 0)) {
 			int ret;
-			ret = domain_add_dev_info(si_domain, pdev);
-			if (ret)
-				return 0;
-			ret = domain_context_mapping(si_domain, pdev,
-						     hw_pass_through ?
-						     CONTEXT_TT_PASS_THROUGH :
-						     CONTEXT_TT_MULTI_LEVEL);
+			ret = domain_add_dev_info(si_domain, pdev,
+						  hw_pass_through ?
+						  CONTEXT_TT_PASS_THROUGH :
+						  CONTEXT_TT_MULTI_LEVEL);
 			if (!ret) {
 				printk(KERN_INFO "64bit %s uses identity mapping\n",
 				       pci_name(pdev));
@@ -3486,7 +3487,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 	struct intel_iommu *iommu;
 	int addr_width;
 	u64 end;
-	int ret;
 
 	/* normally pdev is not mapped */
 	if (unlikely(domain_context_mapped(pdev))) {
@@ -3518,12 +3518,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		return -EFAULT;
 	}
 
-	ret = domain_add_dev_info(dmar_domain, pdev);
-	if (ret)
-		return ret;
-
-	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
-	return ret;
+	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 }
 
 static void intel_iommu_detach_device(struct iommu_domain *domain,
-- 
cgit v0.10.2


From ba6c548701ef7a93b9ea05d1506d2b62f1628333 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Thu, 13 Aug 2009 18:18:00 +0100
Subject: ia64: IOMMU passthrough mode shouldn't trigger swiotlb init

Since commit 19943b0e30b05d42e494ae6fef78156ebc8c637e ('intel-iommu:
Unify hardware and software passthrough support'), hardware passthrough
mode will do the same as software passthrough mode was doing -- it'll
still use the IOMMU normally for devices which can't address all of
memory. This means that we don't need to bother with swiotlb.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 223abb1..285aae8 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -46,7 +46,7 @@ void __init swiotlb_dma_init(void)
 
 void __init pci_swiotlb_init(void)
 {
-	if (!iommu_detected || iommu_pass_through) {
+	if (!iommu_detected) {
 #ifdef CONFIG_IA64_GENERIC
 		swiotlb = 1;
 		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
-- 
cgit v0.10.2


From 132032274a594ee9ffb6b9c9e2e9698149a09ea9 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 3 Aug 2009 12:40:27 +0100
Subject: USB: Work around BIOS bugs by quiescing USB controllers earlier

We are seeing a number of crashes in SMM, when VT-d is enabled while
'Legacy USB support' is enabled in various BIOSes.

The BIOS is supposed to indicate which addresses it uses for DMA in a
special ACPI table ("RMRR"), so that we can punch a hole for it when we
set up the IOMMU.

The problem is, as usual, that BIOS engineers are totally incompetent.
They write code which will crash if the DMA goes AWOL, and then they
either neglect to provide an RMRR table at all, or they put the wrong
addresses in it. And of course they don't do _any_ QA, since that would
take too much time away from their crack-smoking habit.

The real fix, of course, is for consumers to refuse to buy motherboards
which only have closed-source firmware available. If we had _open_
firmware, bugs like this would be easy to fix.

Since that's something I can only dream about, this patch implements an
alternative -- ensuring that the USB controllers are handed off from the
BIOS and quiesced _before_ the IOMMU is initialised. That would have
been a much better design than this RMRR nonsense in the first place, of
course. The bootloader has no business doing DMA after the OS has booted
anyway.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 83b5f9c..23cf3bd 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -475,4 +475,4 @@ static void __devinit quirk_usb_early_handoff(struct pci_dev *pdev)
 	else if (pdev->class == PCI_CLASS_SERIAL_USB_XHCI)
 		quirk_usb_handoff_xhci(pdev);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff);
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff);
-- 
cgit v0.10.2


From 4516fc0454e7ffe2f369e80045b23c2b32155004 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 14 Aug 2009 12:57:54 -0400
Subject: sunrpc: add routine for comparing addresses

lockd needs these sort of routines, as does the NFSv4 callback code.

Move lockd's routines into common code and rename them so that they can
be used by others.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 1f3b0fc..fc9032d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
 		 */
 		if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
 			continue;
-		if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
+		if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
 			continue;
 		if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
 			continue;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 7cb076a..4600c20 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 	 */
 	chain = &nlm_hosts[nlm_hash_address(ni->sap)];
 	hlist_for_each_entry(host, pos, chain, h_hash) {
-		if (!nlm_cmp_addr(nlm_addr(host), ni->sap))
+		if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
 			continue;
 
 		/* See if we have an NSM handle for this client */
@@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 		if (host->h_server != ni->server)
 			continue;
 		if (ni->server &&
-		    !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap))
+		    !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
 			continue;
 
 		/* Move to head of hash chain. */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 30c9331..f956651 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
 	struct nsm_handle *nsm;
 
 	list_for_each_entry(nsm, &nsm_handles, sm_link)
-		if (nlm_cmp_addr(nsm_addr(nsm), sap))
+		if (rpc_cmp_addr(nsm_addr(nsm), sap))
 			return nsm;
 	return NULL;
 }
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 9e4d6aab..ad478da 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
 static int
 nlmsvc_match_ip(void *datap, struct nlm_host *host)
 {
-	return nlm_cmp_addr(nlm_srcaddr(host), datap);
+	return rpc_cmp_addr(nlm_srcaddr(host), datap);
 }
 
 /**
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c325b18..e7a251a 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
 	}
 }
 
-static inline int __nlm_cmp_addr4(const struct sockaddr *sap1,
-				  const struct sockaddr *sap2)
-{
-	const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
-	const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
-	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
-				  const struct sockaddr *sap2)
-{
-	const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
-	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
-	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
-}
-#else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
-static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
-				  const struct sockaddr *sap2)
-{
-	return 0;
-}
-#endif	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
-
-/*
- * Compare two host addresses
- *
- * Return TRUE if the addresses are the same; otherwise FALSE.
- */
-static inline int nlm_cmp_addr(const struct sockaddr *sap1,
-			       const struct sockaddr *sap2)
-{
-	if (sap1->sa_family == sap2->sa_family) {
-		switch (sap1->sa_family) {
-		case AF_INET:
-			return __nlm_cmp_addr4(sap1, sap2);
-		case AF_INET6:
-			return __nlm_cmp_addr6(sap1, sap2);
-		}
-	}
-	return 0;
-}
-
 /*
  * Compare two NLM locks.
  * When the second lock is of type F_UNLCK, this acts like a wildcard.
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ab3f6e9..b17df36 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -22,6 +22,7 @@
 #include <linux/sunrpc/timer.h>
 #include <asm/signal.h>
 #include <linux/path.h>
+#include <net/ipv6.h>
 
 struct rpc_inode;
 
@@ -188,5 +189,52 @@ static inline void rpc_set_port(struct sockaddr *sap,
 #define IPV6_SCOPE_DELIMITER		'%'
 #define IPV6_SCOPE_ID_LEN		sizeof("%nnnnnnnnnn")
 
+static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
+				   const struct sockaddr *sap2)
+{
+	const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
+	const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
+
+	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
+				   const struct sockaddr *sap2)
+{
+	const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
+	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
+	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
+}
+#else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
+				   const struct sockaddr *sap2)
+{
+	return false;
+}
+#endif	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+
+/**
+ * rpc_cmp_addr - compare the address portion of two sockaddrs.
+ * @sap1: first sockaddr
+ * @sap2: second sockaddr
+ *
+ * Just compares the family and address portion. Ignores port, scope, etc.
+ * Returns true if the addrs are equal, false if they aren't.
+ */
+static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
+				const struct sockaddr *sap2)
+{
+	if (sap1->sa_family == sap2->sa_family) {
+		switch (sap1->sa_family) {
+		case AF_INET:
+			return __rpc_cmp_addr4(sap1, sap2);
+		case AF_INET6:
+			return __rpc_cmp_addr6(sap1, sap2);
+		}
+	}
+	return false;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
-- 
cgit v0.10.2


From be3ad6b0b675fd1d6b48362ca30bdee75fbef6b4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 14 Aug 2009 12:57:55 -0400
Subject: sunrpc: add common routine for copying address portion of a sockaddr

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index b17df36..044f531 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -198,6 +198,17 @@ static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
 	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
 }
 
+static inline bool __rpc_copy_addr4(struct sockaddr *dst,
+				    const struct sockaddr *src)
+{
+	const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
+	struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
+
+	dsin->sin_family = ssin->sin_family;
+	dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
+	return true;
+}
+
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
 				   const struct sockaddr *sap2)
@@ -206,12 +217,29 @@ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
 	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
 	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
 }
+
+static inline bool __rpc_copy_addr6(struct sockaddr *dst,
+				    const struct sockaddr *src)
+{
+	const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
+	struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
+
+	dsin6->sin6_family = ssin6->sin6_family;
+	ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr);
+	return true;
+}
 #else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
 static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
 				   const struct sockaddr *sap2)
 {
 	return false;
 }
+
+static inline bool __rpc_copy_addr6(struct sockaddr *dst,
+				    const struct sockaddr *src)
+{
+	return false;
+}
 #endif	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
 
 /**
@@ -236,5 +264,27 @@ static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
 	return false;
 }
 
+/**
+ * rpc_copy_addr - copy the address portion of one sockaddr to another
+ * @dst: destination sockaddr
+ * @src: source sockaddr
+ *
+ * Just copies the address portion and family. Ignores port, scope, etc.
+ * Caller is responsible for making certain that dst is large enough to hold
+ * the address in src. Returns true if address family is supported. Returns
+ * false otherwise.
+ */
+static inline bool rpc_copy_addr(struct sockaddr *dst,
+				 const struct sockaddr *src)
+{
+	switch (src->sa_family) {
+	case AF_INET:
+		return __rpc_copy_addr4(dst, src);
+	case AF_INET6:
+		return __rpc_copy_addr6(dst, src);
+	}
+	return false;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
-- 
cgit v0.10.2


From 363168b4ea8ec26aeb982ac6024a09f907ecd27e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 14 Aug 2009 12:57:56 -0400
Subject: nfsd: make nfs4_client->cl_addr a struct sockaddr_storage

It's currently a __be32, which isn't big enough to hold an IPv6 address.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9295c4b..bfc14d8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -55,6 +55,7 @@
 #include <linux/lockd/bind.h>
 #include <linux/module.h>
 #include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/clnt.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -1220,13 +1221,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 	int status;
 	unsigned int		strhashval;
 	char			dname[HEXDIR_LEN];
+	char			addr_str[INET6_ADDRSTRLEN];
 	nfs4_verifier		verf = exid->verifier;
-	u32			ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
+	struct sockaddr		*sa = svc_addr(rqstp);
 
+	rpc_ntop(sa, addr_str, sizeof(addr_str));
 	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
-		" ip_addr=%u flags %x, spa_how %d\n",
+		"ip_addr=%s flags %x, spa_how %d\n",
 		__func__, rqstp, exid, exid->clname.len, exid->clname.data,
-		ip_addr, exid->flags, exid->spa_how);
+		addr_str, exid->flags, exid->spa_how);
 
 	if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
 		return nfserr_inval;
@@ -1315,7 +1318,7 @@ out_new:
 
 	copy_verf(new, &verf);
 	copy_cred(&new->cl_cred, &rqstp->rq_cred);
-	new->cl_addr = ip_addr;
+	rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa);
 	gen_clid(new);
 	gen_confirm(new);
 	add_to_unconfirmed(new, strhashval);
@@ -1389,7 +1392,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
 		     struct nfsd4_create_session *cr_ses)
 {
-	u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
+	struct sockaddr *sa = svc_addr(rqstp);
 	struct nfs4_client *conf, *unconf;
 	struct nfsd4_clid_slot *cs_slot = NULL;
 	int status = 0;
@@ -1417,7 +1420,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		cs_slot->sl_seqid++;
 	} else if (unconf) {
 		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
-		    (ip_addr != unconf->cl_addr)) {
+		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
 			status = nfserr_clid_inuse;
 			goto out;
 		}
@@ -1564,7 +1567,7 @@ __be32
 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		  struct nfsd4_setclientid *setclid)
 {
-	struct sockaddr_in	*sin = svc_addr_in(rqstp);
+	struct sockaddr		*sa = svc_addr(rqstp);
 	struct xdr_netobj 	clname = { 
 		.len = setclid->se_namelen,
 		.data = setclid->se_name,
@@ -1596,8 +1599,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		/* RFC 3530 14.2.33 CASE 0: */
 		status = nfserr_clid_inuse;
 		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
-			dprintk("NFSD: setclientid: string in use by client"
-				" at %pI4\n", &conf->cl_addr);
+			char addr_str[INET6_ADDRSTRLEN];
+			rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
+				 sizeof(addr_str));
+			dprintk("NFSD: setclientid: string in use by client "
+				"at %s\n", addr_str);
 			goto out;
 		}
 	}
@@ -1659,7 +1665,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		gen_clid(new);
 	}
 	copy_verf(new, &clverifier);
-	new->cl_addr = sin->sin_addr.s_addr;
+	rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa);
 	new->cl_flavor = rqstp->rq_flavor;
 	princ = svc_gss_principal(rqstp);
 	if (princ) {
@@ -1693,7 +1699,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 			 struct nfsd4_compound_state *cstate,
 			 struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
-	struct sockaddr_in *sin = svc_addr_in(rqstp);
+	struct sockaddr *sa = svc_addr(rqstp);
 	struct nfs4_client *conf, *unconf;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
@@ -1712,9 +1718,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 	unconf = find_unconfirmed_client(clid);
 
 	status = nfserr_clid_inuse;
-	if (conf && conf->cl_addr != sin->sin_addr.s_addr)
+	if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa))
 		goto out;
-	if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
+	if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa))
 		goto out;
 
 	/*
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 58bb197..3510ddd 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -200,7 +200,7 @@ struct nfs4_client {
 	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
-	__be32			cl_addr; 	/* client ipaddress */
+	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
 	u32			cl_flavor;	/* setclientid pseudoflavor */
 	char			*cl_principal;	/* setclientid principal name */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
-- 
cgit v0.10.2


From aa9a4ec7707a5391cde556f3fa1b0eb4bca3bcf6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 14 Aug 2009 12:57:57 -0400
Subject: nfsd: convert nfs4_cb_conn struct to hold address in sockaddr_storage

...rather than as a separate address and port fields. This will be
necessary for implementing callbacks over IPv6. Also, convert
gen_callback to use the standard rpcuaddr2sockaddr routine rather than
its own private one.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3fd23f7..81d1c52 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -377,7 +377,6 @@ static int max_cb_time(void)
 
 int setup_callback_client(struct nfs4_client *clp)
 {
-	struct sockaddr_in	addr;
 	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_timeout	timeparms = {
 		.to_initval	= max_cb_time(),
@@ -385,8 +384,8 @@ int setup_callback_client(struct nfs4_client *clp)
 	};
 	struct rpc_create_args args = {
 		.protocol	= IPPROTO_TCP,
-		.address	= (struct sockaddr *)&addr,
-		.addrsize	= sizeof(addr),
+		.address	= (struct sockaddr *) &cb->cb_addr,
+		.addrsize	= cb->cb_addrlen,
 		.timeout	= &timeparms,
 		.program	= &cb_program,
 		.prognumber	= cb->cb_prog,
@@ -400,12 +399,6 @@ int setup_callback_client(struct nfs4_client *clp)
 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
 		return -EINVAL;
 
-	/* Initialize address */
-	memset(&addr, 0, sizeof(addr));
-	addr.sin_family = AF_INET;
-	addr.sin_port = htons(cb->cb_port);
-	addr.sin_addr.s_addr = htonl(cb->cb_addr);
-
 	/* Create RPC client */
 	client = rpc_create(&args);
 	if (IS_ERR(client)) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bfc14d8..96a7423 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -897,76 +897,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
 	return NULL;
 }
 
-/* a helper function for parse_callback */
-static int
-parse_octet(unsigned int *lenp, char **addrp)
-{
-	unsigned int len = *lenp;
-	char *p = *addrp;
-	int n = -1;
-	char c;
-
-	for (;;) {
-		if (!len)
-			break;
-		len--;
-		c = *p++;
-		if (c == '.')
-			break;
-		if ((c < '0') || (c > '9')) {
-			n = -1;
-			break;
-		}
-		if (n < 0)
-			n = 0;
-		n = (n * 10) + (c - '0');
-		if (n > 255) {
-			n = -1;
-			break;
-		}
-	}
-	*lenp = len;
-	*addrp = p;
-	return n;
-}
-
-/* parse and set the setclientid ipv4 callback address */
-static int
-parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
-{
-	int temp = 0;
-	u32 cbaddr = 0;
-	u16 cbport = 0;
-	u32 addrlen = addr_len;
-	char *addr = addr_val;
-	int i, shift;
-
-	/* ipaddress */
-	shift = 24;
-	for(i = 4; i > 0  ; i--) {
-		if ((temp = parse_octet(&addrlen, &addr)) < 0) {
-			return 0;
-		}
-		cbaddr |= (temp << shift);
-		if (shift > 0)
-		shift -= 8;
-	}
-	*cbaddrp = cbaddr;
-
-	/* port */
-	shift = 8;
-	for(i = 2; i > 0  ; i--) {
-		if ((temp = parse_octet(&addrlen, &addr)) < 0) {
-			return 0;
-		}
-		cbport |= (temp << shift);
-		if (shift > 0)
-			shift -= 8;
-	}
-	*cbportp = cbport;
-	return 1;
-}
-
 static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
@@ -976,14 +906,21 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 	if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
 		goto out_err;
 
-	if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
-	                 &cb->cb_addr, &cb->cb_port)))
+	cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
+					    se->se_callback_addr_len,
+					    (struct sockaddr *) &cb->cb_addr,
+					    sizeof(cb->cb_addr));
+
+	if (!cb->cb_addrlen || cb->cb_addr.ss_family != AF_INET)
 		goto out_err;
+
 	cb->cb_minorversion = 0;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
 	return;
 out_err:
+	cb->cb_addr.ss_family = AF_UNSPEC;
+	cb->cb_addrlen = 0;
 	dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
 		"will not receive delegations\n",
 		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 3510ddd..fb0c404 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -81,8 +81,8 @@ struct nfs4_delegation {
 /* client delegation callback info */
 struct nfs4_cb_conn {
 	/* SETCLIENTID info */
-	u32                     cb_addr;
-	unsigned short          cb_port;
+	struct sockaddr_storage	cb_addr;
+	size_t			cb_addrlen;
 	u32                     cb_prog;
 	u32			cb_minorversion;
 	u32                     cb_ident;	/* minorversion 0 only */
-- 
cgit v0.10.2


From 7077ecbabd626cce1fcf5cc9766c83ec04d919f9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 14 Aug 2009 12:57:58 -0400
Subject: nfsd: add support for NFSv4 callbacks over IPv6

The framework to add this is all in place. Now, add the code to allow
support for establishing a callback channel on an IPv6 socket.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 96a7423..9ec0ca1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -901,9 +901,16 @@ static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
 	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
-
-	/* Currently, we only support tcp for the callback channel */
-	if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
+	unsigned short expected_family;
+
+	/* Currently, we only support tcp and tcp6 for the callback channel */
+	if (se->se_callback_netid_len == 3 &&
+	    !memcmp(se->se_callback_netid_val, "tcp", 3))
+		expected_family = AF_INET;
+	else if (se->se_callback_netid_len == 4 &&
+		 !memcmp(se->se_callback_netid_val, "tcp6", 4))
+		expected_family = AF_INET6;
+	else
 		goto out_err;
 
 	cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
@@ -911,7 +918,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 					    (struct sockaddr *) &cb->cb_addr,
 					    sizeof(cb->cb_addr));
 
-	if (!cb->cb_addrlen || cb->cb_addr.ss_family != AF_INET)
+	if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family)
 		goto out_err;
 
 	cb->cb_minorversion = 0;
-- 
cgit v0.10.2


From fbf4665f41b02e757ab9d9198df65e319388e728 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 14 Aug 2009 12:57:59 -0400
Subject: nfsd: populate sin6_scope_id on callback address with scopeid from
 rq_addr on SETCLIENTID call

When a SETCLIENTID call comes in, one of the args given is the svc_rqst.
This struct contains an rq_addr field which holds the address that sent
the call. If this is an IPv6 address, then we can use the sin6_scope_id
field in this address to populate the sin6_scope_id field in the
callback address.

AFAICT, the rq_addr.sin6_scope_id is non-zero if and only if the client
mounted the server's link-local address.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9ec0ca1..d2a0524 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -898,7 +898,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
 }
 
 static void
-gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
+gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
 {
 	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	unsigned short expected_family;
@@ -921,6 +921,9 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 	if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family)
 		goto out_err;
 
+	if (cb->cb_addr.ss_family == AF_INET6)
+		((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid;
+
 	cb->cb_minorversion = 0;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
@@ -1621,7 +1624,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	copy_cred(&new->cl_cred, &rqstp->rq_cred);
 	gen_confirm(new);
-	gen_callback(new, setclid);
+	gen_callback(new, setclid, rpc_get_scope_id(sa));
 	add_to_unconfirmed(new, strhashval);
 	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
 	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 044f531..3d02558 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -286,5 +286,20 @@ static inline bool rpc_copy_addr(struct sockaddr *dst,
 	return false;
 }
 
+/**
+ * rpc_get_scope_id - return scopeid for a given sockaddr
+ * @sa: sockaddr to get scopeid from
+ *
+ * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
+ * not an AF_INET6 address.
+ */
+static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
+{
+	if (sa->sa_family != AF_INET6)
+		return 0;
+
+	return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
-- 
cgit v0.10.2


From 071e13746f9ebb259987c71ea77f11e7656769a2 Mon Sep 17 00:00:00 2001
From: Matt Kraai <kraai@ftbfs.org>
Date: Sun, 23 Aug 2009 22:30:22 -0700
Subject: intel-iommu: Mark functions with __init

Mark si_domain_init and iommu_prepare_static_identity_mapping with
__init, to eliminate the following warnings:

WARNING: drivers/pci/built-in.o(.text+0xf1f4): Section mismatch in reference from the function si_domain_init() to the function .init.text:si_domain_work_fn()
The function si_domain_init() references
the function __init si_domain_work_fn().
This is often because si_domain_init lacks a __init
annotation or the annotation of si_domain_work_fn is wrong.

WARNING: drivers/pci/built-in.o(.text+0xe340): Section mismatch in reference from the function iommu_prepare_static_identity_mapping() to the function .init.text:si_domain_init()
The function iommu_prepare_static_identity_mapping() references
the function __init si_domain_init().
This is often because iommu_prepare_static_identity_mapping lacks a __init
annotation or the annotation of si_domain_init is wrong.

Signed-off-by: Matt Kraai <kraai@ftbfs.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 09606e9..e67335b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2038,7 +2038,7 @@ static int __init si_domain_work_fn(unsigned long start_pfn,
 
 }
 
-static int si_domain_init(int hw)
+static int __init si_domain_init(int hw)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu;
@@ -2167,7 +2167,7 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
 	return 1;
 }
 
-static int iommu_prepare_static_identity_mapping(int hw)
+static int __init iommu_prepare_static_identity_mapping(int hw)
 {
 	struct pci_dev *pdev = NULL;
 	int ret;
-- 
cgit v0.10.2


From 94a91b5051a77d8a71d4f11a3240f0d9c51b6cf2 Mon Sep 17 00:00:00 2001
From: Donald Dutile <ddutile@redhat.com>
Date: Thu, 20 Aug 2009 16:51:34 -0400
Subject: intel-iommu: iommu init error path bug fixes

The kcalloc() failure path in iommu_init_domains() calls
free_dmar_iommu(), which assumes that ->domains, ->domain_ids,
and ->lock have been properly initialized.

Add checks in free_[dmar]_iommu to not use ->domains,->domain_ids
if not alloced. Move the lock init to prior to the kcalloc()'s,
so it is valid in free_context_table() when free_dmar_iommu() invokes
it at the end.

Patch based on iommu-2.6,
commit 132032274a594ee9ffb6b9c9e2e9698149a09ea9

Signed-off-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e67335b..beb5ccf 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1158,6 +1158,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 	pr_debug("Number of Domains supportd <%ld>\n", ndomains);
 	nlongs = BITS_TO_LONGS(ndomains);
 
+	spin_lock_init(&iommu->lock);
+
 	/* TBD: there might be 64K domains,
 	 * consider other allocation for future chip
 	 */
@@ -1170,12 +1172,9 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 			GFP_KERNEL);
 	if (!iommu->domains) {
 		printk(KERN_ERR "Allocating domain array failed\n");
-		kfree(iommu->domain_ids);
 		return -ENOMEM;
 	}
 
-	spin_lock_init(&iommu->lock);
-
 	/*
 	 * if Caching mode is set, then invalid translations are tagged
 	 * with domainid 0. Hence we need to pre-allocate it.
@@ -1195,22 +1194,24 @@ void free_dmar_iommu(struct intel_iommu *iommu)
 	int i;
 	unsigned long flags;
 
-	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
-	for (; i < cap_ndoms(iommu->cap); ) {
-		domain = iommu->domains[i];
-		clear_bit(i, iommu->domain_ids);
+	if ((iommu->domains) && (iommu->domain_ids)) {
+		i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
+		for (; i < cap_ndoms(iommu->cap); ) {
+			domain = iommu->domains[i];
+			clear_bit(i, iommu->domain_ids);
+
+			spin_lock_irqsave(&domain->iommu_lock, flags);
+			if (--domain->iommu_count == 0) {
+				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+					vm_domain_exit(domain);
+				else
+					domain_exit(domain);
+			}
+			spin_unlock_irqrestore(&domain->iommu_lock, flags);
 
-		spin_lock_irqsave(&domain->iommu_lock, flags);
-		if (--domain->iommu_count == 0) {
-			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
-				vm_domain_exit(domain);
-			else
-				domain_exit(domain);
+			i = find_next_bit(iommu->domain_ids,
+				cap_ndoms(iommu->cap), i+1);
 		}
-		spin_unlock_irqrestore(&domain->iommu_lock, flags);
-
-		i = find_next_bit(iommu->domain_ids,
-			cap_ndoms(iommu->cap), i+1);
 	}
 
 	if (iommu->gcmd & DMA_GCMD_TE)
-- 
cgit v0.10.2


From 8f55f3c0a013c42fb733997da54a3326c74601e8 Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Thu, 20 Aug 2009 03:34:19 +0300
Subject: nfsd41: sunrpc: svc_tcp_recv_record()

Factor functionality out of svc_tcp_recvfrom() to simplify routine

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 99a826d..76a380d 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -854,21 +854,15 @@ failed:
 }
 
 /*
- * Receive data from a TCP socket.
+ * Receive data.
+ * If we haven't gotten the record length yet, get the next four bytes.
+ * Otherwise try to gobble up as much as possible up to the complete
+ * record length.
  */
-static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
+static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk =
-		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
-	int		len;
-	struct kvec *vec;
-	int pnum, vlen;
-
-	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
-		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
-		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
-		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
+	int len;
 
 	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 		/* sndbuf needs to have room for one request
@@ -889,10 +883,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
-	/* Receive data. If we haven't got the record length yet, get
-	 * the next four bytes. Otherwise try to gobble up as much as
-	 * possible up to the complete record length.
-	 */
 	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
 		int		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
 		struct kvec	iov;
@@ -907,7 +897,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 			dprintk("svc: short recvfrom while reading record "
 				"length (%d of %d)\n", len, want);
 			svc_xprt_received(&svsk->sk_xprt);
-			return -EAGAIN; /* record header not complete */
+			goto err_again; /* record header not complete */
 		}
 
 		svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -922,6 +912,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 					"per record not supported\n");
 			goto err_delete;
 		}
+
 		svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
 		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
 		if (svsk->sk_reclen > serv->sv_max_mesg) {
@@ -942,11 +933,45 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
 			len, svsk->sk_reclen);
 		svc_xprt_received(&svsk->sk_xprt);
-		return -EAGAIN;	/* record not complete */
+		goto err_again;	/* record not complete */
 	}
 	len = svsk->sk_reclen;
 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
+	return len;
+ error:
+	if (len == -EAGAIN) {
+		dprintk("RPC: TCP recv_record got EAGAIN\n");
+		svc_xprt_received(&svsk->sk_xprt);
+	}
+	return len;
+ err_delete:
+	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ err_again:
+	return -EAGAIN;
+}
+
+/*
+ * Receive data from a TCP socket.
+ */
+static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
+{
+	struct svc_sock	*svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
+	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
+	int		len;
+	struct kvec *vec;
+	int pnum, vlen;
+
+	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
+		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
+		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
+		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
+
+	len = svc_tcp_recv_record(svsk, rqstp);
+	if (len < 0)
+		goto error;
+
 	vec = rqstp->rq_vec;
 	vec[0] = rqstp->rq_arg.head[0];
 	vlen = PAGE_SIZE;
@@ -962,7 +987,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	/* Now receive data */
 	len = svc_recvfrom(rqstp, vec, pnum, len);
 	if (len < 0)
-		goto error;
+		goto err_again;
 
 	dprintk("svc: TCP complete record (%d bytes)\n", len);
 	rqstp->rq_arg.len = len;
@@ -988,21 +1013,19 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 	return len;
 
- err_delete:
-	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-	return -EAGAIN;
-
- error:
+err_again:
 	if (len == -EAGAIN) {
 		dprintk("RPC: TCP recvfrom got EAGAIN\n");
 		svc_xprt_received(&svsk->sk_xprt);
-	} else {
+		return len;
+	}
+error:
+	if (len != -EAGAIN) {
 		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
 		       svsk->sk_xprt.xpt_server->sv_name, -len);
-		goto err_delete;
+		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 	}
-
-	return len;
+	return -EAGAIN;
 }
 
 /*
-- 
cgit v0.10.2


From 55bb55dca0cecac2fb7b8c743db41361c011c8a8 Mon Sep 17 00:00:00 2001
From: Frank Filz <ffilzlnx@us.ibm.com>
Date: Fri, 14 Aug 2009 15:02:30 -0700
Subject: nfsd: Fix unnecessary deny bits in NFSv4 ACL

The group deny entries end up denying tcy even though tcy was just
allowed by the allow entry. This appears to be due to:
	ace->access_mask = mask_from_posix(deny, flags);
instead of:
	ace->access_mask = deny_mask_from_posix(deny, flags);

Denying a previously allowed bit has no effect, so this shouldn't affect
behavior, but it's ugly.

Signed-off-by: Frank Filz <ffilzlnx@us.ibm.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 54b8b41..5320c2b 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -335,7 +335,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 		if (deny) {
 			ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
 			ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
-			ace->access_mask = mask_from_posix(deny, flags);
+			ace->access_mask = deny_mask_from_posix(deny, flags);
 			ace->whotype = NFS4_ACL_WHO_NAMED;
 			ace->who = pa->e_id;
 			ace++;
-- 
cgit v0.10.2


From ed2d8aed52212610d4cb79be3cbf535b04be38dc Mon Sep 17 00:00:00 2001
From: Ryusei Yamaguchi <mandel59@gmail.com>
Date: Sun, 16 Aug 2009 00:54:41 +0900
Subject: knfsd: Replace lock_kernel with a mutex in nfsd pool stats.

lock_kernel() in knfsd was replaced with a mutex. The later
commit 03cf6c9f49a8fea953d38648d016e3f46e814991 ("knfsd:
add file to export stats about nfsd pools") did not follow
that change. This patch fixes the issue.

Also move the get and put of nfsd_serv to the open and close methods
(instead of start and stop methods) to allow atomic check and increment
of reference count in the open method (where we can still return an
error).

Signed-off-by: Ryusei Yamaguchi <mandel59@gmail.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Cc: Greg Banks <gnb@fmeh.org>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b764d7d..00388d2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -174,12 +174,13 @@ static const struct file_operations exports_operations = {
 };
 
 extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
+extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
 
 static struct file_operations pool_stats_operations = {
 	.open		= nfsd_pool_stats_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release,
+	.release	= nfsd_pool_stats_release,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d68cd05..675d395 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -34,6 +34,7 @@
 #include <linux/nfsd/syscall.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfsacl.h>
+#include <linux/seq_file.h>
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
@@ -614,7 +615,25 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 {
-	if (nfsd_serv == NULL)
+	int ret;
+	mutex_lock(&nfsd_mutex);
+	if (nfsd_serv == NULL) {
+		mutex_unlock(&nfsd_mutex);
 		return -ENODEV;
-	return svc_pool_stats_open(nfsd_serv, file);
+	}
+	/* bump up the psudo refcount while traversing */
+	svc_get(nfsd_serv);
+	ret = svc_pool_stats_open(nfsd_serv, file);
+	mutex_unlock(&nfsd_mutex);
+	return ret;
+}
+
+int nfsd_pool_stats_release(struct inode *inode, struct file *file)
+{
+	int ret = seq_release(inode, file);
+	mutex_lock(&nfsd_mutex);
+	/* this function really, really should have been called svc_put() */
+	svc_destroy(nfsd_serv);
+	mutex_unlock(&nfsd_mutex);
+	return ret;
 }
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 27d4433..dcd2d1e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1166,11 +1166,6 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
 
 	dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
 
-	lock_kernel();
-	/* bump up the pseudo refcount while traversing */
-	svc_get(serv);
-	unlock_kernel();
-
 	if (!pidx)
 		return SEQ_START_TOKEN;
 	return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
@@ -1198,12 +1193,6 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
 
 static void svc_pool_stats_stop(struct seq_file *m, void *p)
 {
-	struct svc_serv *serv = m->private;
-
-	lock_kernel();
-	/* this function really, really should have been called svc_put() */
-	svc_destroy(serv);
-	unlock_kernel();
 }
 
 static int svc_pool_stats_show(struct seq_file *m, void *p)
-- 
cgit v0.10.2


From eac81736e6884484ebb45f8d0cba639f3285382b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 4 Aug 2009 17:27:52 +0800
Subject: sunrpc: reply AUTH_BADCRED to RPCSEC_GSS with unknown service

When an RPC message is received with RPCSEC_GSS with an unknown service
(not RPC_GSS_SVC_NONE, RPC_GSS_SVC_INTEGRITY, or RPC_GSS_SVC_PRIVACY),
svcauth_gss_accept() returns AUTH_BADCRED, but svcauth_gss_release()
subsequently drops the response entirely, discarding the error.

Fix that so the AUTH_BADCRED error is returned to the client.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 2e6a148..f6c51e5 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1374,8 +1374,10 @@ svcauth_gss_release(struct svc_rqst *rqstp)
 		if (stat)
 			goto out_err;
 		break;
-	default:
-		goto out_err;
+	/*
+	 * For any other gc_svc value, svcauth_gss_accept() already set
+	 * the auth_error appropriately; just fall through:
+	 */
 	}
 
 out:
-- 
cgit v0.10.2


From 2ff729f5445cc47d1910386c36e53fc6b1c5e47a Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Wed, 26 Aug 2009 14:25:41 +0100
Subject: intel-iommu: Cope with yet another BIOS screwup causing crashes

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index beb5ccf..d36fa80 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1974,6 +1974,17 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
 	printk(KERN_INFO
 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
 	       pci_name(pdev), start, end);
+	
+	if (end >> agaw_to_width(domain->agaw)) {
+		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
+		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+		     agaw_to_width(domain->agaw),
+		     dmi_get_system_info(DMI_BIOS_VENDOR),
+		     dmi_get_system_info(DMI_BIOS_VERSION),
+		     dmi_get_system_info(DMI_PRODUCT_VERSION));
+		ret = -EIO;
+		goto error;
+	}
 
 	ret = iommu_domain_identity_map(domain, start, end);
 	if (ret)
-- 
cgit v0.10.2


From 5853a9f6dda244b4163b9daad663bdc41a74f596 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Tue, 2 Jun 2009 13:20:00 +0800
Subject: ACPICA: Fix several pointer casts to avoid possible compile warnings

Fixes warnings with gcc -Wcast-qual flag.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index ec52461..de34463 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -418,9 +418,9 @@ acpi_ex_dump_object(union acpi_operand_object *obj_desc,
 		case ACPI_EXD_REFERENCE:
 
 			acpi_ex_out_string("Class Name",
-					   (char *)
-					   acpi_ut_get_reference_name
-					   (obj_desc));
+					   ACPI_CAST_PTR(char,
+							 acpi_ut_get_reference_name
+							 (obj_desc)));
 			acpi_ex_dump_reference_obj(obj_desc);
 			break;
 
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 78277ed..ea55ab4 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -88,7 +88,8 @@ acpi_ns_report_error(const char *module_name,
 
 		/* There is a non-ascii character in the name */
 
-		ACPI_MOVE_32_TO_32(&bad_name, internal_name);
+		ACPI_MOVE_32_TO_32(&bad_name,
+				   ACPI_CAST_PTR(u32, internal_name));
 		acpi_os_printf("[0x%4.4X] (NON-ASCII)", bad_name);
 	} else {
 		/* Convert path to external format */
@@ -836,7 +837,7 @@ acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
 	acpi_status status;
 	char *internal_path;
 
-	ACPI_FUNCTION_TRACE_PTR(ns_get_node, pathname);
+	ACPI_FUNCTION_TRACE_PTR(ns_get_node, ACPI_CAST_PTR(char, pathname));
 
 	if (!pathname) {
 		*return_node = prefix_node;
-- 
cgit v0.10.2


From f8d80cdf40fe4d2393159012b38ce9f85a488686 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Tue, 2 Jun 2009 13:28:13 +0800
Subject: ACPICA: Remove duplicate extern declarations for public globals

Some were defined twice, causes a warning with gcc
-Wredundant-decls.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 3d87362..0b73b31 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -58,6 +58,10 @@
 #define ACPI_INIT_GLOBAL(a,b) a
 #endif
 
+#ifdef DEFINE_ACPI_GLOBALS
+
+/* Public globals, available from outside ACPICA subsystem */
+
 /*****************************************************************************
  *
  * Runtime configuration (static defaults that can be overriden at runtime)
@@ -78,7 +82,7 @@
  * 5) Allow unresolved references (invalid target name) in package objects
  * 6) Enable warning messages for behavior that is not ACPI spec compliant
  */
-ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE);
+u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE);
 
 /*
  * Automatically serialize ALL control methods? Default is FALSE, meaning
@@ -86,27 +90,36 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE);
  * Only change this if the ASL code is poorly written and cannot handle
  * reentrancy even though methods are marked "NotSerialized".
  */
-ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE);
+u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE);
 
 /*
  * Create the predefined _OSI method in the namespace? Default is TRUE
  * because ACPI CA is fully compatible with other ACPI implementations.
  * Changing this will revert ACPI CA (and machine ASL) to pre-OSI behavior.
  */
-ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE);
+u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE);
 
 /*
  * Disable wakeup GPEs during runtime? Default is TRUE because WAKE and
  * RUNTIME GPEs should never be shared, and WAKE GPEs should typically only
  * be enabled just before going to sleep.
  */
-ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE);
+u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE);
 
 /*
  * Optionally use default values for the ACPI register widths. Set this to
  * TRUE to use the defaults, if an FADT contains incorrect widths/lengths.
  */
-ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE);
+u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE);
+
+/* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */
+
+struct acpi_table_fadt acpi_gbl_FADT;
+u32 acpi_current_gpe_count;
+u32 acpi_gbl_trace_flags;
+acpi_name acpi_gbl_trace_method_name;
+
+#endif
 
 /*****************************************************************************
  *
@@ -114,11 +127,6 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE);
  *
  ****************************************************************************/
 
-/* Runtime configuration of debug print levels */
-
-extern u32 acpi_dbg_level;
-extern u32 acpi_dbg_layer;
-
 /* Procedure nesting level for debug output */
 
 extern u32 acpi_gbl_nesting_level;
@@ -127,10 +135,8 @@ extern u32 acpi_gbl_nesting_level;
 
 ACPI_EXTERN u32 acpi_gbl_original_dbg_level;
 ACPI_EXTERN u32 acpi_gbl_original_dbg_layer;
-ACPI_EXTERN acpi_name acpi_gbl_trace_method_name;
 ACPI_EXTERN u32 acpi_gbl_trace_dbg_level;
 ACPI_EXTERN u32 acpi_gbl_trace_dbg_layer;
-ACPI_EXTERN u32 acpi_gbl_trace_flags;
 
 /*****************************************************************************
  *
@@ -142,10 +148,8 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags;
  * acpi_gbl_root_table_list is the master list of ACPI tables found in the
  * RSDT/XSDT.
  *
- * acpi_gbl_FADT is a local copy of the FADT, converted to a common format.
  */
 ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list;
-ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT;
 ACPI_EXTERN struct acpi_table_facs *acpi_gbl_FACS;
 
 /* These addresses are calculated from the FADT Event Block addresses */
@@ -340,7 +344,6 @@ ACPI_EXTERN struct acpi_fixed_event_handler
 ACPI_EXTERN struct acpi_gpe_xrupt_info *acpi_gbl_gpe_xrupt_list_head;
 ACPI_EXTERN struct acpi_gpe_block_info
 *acpi_gbl_gpe_fadt_blocks[ACPI_MAX_GPE_BLOCKS];
-ACPI_EXTERN u32 acpi_current_gpe_count;
 
 /*****************************************************************************
  *
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 82ec6a3..2aecaa5 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -64,6 +64,7 @@ extern u8 acpi_gbl_enable_interpreter_slack;
 extern u8 acpi_gbl_all_methods_serialized;
 extern u8 acpi_gbl_create_osi_method;
 extern u8 acpi_gbl_leave_wake_gpes_disabled;
+extern u8 acpi_gbl_use_default_register_widths;
 extern acpi_name acpi_gbl_trace_method_name;
 extern u32 acpi_gbl_trace_flags;
 
-- 
cgit v0.10.2


From c6b5774caafa4c12b6019366e2fdaaff117e95a4 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 24 Jun 2009 09:44:06 +0800
Subject: ACPICA: Add 64-bit support to acpi_read and acpi_write

Needed by drivers for new ACPi tables.  Internal versions of
these functions still use 32-bit max transfers, in order to
minimize disruption and stack use for the standard ACPI registers
(FADT-based).

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index 4afa3d8..36192f1 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -62,6 +62,14 @@ u32 acpi_hw_get_mode(void);
 /*
  * hwregs - ACPI Register I/O
  */
+acpi_status
+acpi_hw_validate_register(struct acpi_generic_address *reg,
+			  u8 max_bit_width, u64 *address);
+
+acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg);
+
+acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg);
+
 struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id);
 
 acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control);
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index b9d8ee6..afacf44 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -424,8 +424,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
 			/* Read the Status Register */
 
 			status =
-			    acpi_read(&status_reg,
-				      &gpe_register_info->status_address);
+			    acpi_hw_read(&status_reg,
+					 &gpe_register_info->status_address);
 			if (ACPI_FAILURE(status)) {
 				goto unlock_and_exit;
 			}
@@ -433,8 +433,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
 			/* Read the Enable Register */
 
 			status =
-			    acpi_read(&enable_reg,
-				      &gpe_register_info->enable_address);
+			    acpi_hw_read(&enable_reg,
+					 &gpe_register_info->enable_address);
 			if (ACPI_FAILURE(status)) {
 				goto unlock_and_exit;
 			}
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 7b34636..a60aaa7 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -843,14 +843,14 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block)
 
 		/* Disable all GPEs within this register */
 
-		status = acpi_write(0x00, &this_register->enable_address);
+		status = acpi_hw_write(0x00, &this_register->enable_address);
 		if (ACPI_FAILURE(status)) {
 			goto error_exit;
 		}
 
 		/* Clear any pending GPE events within this register */
 
-		status = acpi_write(0xFF, &this_register->status_address);
+		status = acpi_hw_write(0xFF, &this_register->status_address);
 		if (ACPI_FAILURE(status)) {
 			goto error_exit;
 		}
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index d3b7e37..c28c41b 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -82,7 +82,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
 
 	/* Get current value of the enable register that contains this GPE */
 
-	status = acpi_read(&enable_mask, &gpe_register_info->enable_address);
+	status = acpi_hw_read(&enable_mask, &gpe_register_info->enable_address);
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
@@ -95,7 +95,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
 
 	/* Write the updated enable mask */
 
-	status = acpi_write(enable_mask, &gpe_register_info->enable_address);
+	status = acpi_hw_write(enable_mask, &gpe_register_info->enable_address);
 	return (status);
 }
 
@@ -130,8 +130,8 @@ acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info)
 
 	/* Write the entire GPE (runtime) enable register */
 
-	status = acpi_write(gpe_register_info->enable_for_run,
-			    &gpe_register_info->enable_address);
+	status = acpi_hw_write(gpe_register_info->enable_for_run,
+			       &gpe_register_info->enable_address);
 
 	return (status);
 }
@@ -163,8 +163,8 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info)
 	 * Write a one to the appropriate bit in the status register to
 	 * clear this GPE.
 	 */
-	status = acpi_write(register_bit,
-			    &gpe_event_info->register_info->status_address);
+	status = acpi_hw_write(register_bit,
+			       &gpe_event_info->register_info->status_address);
 
 	return (status);
 }
@@ -222,7 +222,7 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
 
 	/* GPE currently active (status bit == 1)? */
 
-	status = acpi_read(&in_byte, &gpe_register_info->status_address);
+	status = acpi_hw_read(&in_byte, &gpe_register_info->status_address);
 	if (ACPI_FAILURE(status)) {
 		goto unlock_and_exit;
 	}
@@ -266,8 +266,8 @@ acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
 		/* Disable all GPEs in this register */
 
 		status =
-		    acpi_write(0x00,
-			       &gpe_block->register_info[i].enable_address);
+		    acpi_hw_write(0x00,
+				  &gpe_block->register_info[i].enable_address);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
@@ -303,8 +303,8 @@ acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
 		/* Clear status on all GPEs in this register */
 
 		status =
-		    acpi_write(0xFF,
-			       &gpe_block->register_info[i].status_address);
+		    acpi_hw_write(0xFF,
+				  &gpe_block->register_info[i].status_address);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
@@ -345,9 +345,9 @@ acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
 
 		/* Enable all "runtime" GPEs in this register */
 
-		status = acpi_write(gpe_block->register_info[i].enable_for_run,
-				    &gpe_block->register_info[i].
-				    enable_address);
+		status =
+		    acpi_hw_write(gpe_block->register_info[i].enable_for_run,
+				  &gpe_block->register_info[i].enable_address);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
@@ -387,9 +387,9 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
 
 		/* Enable all "wake" GPEs in this register */
 
-		status = acpi_write(gpe_block->register_info[i].enable_for_wake,
-				    &gpe_block->register_info[i].
-				    enable_address);
+		status =
+		    acpi_hw_write(gpe_block->register_info[i].enable_for_wake,
+				  &gpe_block->register_info[i].enable_address);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index 23d5505..15c9ed2 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -62,6 +62,184 @@ acpi_hw_write_multiple(u32 value,
 		       struct acpi_generic_address *register_a,
 		       struct acpi_generic_address *register_b);
 
+/******************************************************************************
+ *
+ * FUNCTION:    acpi_hw_validate_register
+ *
+ * PARAMETERS:  Reg                 - GAS register structure
+ *              max_bit_width       - Max bit_width supported (32 or 64)
+ *              Address             - Pointer to where the gas->address
+ *                                    is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Validate the contents of a GAS register. Checks the GAS
+ *              pointer, Address, space_id, bit_width, and bit_offset.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_hw_validate_register(struct acpi_generic_address *reg,
+			  u8 max_bit_width, u64 *address)
+{
+
+	/* Must have a valid pointer to a GAS structure */
+
+	if (!reg) {
+		return (AE_BAD_PARAMETER);
+	}
+
+	/*
+	 * Copy the target address. This handles possible alignment issues.
+	 * Address must not be null. A null address also indicates an optional
+	 * ACPI register that is not supported, so no error message.
+	 */
+	ACPI_MOVE_64_TO_64(address, &reg->address);
+	if (!(*address)) {
+		return (AE_BAD_ADDRESS);
+	}
+
+	/* Validate the space_iD */
+
+	if ((reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
+	    (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) {
+		ACPI_ERROR((AE_INFO,
+			    "Unsupported address space: 0x%X", reg->space_id));
+		return (AE_SUPPORT);
+	}
+
+	/* Validate the bit_width */
+
+	if ((reg->bit_width != 8) &&
+	    (reg->bit_width != 16) &&
+	    (reg->bit_width != 32) && (reg->bit_width != max_bit_width)) {
+		ACPI_ERROR((AE_INFO,
+			    "Unsupported register bit width: 0x%X",
+			    reg->bit_width));
+		return (AE_SUPPORT);
+	}
+
+	/* Validate the bit_offset. Just a warning for now. */
+
+	if (reg->bit_offset != 0) {
+		ACPI_WARNING((AE_INFO,
+			      "Unsupported register bit offset: 0x%X",
+			      reg->bit_offset));
+	}
+
+	return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION:    acpi_hw_read
+ *
+ * PARAMETERS:  Value               - Where the value is returned
+ *              Reg                 - GAS register structure
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Read from either memory or IO space. This is a 32-bit max
+ *              version of acpi_read, used internally since the overhead of
+ *              64-bit values is not needed.
+ *
+ * LIMITATIONS: <These limitations also apply to acpi_hw_write>
+ *      bit_width must be exactly 8, 16, or 32.
+ *      space_iD must be system_memory or system_iO.
+ *      bit_offset and access_width are currently ignored, as there has
+ *          not been a need to implement these.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg)
+{
+	u64 address;
+	acpi_status status;
+
+	ACPI_FUNCTION_NAME(hw_read);
+
+	/* Validate contents of the GAS register */
+
+	status = acpi_hw_validate_register(reg, 32, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/* Initialize entire 32-bit return value to zero */
+
+	*value = 0;
+
+	/*
+	 * Two address spaces supported: Memory or IO. PCI_Config is
+	 * not supported here because the GAS structure is insufficient
+	 */
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_read_memory((acpi_physical_address)
+					     address, value, reg->bit_width);
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+
+		status = acpi_hw_read_port((acpi_io_address)
+					   address, value, reg->bit_width);
+	}
+
+	ACPI_DEBUG_PRINT((ACPI_DB_IO,
+			  "Read:  %8.8X width %2d from %8.8X%8.8X (%s)\n",
+			  *value, reg->bit_width, ACPI_FORMAT_UINT64(address),
+			  acpi_ut_get_region_name(reg->space_id)));
+
+	return (status);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION:    acpi_hw_write
+ *
+ * PARAMETERS:  Value               - Value to be written
+ *              Reg                 - GAS register structure
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Write to either memory or IO space. This is a 32-bit max
+ *              version of acpi_write, used internally since the overhead of
+ *              64-bit values is not needed.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
+{
+	u64 address;
+	acpi_status status;
+
+	ACPI_FUNCTION_NAME(hw_write);
+
+	/* Validate contents of the GAS register */
+
+	status = acpi_hw_validate_register(reg, 32, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/*
+	 * Two address spaces supported: Memory or IO. PCI_Config is
+	 * not supported here because the GAS structure is insufficient
+	 */
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_write_memory((acpi_physical_address)
+					      address, value, reg->bit_width);
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+
+		status = acpi_hw_write_port((acpi_io_address)
+					    address, value, reg->bit_width);
+	}
+
+	ACPI_DEBUG_PRINT((ACPI_DB_IO,
+			  "Wrote: %8.8X width %2d   to %8.8X%8.8X (%s)\n",
+			  value, reg->bit_width, ACPI_FORMAT_UINT64(address),
+			  acpi_ut_get_region_name(reg->space_id)));
+
+	return (status);
+}
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_hw_clear_acpi_status
@@ -152,15 +330,16 @@ acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control)
 
 	ACPI_FUNCTION_TRACE(hw_write_pm1_control);
 
-	status = acpi_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block);
+	status =
+	    acpi_hw_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
 
 	if (acpi_gbl_FADT.xpm1b_control_block.address) {
 		status =
-		    acpi_write(pm1b_control,
-			       &acpi_gbl_FADT.xpm1b_control_block);
+		    acpi_hw_write(pm1b_control,
+				  &acpi_gbl_FADT.xpm1b_control_block);
 	}
 	return_ACPI_STATUS(status);
 }
@@ -218,12 +397,13 @@ acpi_hw_register_read(u32 register_id, u32 * return_value)
 
 	case ACPI_REGISTER_PM2_CONTROL:	/* 8-bit access */
 
-		status = acpi_read(&value, &acpi_gbl_FADT.xpm2_control_block);
+		status =
+		    acpi_hw_read(&value, &acpi_gbl_FADT.xpm2_control_block);
 		break;
 
 	case ACPI_REGISTER_PM_TIMER:	/* 32-bit access */
 
-		status = acpi_read(&value, &acpi_gbl_FADT.xpm_timer_block);
+		status = acpi_hw_read(&value, &acpi_gbl_FADT.xpm_timer_block);
 		break;
 
 	case ACPI_REGISTER_SMI_COMMAND_BLOCK:	/* 8-bit access */
@@ -340,7 +520,8 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value)
 		 * as per the ACPI spec.
 		 */
 		status =
-		    acpi_read(&read_value, &acpi_gbl_FADT.xpm2_control_block);
+		    acpi_hw_read(&read_value,
+				 &acpi_gbl_FADT.xpm2_control_block);
 		if (ACPI_FAILURE(status)) {
 			goto exit;
 		}
@@ -350,12 +531,13 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value)
 		ACPI_INSERT_BITS(value, ACPI_PM2_CONTROL_PRESERVED_BITS,
 				 read_value);
 
-		status = acpi_write(value, &acpi_gbl_FADT.xpm2_control_block);
+		status =
+		    acpi_hw_write(value, &acpi_gbl_FADT.xpm2_control_block);
 		break;
 
 	case ACPI_REGISTER_PM_TIMER:	/* 32-bit access */
 
-		status = acpi_write(value, &acpi_gbl_FADT.xpm_timer_block);
+		status = acpi_hw_write(value, &acpi_gbl_FADT.xpm_timer_block);
 		break;
 
 	case ACPI_REGISTER_SMI_COMMAND_BLOCK:	/* 8-bit access */
@@ -401,7 +583,7 @@ acpi_hw_read_multiple(u32 *value,
 
 	/* The first register is always required */
 
-	status = acpi_read(&value_a, register_a);
+	status = acpi_hw_read(&value_a, register_a);
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
@@ -409,7 +591,7 @@ acpi_hw_read_multiple(u32 *value,
 	/* Second register is optional */
 
 	if (register_b->address) {
-		status = acpi_read(&value_b, register_b);
+		status = acpi_hw_read(&value_b, register_b);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
@@ -452,7 +634,7 @@ acpi_hw_write_multiple(u32 value,
 
 	/* The first register is always required */
 
-	status = acpi_write(value, register_a);
+	status = acpi_hw_write(value, register_a);
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
@@ -470,7 +652,7 @@ acpi_hw_write_multiple(u32 value,
 	 * and writes have no side effects"
 	 */
 	if (register_b->address) {
-		status = acpi_write(value, register_b);
+		status = acpi_hw_write(value, register_b);
 	}
 
 	return (status);
diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c
index b7f522c..6b282e8 100644
--- a/drivers/acpi/acpica/hwtimer.c
+++ b/drivers/acpi/acpica/hwtimer.c
@@ -100,7 +100,7 @@ acpi_status acpi_get_timer(u32 * ticks)
 	}
 
 	status =
-	    acpi_hw_low_level_read(32, ticks, &acpi_gbl_FADT.xpm_timer_block);
+	    acpi_hw_read(ticks, &acpi_gbl_FADT.xpm_timer_block);
 
 	return_ACPI_STATUS(status);
 }
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index 9829979..4ead85f 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -80,7 +80,7 @@ acpi_status acpi_reset(void)
 
 	/* Write the reset value to the reset register */
 
-	status = acpi_write(acpi_gbl_FADT.reset_value, reset_reg);
+	status = acpi_hw_write(acpi_gbl_FADT.reset_value, reset_reg);
 	return_ACPI_STATUS(status);
 }
 
@@ -97,67 +97,92 @@ ACPI_EXPORT_SYMBOL(acpi_reset)
  *
  * DESCRIPTION: Read from either memory or IO space.
  *
+ * LIMITATIONS: <These limitations also apply to acpi_write>
+ *      bit_width must be exactly 8, 16, 32, or 64.
+ *      space_iD must be system_memory or system_iO.
+ *      bit_offset and access_width are currently ignored, as there has
+ *          not been a need to implement these.
+ *
  ******************************************************************************/
-acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg)
+acpi_status acpi_read(u64 *return_value, struct acpi_generic_address *reg)
 {
+	u32 value;
 	u32 width;
 	u64 address;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(acpi_read);
 
-	/*
-	 * Must have a valid pointer to a GAS structure, and a non-zero address
-	 * within.
-	 */
-	if (!reg) {
+	if (!return_value) {
 		return (AE_BAD_PARAMETER);
 	}
 
-	/* Get a local copy of the address. Handles possible alignment issues */
+	/* Validate contents of the GAS register. Allow 64-bit transfers */
 
-	ACPI_MOVE_64_TO_64(&address, &reg->address);
-	if (!address) {
-		return (AE_BAD_ADDRESS);
+	status = acpi_hw_validate_register(reg, 64, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
 	}
 
-	/* Supported widths are 8/16/32 */
-
 	width = reg->bit_width;
-	if ((width != 8) && (width != 16) && (width != 32)) {
-		return (AE_SUPPORT);
+	if (width == 64) {
+		width = 32;	/* Break into two 32-bit transfers */
 	}
 
-	/* Initialize entire 32-bit return value to zero */
+	/* Initialize entire 64-bit return value to zero */
 
-	*value = 0;
+	*return_value = 0;
+	value = 0;
 
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	switch (reg->space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_read_memory((acpi_physical_address)
+					     address, &value, width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
+		*return_value = value;
+
+		if (reg->bit_width == 64) {
 
-		status = acpi_os_read_memory((acpi_physical_address) address,
-					     value, width);
-		break;
+			/* Read the top 32 bits */
 
-	case ACPI_ADR_SPACE_SYSTEM_IO:
+			status = acpi_os_read_memory((acpi_physical_address)
+						     (address + 4), &value, 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			*return_value |= ((u64)value << 32);
+		}
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
 
-		status =
-		    acpi_hw_read_port((acpi_io_address) address, value, width);
-		break;
+		status = acpi_hw_read_port((acpi_io_address)
+					   address, &value, width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
+		*return_value = value;
 
-	default:
-		ACPI_ERROR((AE_INFO,
-			    "Unsupported address space: %X", reg->space_id));
-		return (AE_BAD_PARAMETER);
+		if (reg->bit_width == 64) {
+
+			/* Read the top 32 bits */
+
+			status = acpi_hw_read_port((acpi_io_address)
+						   (address + 4), &value, 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			*return_value |= ((u64)value << 32);
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
-			  "Read:  %8.8X width %2d from %8.8X%8.8X (%s)\n",
-			  *value, width, ACPI_FORMAT_UINT64(address),
+			  "Read:  %8.8X%8.8X width %2d from %8.8X%8.8X (%s)\n",
+			  ACPI_FORMAT_UINT64(*return_value), reg->bit_width,
+			  ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
@@ -169,7 +194,7 @@ ACPI_EXPORT_SYMBOL(acpi_read)
  *
  * FUNCTION:    acpi_write
  *
- * PARAMETERS:  Value               - To be written
+ * PARAMETERS:  Value               - Value to be written
  *              Reg                 - GAS register structure
  *
  * RETURN:      Status
@@ -177,7 +202,7 @@ ACPI_EXPORT_SYMBOL(acpi_read)
  * DESCRIPTION: Write to either memory or IO space.
  *
  ******************************************************************************/
-acpi_status acpi_write(u32 value, struct acpi_generic_address *reg)
+acpi_status acpi_write(u64 value, struct acpi_generic_address *reg)
 {
 	u32 width;
 	u64 address;
@@ -185,54 +210,61 @@ acpi_status acpi_write(u32 value, struct acpi_generic_address *reg)
 
 	ACPI_FUNCTION_NAME(acpi_write);
 
-	/*
-	 * Must have a valid pointer to a GAS structure, and a non-zero address
-	 * within.
-	 */
-	if (!reg) {
-		return (AE_BAD_PARAMETER);
-	}
+	/* Validate contents of the GAS register. Allow 64-bit transfers */
 
-	/* Get a local copy of the address. Handles possible alignment issues */
-
-	ACPI_MOVE_64_TO_64(&address, &reg->address);
-	if (!address) {
-		return (AE_BAD_ADDRESS);
+	status = acpi_hw_validate_register(reg, 64, &address);
+	if (ACPI_FAILURE(status)) {
+		return (status);
 	}
 
-	/* Supported widths are 8/16/32 */
-
 	width = reg->bit_width;
-	if ((width != 8) && (width != 16) && (width != 32)) {
-		return (AE_SUPPORT);
+	if (width == 64) {
+		width = 32;	/* Break into two 32-bit transfers */
 	}
 
 	/*
-	 * Two address spaces supported: Memory or IO.
-	 * PCI_Config is not supported here because the GAS struct is insufficient
+	 * Two address spaces supported: Memory or IO. PCI_Config is
+	 * not supported here because the GAS structure is insufficient
 	 */
-	switch (reg->space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
-
-		status = acpi_os_write_memory((acpi_physical_address) address,
-					      value, width);
-		break;
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_write_memory((acpi_physical_address)
+					      address, ACPI_LODWORD(value),
+					      width);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
 
-	case ACPI_ADR_SPACE_SYSTEM_IO:
+		if (reg->bit_width == 64) {
+			status = acpi_os_write_memory((acpi_physical_address)
+						      (address + 4),
+						      ACPI_HIDWORD(value), 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+		}
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
 
-		status = acpi_hw_write_port((acpi_io_address) address, value,
+		status = acpi_hw_write_port((acpi_io_address)
+					    address, ACPI_LODWORD(value),
 					    width);
-		break;
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
 
-	default:
-		ACPI_ERROR((AE_INFO,
-			    "Unsupported address space: %X", reg->space_id));
-		return (AE_BAD_PARAMETER);
+		if (reg->bit_width == 64) {
+			status = acpi_hw_write_port((acpi_io_address)
+						    (address + 4),
+						    ACPI_HIDWORD(value), 32);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
-			  "Wrote: %8.8X width %2d   to %8.8X%8.8X (%s)\n",
-			  value, width, ACPI_FORMAT_UINT64(address),
+			  "Wrote: %8.8X%8.8X width %2d   to %8.8X%8.8X (%s)\n",
+			  ACPI_FORMAT_UINT64(value), reg->bit_width,
+			  ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 2aecaa5..b450a19 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -360,9 +360,9 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address);
 acpi_status acpi_set_firmware_waking_vector64(u64 physical_address);
 #endif
 
-acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg);
+acpi_status acpi_read(u64 *value, struct acpi_generic_address *reg);
 
-acpi_status acpi_write(u32 value, struct acpi_generic_address *reg);
+acpi_status acpi_write(u64 value, struct acpi_generic_address *reg);
 
 acpi_status
 acpi_get_sleep_type_data(u8 sleep_state, u8 * slp_typ_a, u8 * slp_typ_b);
-- 
cgit v0.10.2


From 9c61b34cf7078da72cce276ff8cfae5d6e9955bc Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 24 Jun 2009 09:45:17 +0800
Subject: ACPICA: Remove duplicate prototypes from header

Two duplicates in acdebug.h.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 62c59df..a4fb001 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -154,10 +154,6 @@ void
 acpi_db_display_argument_object(union acpi_operand_object *obj_desc,
 				struct acpi_walk_state *walk_state);
 
-void acpi_db_check_predefined_names(void);
-
-void acpi_db_batch_execute(void);
-
 /*
  * dbexec - debugger control method execution
  */
-- 
cgit v0.10.2


From 15b8dd53f5ffaf8e2d9095c423f713423f576c0f Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 29 Jun 2009 13:39:29 +0800
Subject: ACPICA: Major update for acpi_get_object_info external interface

Completed a major update for the acpi_get_object_info external interface.
Changes include:
 - Support for variable, unlimited length HID, UID, and CID strings
 - Support Processor objects the same as Devices (HID,UID,CID,ADR,STA, etc.)
 - Call the _SxW power methods on behalf of a device object
 - Determine if a device is a PCI root bridge
 - Change the ACPI_BUFFER parameter to ACPI_DEVICE_INFO.
These changes will require an update to all callers of this interface.
See the ACPICA Programmer Reference for details.

Also, update all invocations of acpi_get_object_info interface

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 8cfb001..674a837 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2026,24 +2026,21 @@ acpi_sba_ioc_add(struct acpi_device *device)
 	struct ioc *ioc;
 	acpi_status status;
 	u64 hpa, length;
-	struct acpi_buffer buffer;
 	struct acpi_device_info *dev_info;
 
 	status = hp_acpi_csr_space(device->handle, &hpa, &length);
 	if (ACPI_FAILURE(status))
 		return 1;
 
-	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
-	status = acpi_get_object_info(device->handle, &buffer);
+	status = acpi_get_object_info(device->handle, &dev_info);
 	if (ACPI_FAILURE(status))
 		return 1;
-	dev_info = buffer.pointer;
 
 	/*
 	 * For HWP0001, only SBA appears in ACPI namespace.  It encloses the PCI
 	 * root bridges, and its CSR space includes the IOC function.
 	 */
-	if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
+	if (strncmp("HWP0001", dev_info->hardware_id.string, 7) == 0) {
 		hpa += ZX1_IOC_OFFSET;
 		/* zx1 based systems default to kernel page size iommu pages */
 		if (!iovp_shift)
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 9a62224..80eacbe 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -481,26 +481,23 @@ static acpi_status is_memory_device(acpi_handle handle)
 {
 	char *hardware_id;
 	acpi_status status;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_device_info *info;
 
-
-	status = acpi_get_object_info(handle, &buffer);
+	status = acpi_get_object_info(handle, &info);
 	if (ACPI_FAILURE(status))
 		return status;
 
-	info = buffer.pointer;
 	if (!(info->valid & ACPI_VALID_HID)) {
-		kfree(buffer.pointer);
+		kfree(info);
 		return AE_ERROR;
 	}
 
-	hardware_id = info->hardware_id.value;
+	hardware_id = info->hardware_id.string;
 	if ((hardware_id == NULL) ||
 	    (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
 		status = AE_ERROR;
 
-	kfree(buffer.pointer);
+	kfree(info);
 	return status;
 }
 
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index 72ac28d..0e7d561 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -44,4 +44,4 @@ acpi-y += tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o
 
 acpi-y += utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \
 		utcopy.o utdelete.o utglobal.o utmath.o utobject.o \
-		utstate.o utmutex.o utobject.o utresrc.o utlock.o
+		utstate.o utmutex.o utobject.o utresrc.o utlock.o utids.o
diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h
index e6777fb..6c1fb2d 100644
--- a/drivers/acpi/acpica/acconfig.h
+++ b/drivers/acpi/acpica/acconfig.h
@@ -203,6 +203,11 @@
 
 #define ACPI_SMBUS_BUFFER_SIZE          34
 
+/* _sx_d and _sx_w control methods */
+
+#define ACPI_NUM_sx_d_METHODS           4
+#define ACPI_NUM_sx_w_METHODS           5
+
 /******************************************************************************
  *
  * ACPI AML Debugger
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 0b73b31..6389f7c 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -265,7 +265,8 @@ ACPI_EXTERN u8 acpi_gbl_osi_data;
 extern u8 acpi_gbl_shutdown;
 extern u32 acpi_gbl_startup_flags;
 extern const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT];
-extern const char *acpi_gbl_highest_dstate_names[4];
+extern const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS];
+extern const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS];
 extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES];
 extern const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS];
 
diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h
index e8db7a3..5db9f29 100644
--- a/drivers/acpi/acpica/acinterp.h
+++ b/drivers/acpi/acpica/acinterp.h
@@ -461,9 +461,9 @@ void acpi_ex_acquire_global_lock(u32 rule);
 
 void acpi_ex_release_global_lock(u32 rule);
 
-void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string);
+void acpi_ex_eisa_id_to_string(char *dest, acpi_integer compressed_id);
 
-void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string);
+void acpi_ex_integer_to_string(char *dest, acpi_integer value);
 
 /*
  * exregion - default op_region handlers
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index 897810b..b0add85 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -324,26 +324,30 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
 acpi_status
 acpi_ut_evaluate_numeric_object(char *object_name,
 				struct acpi_namespace_node *device_node,
-				acpi_integer * address);
+				acpi_integer *value);
 
 acpi_status
-acpi_ut_execute_HID(struct acpi_namespace_node *device_node,
-		    struct acpica_device_id *hid);
+acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 *status_flags);
 
 acpi_status
-acpi_ut_execute_CID(struct acpi_namespace_node *device_node,
-		    struct acpi_compatible_id_list **return_cid_list);
+acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node,
+			      const char **method_names,
+			      u8 method_count, u8 *out_values);
 
+/*
+ * utids - device ID support
+ */
 acpi_status
-acpi_ut_execute_STA(struct acpi_namespace_node *device_node,
-		    u32 * status_flags);
+acpi_ut_execute_HID(struct acpi_namespace_node *device_node,
+		    struct acpica_device_id **return_id);
 
 acpi_status
 acpi_ut_execute_UID(struct acpi_namespace_node *device_node,
-		    struct acpica_device_id *uid);
+		    struct acpica_device_id **return_id);
 
 acpi_status
-acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest);
+acpi_ut_execute_CID(struct acpi_namespace_node *device_node,
+		    struct acpica_device_id_list **return_cid_list);
 
 /*
  * utlock - reader/writer locks
@@ -445,6 +449,8 @@ acpi_ut_short_divide(acpi_integer in_dividend,
  */
 const char *acpi_ut_validate_exception(acpi_status status);
 
+u8 acpi_ut_is_pci_root_bridge(char *id);
+
 u8 acpi_ut_is_aml_table(struct acpi_table_header *table);
 
 acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id);
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 284a7be..cf29c49 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -50,8 +50,6 @@
 ACPI_MODULE_NAME("evrgnini")
 
 /* Local prototypes */
-static u8 acpi_ev_match_pci_root_bridge(char *id);
-
 static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node);
 
 /*******************************************************************************
@@ -332,37 +330,6 @@ acpi_ev_pci_config_region_setup(acpi_handle handle,
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ev_match_pci_root_bridge
- *
- * PARAMETERS:  Id              - The HID/CID in string format
- *
- * RETURN:      TRUE if the Id is a match for a PCI/PCI-Express Root Bridge
- *
- * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID.
- *
- ******************************************************************************/
-
-static u8 acpi_ev_match_pci_root_bridge(char *id)
-{
-
-	/*
-	 * Check if this is a PCI root.
-	 * ACPI 3.0+: check for a PCI Express root also.
-	 */
-	if (!(ACPI_STRNCMP(id,
-			   PCI_ROOT_HID_STRING,
-			   sizeof(PCI_ROOT_HID_STRING))) ||
-	    !(ACPI_STRNCMP(id,
-			   PCI_EXPRESS_ROOT_HID_STRING,
-			   sizeof(PCI_EXPRESS_ROOT_HID_STRING)))) {
-		return (TRUE);
-	}
-
-	return (FALSE);
-}
-
-/*******************************************************************************
- *
  * FUNCTION:    acpi_ev_is_pci_root_bridge
  *
  * PARAMETERS:  Node            - Device node being examined
@@ -377,9 +344,10 @@ static u8 acpi_ev_match_pci_root_bridge(char *id)
 static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node)
 {
 	acpi_status status;
-	struct acpica_device_id hid;
-	struct acpi_compatible_id_list *cid;
+	struct acpica_device_id *hid;
+	struct acpica_device_id_list *cid;
 	u32 i;
+	u8 match;
 
 	/* Get the _HID and check for a PCI Root Bridge */
 
@@ -388,7 +356,10 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node)
 		return (FALSE);
 	}
 
-	if (acpi_ev_match_pci_root_bridge(hid.value)) {
+	match = acpi_ut_is_pci_root_bridge(hid->string);
+	ACPI_FREE(hid);
+
+	if (match) {
 		return (TRUE);
 	}
 
@@ -402,7 +373,7 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node)
 	/* Check all _CIDs in the returned list */
 
 	for (i = 0; i < cid->count; i++) {
-		if (acpi_ev_match_pci_root_bridge(cid->id[i].value)) {
+		if (acpi_ut_is_pci_root_bridge(cid->ids[i].string)) {
 			ACPI_FREE(cid);
 			return (TRUE);
 		}
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index 87730e9..7d41f99 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -358,50 +358,67 @@ static u32 acpi_ex_digits_needed(acpi_integer value, u32 base)
  *
  * FUNCTION:    acpi_ex_eisa_id_to_string
  *
- * PARAMETERS:  numeric_id      - EISA ID to be converted
+ * PARAMETERS:  compressed_id   - EISAID to be converted
  *              out_string      - Where to put the converted string (8 bytes)
  *
  * RETURN:      None
  *
- * DESCRIPTION: Convert a numeric EISA ID to string representation
+ * DESCRIPTION: Convert a numeric EISAID to string representation. Return
+ *              buffer must be large enough to hold the string. The string
+ *              returned is always exactly of length ACPI_EISAID_STRING_SIZE
+ *              (includes null terminator). The EISAID is always 32 bits.
  *
  ******************************************************************************/
 
-void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string)
+void acpi_ex_eisa_id_to_string(char *out_string, acpi_integer compressed_id)
 {
-	u32 eisa_id;
+	u32 swapped_id;
 
 	ACPI_FUNCTION_ENTRY();
 
+	/* The EISAID should be a 32-bit integer */
+
+	if (compressed_id > ACPI_UINT32_MAX) {
+		ACPI_WARNING((AE_INFO,
+			      "Expected EISAID is larger than 32 bits: 0x%8.8X%8.8X, truncating",
+			      ACPI_FORMAT_UINT64(compressed_id)));
+	}
+
 	/* Swap ID to big-endian to get contiguous bits */
 
-	eisa_id = acpi_ut_dword_byte_swap(numeric_id);
+	swapped_id = acpi_ut_dword_byte_swap((u32)compressed_id);
 
-	out_string[0] = (char)('@' + (((unsigned long)eisa_id >> 26) & 0x1f));
-	out_string[1] = (char)('@' + ((eisa_id >> 21) & 0x1f));
-	out_string[2] = (char)('@' + ((eisa_id >> 16) & 0x1f));
-	out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 12);
-	out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 8);
-	out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 4);
-	out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 0);
+	/* First 3 bytes are uppercase letters. Next 4 bytes are hexadecimal */
+
+	out_string[0] =
+	    (char)(0x40 + (((unsigned long)swapped_id >> 26) & 0x1F));
+	out_string[1] = (char)(0x40 + ((swapped_id >> 21) & 0x1F));
+	out_string[2] = (char)(0x40 + ((swapped_id >> 16) & 0x1F));
+	out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 12);
+	out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 8);
+	out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 4);
+	out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 0);
 	out_string[7] = 0;
 }
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ex_unsigned_integer_to_string
+ * FUNCTION:    acpi_ex_integer_to_string
  *
- * PARAMETERS:  Value           - Value to be converted
- *              out_string      - Where to put the converted string (8 bytes)
+ * PARAMETERS:  out_string      - Where to put the converted string. At least
+ *                                21 bytes are needed to hold the largest
+ *                                possible 64-bit integer.
+ *              Value           - Value to be converted
  *
  * RETURN:      None, string
  *
- * DESCRIPTION: Convert a number to string representation. Assumes string
- *              buffer is large enough to hold the string.
+ * DESCRIPTION: Convert a 64-bit integer to decimal string representation.
+ *              Assumes string buffer is large enough to hold the string. The
+ *              largest string is (ACPI_MAX64_DECIMAL_DIGITS + 1).
  *
  ******************************************************************************/
 
-void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string)
+void acpi_ex_integer_to_string(char *out_string, acpi_integer value)
 {
 	u32 count;
 	u32 digits_needed;
diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c
index 41994fe..0fe87f1 100644
--- a/drivers/acpi/acpica/nsdumpdv.c
+++ b/drivers/acpi/acpica/nsdumpdv.c
@@ -70,7 +70,6 @@ static acpi_status
 acpi_ns_dump_one_device(acpi_handle obj_handle,
 			u32 level, void *context, void **return_value)
 {
-	struct acpi_buffer buffer;
 	struct acpi_device_info *info;
 	acpi_status status;
 	u32 i;
@@ -80,17 +79,15 @@ acpi_ns_dump_one_device(acpi_handle obj_handle,
 	status =
 	    acpi_ns_dump_one_object(obj_handle, level, context, return_value);
 
-	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
-	status = acpi_get_object_info(obj_handle, &buffer);
+	status = acpi_get_object_info(obj_handle, &info);
 	if (ACPI_SUCCESS(status)) {
-		info = buffer.pointer;
 		for (i = 0; i < level; i++) {
 			ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES, " "));
 		}
 
 		ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES,
 				      "    HID: %s, ADR: %8.8X%8.8X, Status: %X\n",
-				      info->hardware_id.value,
+				      info->hardware_id.string,
 				      ACPI_FORMAT_UINT64(info->address),
 				      info->current_status));
 		ACPI_FREE(info);
diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c
index daf4ad3..4929dbd 100644
--- a/drivers/acpi/acpica/nsxfeval.c
+++ b/drivers/acpi/acpica/nsxfeval.c
@@ -535,10 +535,11 @@ acpi_ns_get_device_callback(acpi_handle obj_handle,
 	acpi_status status;
 	struct acpi_namespace_node *node;
 	u32 flags;
-	struct acpica_device_id hid;
-	struct acpi_compatible_id_list *cid;
+	struct acpica_device_id *hid;
+	struct acpica_device_id_list *cid;
 	u32 i;
-	int found;
+	u8 found;
+	int no_match;
 
 	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
 	if (ACPI_FAILURE(status)) {
@@ -582,10 +583,14 @@ acpi_ns_get_device_callback(acpi_handle obj_handle,
 			return (AE_CTRL_DEPTH);
 		}
 
-		if (ACPI_STRNCMP(hid.value, info->hid, sizeof(hid.value)) != 0) {
-
-			/* Get the list of Compatible IDs */
+		no_match = ACPI_STRCMP(hid->string, info->hid);
+		ACPI_FREE(hid);
 
+		if (no_match) {
+			/*
+			 * HID does not match, attempt match within the
+			 * list of Compatible IDs (CIDs)
+			 */
 			status = acpi_ut_execute_CID(node, &cid);
 			if (status == AE_NOT_FOUND) {
 				return (AE_OK);
@@ -597,10 +602,8 @@ acpi_ns_get_device_callback(acpi_handle obj_handle,
 
 			found = 0;
 			for (i = 0; i < cid->count; i++) {
-				if (ACPI_STRNCMP(cid->id[i].value, info->hid,
-						 sizeof(struct
-							acpi_compatible_id)) ==
-				    0) {
+				if (ACPI_STRCMP(cid->ids[i].string, info->hid)
+				    == 0) {
 					found = 1;
 					break;
 				}
diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index f23593d..ddc84af 100644
--- a/drivers/acpi/acpica/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -51,6 +51,11 @@
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsxfname")
 
+/* Local prototypes */
+static char *acpi_ns_copy_device_id(struct acpica_device_id *dest,
+				    struct acpica_device_id *source,
+				    char *string_area);
+
 /******************************************************************************
  *
  * FUNCTION:    acpi_get_handle
@@ -68,6 +73,7 @@ ACPI_MODULE_NAME("nsxfname")
  *              namespace handle.
  *
  ******************************************************************************/
+
 acpi_status
 acpi_get_handle(acpi_handle parent,
 		acpi_string pathname, acpi_handle * ret_handle)
@@ -210,10 +216,38 @@ ACPI_EXPORT_SYMBOL(acpi_get_name)
 
 /******************************************************************************
  *
+ * FUNCTION:    acpi_ns_copy_device_id
+ *
+ * PARAMETERS:  Dest                - Pointer to the destination DEVICE_ID
+ *              Source              - Pointer to the source DEVICE_ID
+ *              string_area         - Pointer to where to copy the dest string
+ *
+ * RETURN:      Pointer to the next string area
+ *
+ * DESCRIPTION: Copy a single DEVICE_ID, including the string data.
+ *
+ ******************************************************************************/
+static char *acpi_ns_copy_device_id(struct acpica_device_id *dest,
+				    struct acpica_device_id *source,
+				    char *string_area)
+{
+	/* Create the destination DEVICE_ID */
+
+	dest->string = string_area;
+	dest->length = source->length;
+
+	/* Copy actual string and return a pointer to the next string area */
+
+	ACPI_MEMCPY(string_area, source->string, source->length);
+	return (string_area + source->length);
+}
+
+/******************************************************************************
+ *
  * FUNCTION:    acpi_get_object_info
  *
- * PARAMETERS:  Handle          - Object Handle
- *              Buffer          - Where the info is returned
+ * PARAMETERS:  Handle              - Object Handle
+ *              return_buffer       - Where the info is returned
  *
  * RETURN:      Status
  *
@@ -221,33 +255,37 @@ ACPI_EXPORT_SYMBOL(acpi_get_name)
  *              namespace node and possibly by running several standard
  *              control methods (Such as in the case of a device.)
  *
+ * For Device and Processor objects, run the Device _HID, _UID, _CID, _STA,
+ * _ADR, _sx_w, and _sx_d methods.
+ *
+ * Note: Allocates the return buffer, must be freed by the caller.
+ *
  ******************************************************************************/
+
 acpi_status
-acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer)
+acpi_get_object_info(acpi_handle handle,
+		     struct acpi_device_info **return_buffer)
 {
-	acpi_status status;
 	struct acpi_namespace_node *node;
 	struct acpi_device_info *info;
-	struct acpi_device_info *return_info;
-	struct acpi_compatible_id_list *cid_list = NULL;
-	acpi_size size;
+	struct acpica_device_id_list *cid_list = NULL;
+	struct acpica_device_id *hid = NULL;
+	struct acpica_device_id *uid = NULL;
+	char *next_id_string;
+	acpi_object_type type;
+	acpi_name name;
+	u8 param_count = 0;
+	u8 valid = 0;
+	u32 info_size;
+	u32 i;
+	acpi_status status;
 
 	/* Parameter validation */
 
-	if (!handle || !buffer) {
+	if (!handle || !return_buffer) {
 		return (AE_BAD_PARAMETER);
 	}
 
-	status = acpi_ut_validate_buffer(buffer);
-	if (ACPI_FAILURE(status)) {
-		return (status);
-	}
-
-	info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_device_info));
-	if (!info) {
-		return (AE_NO_MEMORY);
-	}
-
 	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
 	if (ACPI_FAILURE(status)) {
 		goto cleanup;
@@ -256,66 +294,91 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer)
 	node = acpi_ns_map_handle_to_node(handle);
 	if (!node) {
 		(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-		status = AE_BAD_PARAMETER;
-		goto cleanup;
+		return (AE_BAD_PARAMETER);
 	}
 
-	/* Init return structure */
-
-	size = sizeof(struct acpi_device_info);
+	/* Get the namespace node data while the namespace is locked */
 
-	info->type = node->type;
-	info->name = node->name.integer;
-	info->valid = 0;
+	info_size = sizeof(struct acpi_device_info);
+	type = node->type;
+	name = node->name.integer;
 
 	if (node->type == ACPI_TYPE_METHOD) {
-		info->param_count = node->object->method.param_count;
+		param_count = node->object->method.param_count;
 	}
 
 	status = acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 	if (ACPI_FAILURE(status)) {
-		goto cleanup;
+		return (status);
 	}
 
-	/* If not a device, we are all done */
-
-	if (info->type == ACPI_TYPE_DEVICE) {
+	if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) {
 		/*
-		 * Get extra info for ACPI Devices objects only:
-		 * Run the Device _HID, _UID, _CID, _STA, _ADR and _sx_d methods.
+		 * Get extra info for ACPI Device/Processor objects only:
+		 * Run the Device _HID, _UID, and _CID methods.
 		 *
 		 * Note: none of these methods are required, so they may or may
-		 * not be present for this device.  The Info->Valid bitfield is used
-		 * to indicate which methods were found and ran successfully.
+		 * not be present for this device. The Info->Valid bitfield is used
+		 * to indicate which methods were found and run successfully.
 		 */
 
 		/* Execute the Device._HID method */
 
-		status = acpi_ut_execute_HID(node, &info->hardware_id);
+		status = acpi_ut_execute_HID(node, &hid);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_HID;
+			info_size += hid->length;
+			valid |= ACPI_VALID_HID;
 		}
 
 		/* Execute the Device._UID method */
 
-		status = acpi_ut_execute_UID(node, &info->unique_id);
+		status = acpi_ut_execute_UID(node, &uid);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_UID;
+			info_size += uid->length;
+			valid |= ACPI_VALID_UID;
 		}
 
 		/* Execute the Device._CID method */
 
 		status = acpi_ut_execute_CID(node, &cid_list);
 		if (ACPI_SUCCESS(status)) {
-			size += cid_list->size;
-			info->valid |= ACPI_VALID_CID;
+
+			/* Add size of CID strings and CID pointer array */
+
+			info_size +=
+			    (cid_list->list_size -
+			     sizeof(struct acpica_device_id_list));
+			valid |= ACPI_VALID_CID;
 		}
+	}
+
+	/*
+	 * Now that we have the variable-length data, we can allocate the
+	 * return buffer
+	 */
+	info = ACPI_ALLOCATE_ZEROED(info_size);
+	if (!info) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	/* Get the fixed-length data */
+
+	if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) {
+		/*
+		 * Get extra info for ACPI Device/Processor objects only:
+		 * Run the _STA, _ADR and, sx_w, and _sx_d methods.
+		 *
+		 * Note: none of these methods are required, so they may or may
+		 * not be present for this device. The Info->Valid bitfield is used
+		 * to indicate which methods were found and run successfully.
+		 */
 
 		/* Execute the Device._STA method */
 
 		status = acpi_ut_execute_STA(node, &info->current_status);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_STA;
+			valid |= ACPI_VALID_STA;
 		}
 
 		/* Execute the Device._ADR method */
@@ -323,36 +386,100 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer)
 		status = acpi_ut_evaluate_numeric_object(METHOD_NAME__ADR, node,
 							 &info->address);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_ADR;
+			valid |= ACPI_VALID_ADR;
+		}
+
+		/* Execute the Device._sx_w methods */
+
+		status = acpi_ut_execute_power_methods(node,
+						       acpi_gbl_lowest_dstate_names,
+						       ACPI_NUM_sx_w_METHODS,
+						       info->lowest_dstates);
+		if (ACPI_SUCCESS(status)) {
+			valid |= ACPI_VALID_SXWS;
 		}
 
 		/* Execute the Device._sx_d methods */
 
-		status = acpi_ut_execute_sxds(node, info->highest_dstates);
+		status = acpi_ut_execute_power_methods(node,
+						       acpi_gbl_highest_dstate_names,
+						       ACPI_NUM_sx_d_METHODS,
+						       info->highest_dstates);
 		if (ACPI_SUCCESS(status)) {
-			info->valid |= ACPI_VALID_SXDS;
+			valid |= ACPI_VALID_SXDS;
 		}
 	}
 
-	/* Validate/Allocate/Clear caller buffer */
+	/*
+	 * Create a pointer to the string area of the return buffer.
+	 * Point to the end of the base struct acpi_device_info structure.
+	 */
+	next_id_string = ACPI_CAST_PTR(char, info->compatible_id_list.ids);
+	if (cid_list) {
 
-	status = acpi_ut_initialize_buffer(buffer, size);
-	if (ACPI_FAILURE(status)) {
-		goto cleanup;
+		/* Point past the CID DEVICE_ID array */
+
+		next_id_string +=
+		    ((acpi_size) cid_list->count *
+		     sizeof(struct acpica_device_id));
 	}
 
-	/* Populate the return buffer */
+	/*
+	 * Copy the HID, UID, and CIDs to the return buffer. The variable-length
+	 * strings are copied to the reserved area at the end of the buffer.
+	 *
+	 * For HID and CID, check if the ID is a PCI Root Bridge.
+	 */
+	if (hid) {
+		next_id_string = acpi_ns_copy_device_id(&info->hardware_id,
+							hid, next_id_string);
+
+		if (acpi_ut_is_pci_root_bridge(hid->string)) {
+			info->flags |= ACPI_PCI_ROOT_BRIDGE;
+		}
+	}
 
-	return_info = buffer->pointer;
-	ACPI_MEMCPY(return_info, info, sizeof(struct acpi_device_info));
+	if (uid) {
+		next_id_string = acpi_ns_copy_device_id(&info->unique_id,
+							uid, next_id_string);
+	}
 
 	if (cid_list) {
-		ACPI_MEMCPY(&return_info->compatibility_id, cid_list,
-			    cid_list->size);
+		info->compatible_id_list.count = cid_list->count;
+		info->compatible_id_list.list_size = cid_list->list_size;
+
+		/* Copy each CID */
+
+		for (i = 0; i < cid_list->count; i++) {
+			next_id_string =
+			    acpi_ns_copy_device_id(&info->compatible_id_list.
+						   ids[i], &cid_list->ids[i],
+						   next_id_string);
+
+			if (acpi_ut_is_pci_root_bridge(cid_list->ids[i].string)) {
+				info->flags |= ACPI_PCI_ROOT_BRIDGE;
+			}
+		}
 	}
 
+	/* Copy the fixed-length data */
+
+	info->info_size = info_size;
+	info->type = type;
+	info->name = name;
+	info->param_count = param_count;
+	info->valid = valid;
+
+	*return_buffer = info;
+	status = AE_OK;
+
       cleanup:
-	ACPI_FREE(info);
+	if (hid) {
+		ACPI_FREE(hid);
+	}
+	if (uid) {
+		ACPI_FREE(uid);
+	}
 	if (cid_list) {
 		ACPI_FREE(cid_list);
 	}
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 006b16c..5503307 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -44,19 +44,10 @@
 #include <acpi/acpi.h>
 #include "accommon.h"
 #include "acnamesp.h"
-#include "acinterp.h"
 
 #define _COMPONENT          ACPI_UTILITIES
 ACPI_MODULE_NAME("uteval")
 
-/* Local prototypes */
-static void
-acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length);
-
-static acpi_status
-acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc,
-			  struct acpi_compatible_id *one_cid);
-
 /*
  * Strings supported by the _OSI predefined (internal) method.
  *
@@ -213,7 +204,7 @@ acpi_status acpi_osi_invalidate(char *interface)
  * RETURN:      Status
  *
  * DESCRIPTION: Evaluates a namespace object and verifies the type of the
- *              return object.  Common code that simplifies accessing objects
+ *              return object. Common code that simplifies accessing objects
  *              that have required return objects of fixed types.
  *
  *              NOTE: Internal function, no parameter validation
@@ -298,7 +289,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
 
 	if ((acpi_gbl_enable_interpreter_slack) && (!expected_return_btypes)) {
 		/*
-		 * We received a return object, but one was not expected.  This can
+		 * We received a return object, but one was not expected. This can
 		 * happen frequently if the "implicit return" feature is enabled.
 		 * Just delete the return object and return AE_OK.
 		 */
@@ -340,12 +331,12 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
  *
  * PARAMETERS:  object_name         - Object name to be evaluated
  *              device_node         - Node for the device
- *              Address             - Where the value is returned
+ *              Value               - Where the value is returned
  *
  * RETURN:      Status
  *
  * DESCRIPTION: Evaluates a numeric namespace object for a selected device
- *              and stores result in *Address.
+ *              and stores result in *Value.
  *
  *              NOTE: Internal function, no parameter validation
  *
@@ -354,7 +345,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
 acpi_status
 acpi_ut_evaluate_numeric_object(char *object_name,
 				struct acpi_namespace_node *device_node,
-				acpi_integer * address)
+				acpi_integer *value)
 {
 	union acpi_operand_object *obj_desc;
 	acpi_status status;
@@ -369,295 +360,7 @@ acpi_ut_evaluate_numeric_object(char *object_name,
 
 	/* Get the returned Integer */
 
-	*address = obj_desc->integer.value;
-
-	/* On exit, we must delete the return object */
-
-	acpi_ut_remove_reference(obj_desc);
-	return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ut_copy_id_string
- *
- * PARAMETERS:  Destination         - Where to copy the string
- *              Source              - Source string
- *              max_length          - Length of the destination buffer
- *
- * RETURN:      None
- *
- * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods.
- *              Performs removal of a leading asterisk if present -- workaround
- *              for a known issue on a bunch of machines.
- *
- ******************************************************************************/
-
-static void
-acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length)
-{
-
-	/*
-	 * Workaround for ID strings that have a leading asterisk. This construct
-	 * is not allowed by the ACPI specification  (ID strings must be
-	 * alphanumeric), but enough existing machines have this embedded in their
-	 * ID strings that the following code is useful.
-	 */
-	if (*source == '*') {
-		source++;
-	}
-
-	/* Do the actual copy */
-
-	ACPI_STRNCPY(destination, source, max_length);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ut_execute_HID
- *
- * PARAMETERS:  device_node         - Node for the device
- *              Hid                 - Where the HID is returned
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Executes the _HID control method that returns the hardware
- *              ID of the device.
- *
- *              NOTE: Internal function, no parameter validation
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ut_execute_HID(struct acpi_namespace_node *device_node,
-		    struct acpica_device_id *hid)
-{
-	union acpi_operand_object *obj_desc;
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(ut_execute_HID);
-
-	status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID,
-					 ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING,
-					 &obj_desc);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
-	if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
-
-		/* Convert the Numeric HID to string */
-
-		acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value,
-					  hid->value);
-	} else {
-		/* Copy the String HID from the returned object */
-
-		acpi_ut_copy_id_string(hid->value, obj_desc->string.pointer,
-				       sizeof(hid->value));
-	}
-
-	/* On exit, we must delete the return object */
-
-	acpi_ut_remove_reference(obj_desc);
-	return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ut_translate_one_cid
- *
- * PARAMETERS:  obj_desc            - _CID object, must be integer or string
- *              one_cid             - Where the CID string is returned
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Return a numeric or string _CID value as a string.
- *              (Compatible ID)
- *
- *              NOTE:  Assumes a maximum _CID string length of
- *                     ACPI_MAX_CID_LENGTH.
- *
- ******************************************************************************/
-
-static acpi_status
-acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc,
-			  struct acpi_compatible_id *one_cid)
-{
-
-	switch (obj_desc->common.type) {
-	case ACPI_TYPE_INTEGER:
-
-		/* Convert the Numeric CID to string */
-
-		acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value,
-					  one_cid->value);
-		return (AE_OK);
-
-	case ACPI_TYPE_STRING:
-
-		if (obj_desc->string.length > ACPI_MAX_CID_LENGTH) {
-			return (AE_AML_STRING_LIMIT);
-		}
-
-		/* Copy the String CID from the returned object */
-
-		acpi_ut_copy_id_string(one_cid->value, obj_desc->string.pointer,
-				       ACPI_MAX_CID_LENGTH);
-		return (AE_OK);
-
-	default:
-
-		return (AE_TYPE);
-	}
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ut_execute_CID
- *
- * PARAMETERS:  device_node         - Node for the device
- *              return_cid_list     - Where the CID list is returned
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Executes the _CID control method that returns one or more
- *              compatible hardware IDs for the device.
- *
- *              NOTE: Internal function, no parameter validation
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ut_execute_CID(struct acpi_namespace_node * device_node,
-		    struct acpi_compatible_id_list ** return_cid_list)
-{
-	union acpi_operand_object *obj_desc;
-	acpi_status status;
-	u32 count;
-	u32 size;
-	struct acpi_compatible_id_list *cid_list;
-	u32 i;
-
-	ACPI_FUNCTION_TRACE(ut_execute_CID);
-
-	/* Evaluate the _CID method for this device */
-
-	status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID,
-					 ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING
-					 | ACPI_BTYPE_PACKAGE, &obj_desc);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
-	/* Get the number of _CIDs returned */
-
-	count = 1;
-	if (obj_desc->common.type == ACPI_TYPE_PACKAGE) {
-		count = obj_desc->package.count;
-	}
-
-	/* Allocate a worst-case buffer for the _CIDs */
-
-	size = (((count - 1) * sizeof(struct acpi_compatible_id)) +
-		sizeof(struct acpi_compatible_id_list));
-
-	cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size);
-	if (!cid_list) {
-		return_ACPI_STATUS(AE_NO_MEMORY);
-	}
-
-	/* Init CID list */
-
-	cid_list->count = count;
-	cid_list->size = size;
-
-	/*
-	 *  A _CID can return either a single compatible ID or a package of
-	 *  compatible IDs.  Each compatible ID can be one of the following:
-	 *  1) Integer (32 bit compressed EISA ID) or
-	 *  2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss")
-	 */
-
-	/* The _CID object can be either a single CID or a package (list) of CIDs */
-
-	if (obj_desc->common.type == ACPI_TYPE_PACKAGE) {
-
-		/* Translate each package element */
-
-		for (i = 0; i < count; i++) {
-			status =
-			    acpi_ut_translate_one_cid(obj_desc->package.
-						      elements[i],
-						      &cid_list->id[i]);
-			if (ACPI_FAILURE(status)) {
-				break;
-			}
-		}
-	} else {
-		/* Only one CID, translate to a string */
-
-		status = acpi_ut_translate_one_cid(obj_desc, cid_list->id);
-	}
-
-	/* Cleanup on error */
-
-	if (ACPI_FAILURE(status)) {
-		ACPI_FREE(cid_list);
-	} else {
-		*return_cid_list = cid_list;
-	}
-
-	/* On exit, we must delete the _CID return object */
-
-	acpi_ut_remove_reference(obj_desc);
-	return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ut_execute_UID
- *
- * PARAMETERS:  device_node         - Node for the device
- *              Uid                 - Where the UID is returned
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Executes the _UID control method that returns the hardware
- *              ID of the device.
- *
- *              NOTE: Internal function, no parameter validation
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ut_execute_UID(struct acpi_namespace_node *device_node,
-		    struct acpica_device_id *uid)
-{
-	union acpi_operand_object *obj_desc;
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(ut_execute_UID);
-
-	status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID,
-					 ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING,
-					 &obj_desc);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
-	if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
-
-		/* Convert the Numeric UID to string */
-
-		acpi_ex_unsigned_integer_to_string(obj_desc->integer.value,
-						   uid->value);
-	} else {
-		/* Copy the String UID from the returned object */
-
-		acpi_ut_copy_id_string(uid->value, obj_desc->string.pointer,
-				       sizeof(uid->value));
-	}
+	*value = obj_desc->integer.value;
 
 	/* On exit, we must delete the return object */
 
@@ -716,60 +419,64 @@ acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 * flags)
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ut_execute_Sxds
+ * FUNCTION:    acpi_ut_execute_power_methods
  *
  * PARAMETERS:  device_node         - Node for the device
- *              Flags               - Where the status flags are returned
+ *              method_names        - Array of power method names
+ *              method_count        - Number of methods to execute
+ *              out_values          - Where the power method values are returned
  *
- * RETURN:      Status
+ * RETURN:      Status, out_values
  *
- * DESCRIPTION: Executes _STA for selected device and stores results in
- *              *Flags.
+ * DESCRIPTION: Executes the specified power methods for the device and returns
+ *              the result(s).
  *
  *              NOTE: Internal function, no parameter validation
  *
- ******************************************************************************/
+******************************************************************************/
 
 acpi_status
-acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest)
+acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node,
+			      const char **method_names,
+			      u8 method_count, u8 *out_values)
 {
 	union acpi_operand_object *obj_desc;
 	acpi_status status;
+	acpi_status final_status = AE_NOT_FOUND;
 	u32 i;
 
-	ACPI_FUNCTION_TRACE(ut_execute_sxds);
+	ACPI_FUNCTION_TRACE(ut_execute_power_methods);
 
-	for (i = 0; i < 4; i++) {
-		highest[i] = 0xFF;
+	for (i = 0; i < method_count; i++) {
+		/*
+		 * Execute the power method (_sx_d or _sx_w). The only allowable
+		 * return type is an Integer.
+		 */
 		status = acpi_ut_evaluate_object(device_node,
 						 ACPI_CAST_PTR(char,
-							       acpi_gbl_highest_dstate_names
-							       [i]),
+							       method_names[i]),
 						 ACPI_BTYPE_INTEGER, &obj_desc);
-		if (ACPI_FAILURE(status)) {
-			if (status != AE_NOT_FOUND) {
-				ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-						  "%s on Device %4.4s, %s\n",
-						  ACPI_CAST_PTR(char,
-								acpi_gbl_highest_dstate_names
-								[i]),
-						  acpi_ut_get_node_name
-						  (device_node),
-						  acpi_format_exception
-						  (status)));
-
-				return_ACPI_STATUS(status);
-			}
-		} else {
-			/* Extract the Dstate value */
-
-			highest[i] = (u8) obj_desc->integer.value;
+		if (ACPI_SUCCESS(status)) {
+			out_values[i] = (u8)obj_desc->integer.value;
 
 			/* Delete the return object */
 
 			acpi_ut_remove_reference(obj_desc);
+			final_status = AE_OK;	/* At least one value is valid */
+			continue;
 		}
+
+		out_values[i] = ACPI_UINT8_MAX;
+		if (status == AE_NOT_FOUND) {
+			continue;	/* Ignore if not found */
+		}
+
+		ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+				  "Failed %s on Device %4.4s, %s\n",
+				  ACPI_CAST_PTR(char, method_names[i]),
+				  acpi_ut_get_node_name(device_node),
+				  acpi_format_exception(status)));
 	}
 
-	return_ACPI_STATUS(AE_OK);
+	return_ACPI_STATUS(final_status);
 }
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index 59e46f2..ed7a33c 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -90,7 +90,15 @@ const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT] = {
 	"\\_S5_"
 };
 
-const char *acpi_gbl_highest_dstate_names[4] = {
+const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS] = {
+	"_S0W",
+	"_S1W",
+	"_S2W",
+	"_S3W",
+	"_S4W"
+};
+
+const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS] = {
 	"_S1D",
 	"_S2D",
 	"_S3D",
diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c
new file mode 100644
index 0000000..52eaae4
--- /dev/null
+++ b/drivers/acpi/acpica/utids.c
@@ -0,0 +1,382 @@
+/******************************************************************************
+ *
+ * Module Name: utids - support for device IDs - HID, UID, CID
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2009, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acinterp.h"
+
+#define _COMPONENT          ACPI_UTILITIES
+ACPI_MODULE_NAME("utids")
+
+/* Local prototypes */
+static void acpi_ut_copy_id_string(char *destination, char *source);
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_copy_id_string
+ *
+ * PARAMETERS:  Destination         - Where to copy the string
+ *              Source              - Source string
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods.
+ *              Performs removal of a leading asterisk if present -- workaround
+ *              for a known issue on a bunch of machines.
+ *
+ ******************************************************************************/
+
+static void acpi_ut_copy_id_string(char *destination, char *source)
+{
+
+	/*
+	 * Workaround for ID strings that have a leading asterisk. This construct
+	 * is not allowed by the ACPI specification  (ID strings must be
+	 * alphanumeric), but enough existing machines have this embedded in their
+	 * ID strings that the following code is useful.
+	 */
+	if (*source == '*') {
+		source++;
+	}
+
+	/* Do the actual copy */
+
+	ACPI_STRCPY(destination, source);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_execute_HID
+ *
+ * PARAMETERS:  device_node         - Node for the device
+ *              return_id           - Where the string HID is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Executes the _HID control method that returns the hardware
+ *              ID of the device. The HID is either an 32-bit encoded EISAID
+ *              Integer or a String. A string is always returned. An EISAID
+ *              is converted to a string.
+ *
+ *              NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ut_execute_HID(struct acpi_namespace_node *device_node,
+		    struct acpica_device_id **return_id)
+{
+	union acpi_operand_object *obj_desc;
+	struct acpica_device_id *hid;
+	u32 length;
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(ut_execute_HID);
+
+	status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID,
+					 ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING,
+					 &obj_desc);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/* Get the size of the String to be returned, includes null terminator */
+
+	if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
+		length = ACPI_EISAID_STRING_SIZE;
+	} else {
+		length = obj_desc->string.length + 1;
+	}
+
+	/* Allocate a buffer for the HID */
+
+	hid =
+	    ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) +
+				 (acpi_size) length);
+	if (!hid) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	/* Area for the string starts after DEVICE_ID struct */
+
+	hid->string = ACPI_ADD_PTR(char, hid, sizeof(struct acpica_device_id));
+
+	/* Convert EISAID to a string or simply copy existing string */
+
+	if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
+		acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value);
+	} else {
+		acpi_ut_copy_id_string(hid->string, obj_desc->string.pointer);
+	}
+
+	hid->length = length;
+	*return_id = hid;
+
+cleanup:
+
+	/* On exit, we must delete the return object */
+
+	acpi_ut_remove_reference(obj_desc);
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_execute_UID
+ *
+ * PARAMETERS:  device_node         - Node for the device
+ *              return_id           - Where the string UID is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Executes the _UID control method that returns the unique
+ *              ID of the device. The UID is either a 64-bit Integer (NOT an
+ *              EISAID) or a string. Always returns a string. A 64-bit integer
+ *              is converted to a decimal string.
+ *
+ *              NOTE: Internal function, no parameter validation
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ut_execute_UID(struct acpi_namespace_node *device_node,
+		    struct acpica_device_id **return_id)
+{
+	union acpi_operand_object *obj_desc;
+	struct acpica_device_id *uid;
+	u32 length;
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(ut_execute_UID);
+
+	status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID,
+					 ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING,
+					 &obj_desc);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/* Get the size of the String to be returned, includes null terminator */
+
+	if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
+		length = ACPI_MAX64_DECIMAL_DIGITS + 1;
+	} else {
+		length = obj_desc->string.length + 1;
+	}
+
+	/* Allocate a buffer for the UID */
+
+	uid =
+	    ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) +
+				 (acpi_size) length);
+	if (!uid) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	/* Area for the string starts after DEVICE_ID struct */
+
+	uid->string = ACPI_ADD_PTR(char, uid, sizeof(struct acpica_device_id));
+
+	/* Convert an Integer to string, or just copy an existing string */
+
+	if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
+		acpi_ex_integer_to_string(uid->string, obj_desc->integer.value);
+	} else {
+		acpi_ut_copy_id_string(uid->string, obj_desc->string.pointer);
+	}
+
+	uid->length = length;
+	*return_id = uid;
+
+cleanup:
+
+	/* On exit, we must delete the return object */
+
+	acpi_ut_remove_reference(obj_desc);
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_execute_CID
+ *
+ * PARAMETERS:  device_node         - Node for the device
+ *              return_cid_list     - Where the CID list is returned
+ *
+ * RETURN:      Status, list of CID strings
+ *
+ * DESCRIPTION: Executes the _CID control method that returns one or more
+ *              compatible hardware IDs for the device.
+ *
+ *              NOTE: Internal function, no parameter validation
+ *
+ * A _CID method can return either a single compatible ID or a package of
+ * compatible IDs. Each compatible ID can be one of the following:
+ * 1) Integer (32 bit compressed EISA ID) or
+ * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss")
+ *
+ * The Integer CIDs are converted to string format by this function.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ut_execute_CID(struct acpi_namespace_node *device_node,
+		    struct acpica_device_id_list **return_cid_list)
+{
+	union acpi_operand_object **cid_objects;
+	union acpi_operand_object *obj_desc;
+	struct acpica_device_id_list *cid_list;
+	char *next_id_string;
+	u32 string_area_size;
+	u32 length;
+	u32 cid_list_size;
+	acpi_status status;
+	u32 count;
+	u32 i;
+
+	ACPI_FUNCTION_TRACE(ut_execute_CID);
+
+	/* Evaluate the _CID method for this device */
+
+	status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID,
+					 ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING
+					 | ACPI_BTYPE_PACKAGE, &obj_desc);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/*
+	 * Get the count and size of the returned _CIDs. _CID can return either
+	 * a Package of Integers/Strings or a single Integer or String.
+	 * Note: This section also validates that all CID elements are of the
+	 * correct type (Integer or String).
+	 */
+	if (obj_desc->common.type == ACPI_TYPE_PACKAGE) {
+		count = obj_desc->package.count;
+		cid_objects = obj_desc->package.elements;
+	} else {		/* Single Integer or String CID */
+
+		count = 1;
+		cid_objects = &obj_desc;
+	}
+
+	string_area_size = 0;
+	for (i = 0; i < count; i++) {
+
+		/* String lengths include null terminator */
+
+		switch (cid_objects[i]->common.type) {
+		case ACPI_TYPE_INTEGER:
+			string_area_size += ACPI_EISAID_STRING_SIZE;
+			break;
+
+		case ACPI_TYPE_STRING:
+			string_area_size += cid_objects[i]->string.length + 1;
+			break;
+
+		default:
+			status = AE_TYPE;
+			goto cleanup;
+		}
+	}
+
+	/*
+	 * Now that we know the length of the CIDs, allocate return buffer:
+	 * 1) Size of the base structure +
+	 * 2) Size of the CID DEVICE_ID array +
+	 * 3) Size of the actual CID strings
+	 */
+	cid_list_size = sizeof(struct acpica_device_id_list) +
+	    ((count - 1) * sizeof(struct acpica_device_id)) + string_area_size;
+
+	cid_list = ACPI_ALLOCATE_ZEROED(cid_list_size);
+	if (!cid_list) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	/* Area for CID strings starts after the CID DEVICE_ID array */
+
+	next_id_string = ACPI_CAST_PTR(char, cid_list->ids) +
+	    ((acpi_size) count * sizeof(struct acpica_device_id));
+
+	/* Copy/convert the CIDs to the return buffer */
+
+	for (i = 0; i < count; i++) {
+		if (cid_objects[i]->common.type == ACPI_TYPE_INTEGER) {
+
+			/* Convert the Integer (EISAID) CID to a string */
+
+			acpi_ex_eisa_id_to_string(next_id_string,
+						  cid_objects[i]->integer.
+						  value);
+			length = ACPI_EISAID_STRING_SIZE;
+		} else {	/* ACPI_TYPE_STRING */
+
+			/* Copy the String CID from the returned object */
+
+			acpi_ut_copy_id_string(next_id_string,
+					       cid_objects[i]->string.pointer);
+			length = cid_objects[i]->string.length + 1;
+		}
+
+		cid_list->ids[i].string = next_id_string;
+		cid_list->ids[i].length = length;
+		next_id_string += length;
+	}
+
+	/* Finish the CID list */
+
+	cid_list->count = count;
+	cid_list->list_size = cid_list_size;
+	*return_cid_list = cid_list;
+
+cleanup:
+
+	/* On exit, we must delete the _CID return object */
+
+	acpi_ut_remove_reference(obj_desc);
+	return_ACPI_STATUS(status);
+}
diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c
index fbe7823..9cd6533 100644
--- a/drivers/acpi/acpica/utmisc.c
+++ b/drivers/acpi/acpica/utmisc.c
@@ -120,6 +120,34 @@ const char *acpi_ut_validate_exception(acpi_status status)
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_ut_is_pci_root_bridge
+ *
+ * PARAMETERS:  Id              - The HID/CID in string format
+ *
+ * RETURN:      TRUE if the Id is a match for a PCI/PCI-Express Root Bridge
+ *
+ * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID.
+ *
+ ******************************************************************************/
+
+u8 acpi_ut_is_pci_root_bridge(char *id)
+{
+
+	/*
+	 * Check if this is a PCI root bridge.
+	 * ACPI 3.0+: check for a PCI Express root also.
+	 */
+	if (!(ACPI_STRCMP(id,
+			  PCI_ROOT_HID_STRING)) ||
+	    !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) {
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_ut_is_aml_table
  *
  * PARAMETERS:  Table               - An ACPI table
diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c
index fe0cdf8..2aee8c2 100644
--- a/drivers/acpi/container.c
+++ b/drivers/acpi/container.c
@@ -200,20 +200,17 @@ container_walk_namespace_cb(acpi_handle handle,
 			    u32 lvl, void *context, void **rv)
 {
 	char *hid = NULL;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_device_info *info;
 	acpi_status status;
 	int *action = context;
 
-
-	status = acpi_get_object_info(handle, &buffer);
-	if (ACPI_FAILURE(status) || !buffer.pointer) {
+	status = acpi_get_object_info(handle, &info);
+	if (ACPI_FAILURE(status)) {
 		return AE_OK;
 	}
 
-	info = buffer.pointer;
 	if (info->valid & ACPI_VALID_HID)
-		hid = info->hardware_id.value;
+		hid = info->hardware_id.string;
 
 	if (hid == NULL) {
 		goto end;
@@ -240,7 +237,7 @@ container_walk_namespace_cb(acpi_handle handle,
 	}
 
       end:
-	kfree(buffer.pointer);
+	kfree(info);
 
 	return AE_OK;
 }
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index efb959d..39536b8 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -231,18 +231,16 @@ static int is_ata(acpi_handle handle)
 static int is_battery(acpi_handle handle)
 {
 	struct acpi_device_info *info;
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
 	int ret = 1;
 
-	if (!ACPI_SUCCESS(acpi_get_object_info(handle, &buffer)))
+	if (!ACPI_SUCCESS(acpi_get_object_info(handle, &info)))
 		return 0;
-	info = buffer.pointer;
 	if (!(info->valid & ACPI_VALID_HID))
 		ret = 0;
 	else
-		ret = !strcmp("PNP0C0A", info->hardware_id.value);
+		ret = !strcmp("PNP0C0A", info->hardware_id.string);
 
-	kfree(buffer.pointer);
+	kfree(info);
 	return ret;
 }
 
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index a8a5c29..27a7072 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -93,15 +93,13 @@ do_acpi_find_child(acpi_handle handle, u32 lvl, void *context, void **rv)
 {
 	acpi_status status;
 	struct acpi_device_info *info;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_find_child *find = context;
 
-	status = acpi_get_object_info(handle, &buffer);
+	status = acpi_get_object_info(handle, &info);
 	if (ACPI_SUCCESS(status)) {
-		info = buffer.pointer;
 		if (info->address == find->address)
 			find->handle = handle;
-		kfree(buffer.pointer);
+		kfree(info);
 	}
 	return AE_OK;
 }
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 781435d..0ab526d 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -60,13 +60,13 @@ static int create_modalias(struct acpi_device *acpi_dev, char *modalias,
 	}
 
 	if (acpi_dev->flags.compatible_ids) {
-		struct acpi_compatible_id_list *cid_list;
+		struct acpica_device_id_list *cid_list;
 		int i;
 
 		cid_list = acpi_dev->pnp.cid_list;
 		for (i = 0; i < cid_list->count; i++) {
 			count = snprintf(&modalias[len], size, "%s:",
-					 cid_list->id[i].value);
+					 cid_list->ids[i].string);
 			if (count < 0 || count >= size) {
 				printk(KERN_ERR PREFIX "%s cid[%i] exceeds event buffer size",
 				       acpi_dev->pnp.device_name, i);
@@ -287,14 +287,14 @@ int acpi_match_device_ids(struct acpi_device *device,
 	}
 
 	if (device->flags.compatible_ids) {
-		struct acpi_compatible_id_list *cid_list = device->pnp.cid_list;
+		struct acpica_device_id_list *cid_list = device->pnp.cid_list;
 		int i;
 
 		for (id = ids; id->id[0]; id++) {
 			/* compare multiple _CID entries against driver ids */
 			for (i = 0; i < cid_list->count; i++) {
 				if (!strcmp((char*)id->id,
-					    cid_list->id[i].value))
+					    cid_list->ids[i].string))
 					return 0;
 			}
 		}
@@ -999,33 +999,89 @@ static int acpi_dock_match(struct acpi_device *device)
 	return acpi_get_handle(device->handle, "_DCK", &tmp);
 }
 
+static struct acpica_device_id_list*
+acpi_add_cid(
+	struct acpi_device_info         *info,
+	struct acpica_device_id         *new_cid)
+{
+	struct acpica_device_id_list    *cid;
+	char                            *next_id_string;
+	acpi_size                       cid_length;
+	acpi_size                       new_cid_length;
+	u32                             i;
+
+
+	/* Allocate new CID list with room for the new CID */
+
+	if (!new_cid)
+		new_cid_length = info->compatible_id_list.list_size;
+	else if (info->compatible_id_list.list_size)
+		new_cid_length = info->compatible_id_list.list_size +
+			new_cid->length + sizeof(struct acpica_device_id);
+	else
+		new_cid_length = sizeof(struct acpica_device_id_list) + new_cid->length;
+
+	cid = ACPI_ALLOCATE_ZEROED(new_cid_length);
+	if (!cid) {
+		return NULL;
+	}
+
+	cid->list_size = new_cid_length;
+	cid->count = info->compatible_id_list.count;
+	if (new_cid)
+		cid->count++;
+	next_id_string = (char *) cid->ids + (cid->count * sizeof(struct acpica_device_id));
+
+	/* Copy all existing CIDs */
+
+	for (i = 0; i < info->compatible_id_list.count; i++) {
+		cid_length = info->compatible_id_list.ids[i].length;
+		cid->ids[i].string = next_id_string;
+		cid->ids[i].length = cid_length;
+
+		ACPI_MEMCPY(next_id_string, info->compatible_id_list.ids[i].string,
+			cid_length);
+
+		next_id_string += cid_length;
+	}
+
+	/* Append the new CID */
+
+	if (new_cid) {
+		cid->ids[i].string = next_id_string;
+		cid->ids[i].length = new_cid->length;
+
+		ACPI_MEMCPY(next_id_string, new_cid->string, new_cid->length);
+	}
+
+	return cid;
+}
+
 static void acpi_device_set_id(struct acpi_device *device,
 			       struct acpi_device *parent, acpi_handle handle,
 			       int type)
 {
-	struct acpi_device_info *info;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_device_info *info = NULL;
 	char *hid = NULL;
 	char *uid = NULL;
-	struct acpi_compatible_id_list *cid_list = NULL;
-	const char *cid_add = NULL;
+	struct acpica_device_id_list *cid_list = NULL;
+	char *cid_add = NULL;
 	acpi_status status;
 
 	switch (type) {
 	case ACPI_BUS_TYPE_DEVICE:
-		status = acpi_get_object_info(handle, &buffer);
+		status = acpi_get_object_info(handle, &info);
 		if (ACPI_FAILURE(status)) {
 			printk(KERN_ERR PREFIX "%s: Error reading device info\n", __func__);
 			return;
 		}
 
-		info = buffer.pointer;
 		if (info->valid & ACPI_VALID_HID)
-			hid = info->hardware_id.value;
+			hid = info->hardware_id.string;
 		if (info->valid & ACPI_VALID_UID)
-			uid = info->unique_id.value;
+			uid = info->unique_id.string;
 		if (info->valid & ACPI_VALID_CID)
-			cid_list = &info->compatibility_id;
+			cid_list = &info->compatible_id_list;
 		if (info->valid & ACPI_VALID_ADR) {
 			device->pnp.bus_address = info->address;
 			device->flags.bus_address = 1;
@@ -1076,55 +1132,44 @@ static void acpi_device_set_id(struct acpi_device *device,
 	}
 
 	if (hid) {
-		strcpy(device->pnp.hardware_id, hid);
-		device->flags.hardware_id = 1;
-	}
+		device->pnp.hardware_id = ACPI_ALLOCATE_ZEROED(strlen (hid) + 1);
+		if (device->pnp.hardware_id) {
+			strcpy(device->pnp.hardware_id, hid);
+			device->flags.hardware_id = 1;
+		}
+	} else
+		device->pnp.hardware_id = NULL;
+
 	if (uid) {
-		strcpy(device->pnp.unique_id, uid);
-		device->flags.unique_id = 1;
-	}
+		device->pnp.unique_id = ACPI_ALLOCATE_ZEROED(strlen (uid) + 1);
+		if (device->pnp.unique_id) {
+			strcpy(device->pnp.unique_id, uid);
+			device->flags.unique_id = 1;
+		}
+	} else
+		device->pnp.unique_id = NULL;
+
 	if (cid_list || cid_add) {
-		struct  acpi_compatible_id_list *list;
-		int size = 0;
-		int count = 0;
-
-		if (cid_list) {
-			size = cid_list->size;
-		} else if (cid_add) {
-			size = sizeof(struct acpi_compatible_id_list);
-			cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size);
-			if (!cid_list) {
-				printk(KERN_ERR "Memory allocation error\n");
-				kfree(buffer.pointer);
-				return;
-			} else {
-				cid_list->count = 0;
-				cid_list->size = size;
-			}
+		struct acpica_device_id_list *list;
+
+		if (cid_add) {
+			struct acpica_device_id cid;
+			cid.length = strlen (cid_add) + 1;
+			cid.string = cid_add;
+
+			list = acpi_add_cid(info, &cid);
+		} else {
+			list = acpi_add_cid(info, NULL);
 		}
-		if (cid_add)
-			size += sizeof(struct acpi_compatible_id);
-		list = kmalloc(size, GFP_KERNEL);
 
 		if (list) {
-			if (cid_list) {
-				memcpy(list, cid_list, cid_list->size);
-				count = cid_list->count;
-			}
-			if (cid_add) {
-				strncpy(list->id[count].value, cid_add,
-					ACPI_MAX_CID_LENGTH);
-				count++;
-				device->flags.compatible_ids = 1;
-			}
-			list->size = size;
-			list->count = count;
 			device->pnp.cid_list = list;
-		} else
-			printk(KERN_ERR PREFIX "Memory allocation error\n");
+			if (cid_add)
+				device->flags.compatible_ids = 1;
+		}
 	}
 
-	kfree(buffer.pointer);
+	kfree(info);
 }
 
 static int acpi_device_set_context(struct acpi_device *device, int type)
diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c
index 8f3d4c1..7bead4c 100644
--- a/drivers/char/agp/hp-agp.c
+++ b/drivers/char/agp/hp-agp.c
@@ -478,7 +478,6 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret)
 {
 	acpi_handle handle, parent;
 	acpi_status status;
-	struct acpi_buffer buffer;
 	struct acpi_device_info *info;
 	u64 lba_hpa, sba_hpa, length;
 	int match;
@@ -490,13 +489,11 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret)
 	/* Look for an enclosing IOC scope and find its CSR space */
 	handle = obj;
 	do {
-		buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
-		status = acpi_get_object_info(handle, &buffer);
+		status = acpi_get_object_info(handle, &info);
 		if (ACPI_SUCCESS(status)) {
 			/* TBD check _CID also */
-			info = buffer.pointer;
-			info->hardware_id.value[sizeof(info->hardware_id)-1] = '\0';
-			match = (strcmp(info->hardware_id.value, "HWP0001") == 0);
+			info->hardware_id.string[sizeof(info->hardware_id.length)-1] = '\0';
+			match = (strcmp(info->hardware_id.string, "HWP0001") == 0);
 			kfree(info);
 			if (match) {
 				status = hp_acpi_csr_space(handle, &sba_hpa, &length);
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index c509c99..c0cf45a 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -114,8 +114,6 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle,
 	unsigned int bus, devnum, func;
 	acpi_integer addr;
 	acpi_handle dev_handle;
-	struct acpi_buffer buffer = {.length = ACPI_ALLOCATE_BUFFER,
-					.pointer = NULL};
 	acpi_status status;
 	struct acpi_device_info	*dinfo = NULL;
 	int ret = -ENODEV;
@@ -134,12 +132,11 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle,
 		goto err;
 	}
 
-	status = acpi_get_object_info(dev_handle, &buffer);
+	status = acpi_get_object_info(dev_handle, &dinfo);
 	if (ACPI_FAILURE(status)) {
 		DEBPRINT("get_object_info for device failed\n");
 		goto err;
 	}
-	dinfo = buffer.pointer;
 	if (dinfo && (dinfo->valid & ACPI_VALID_ADR) &&
 	    dinfo->address == addr) {
 		*pcidevfn = addr;
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 5befa7e..a9d926b 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -398,23 +398,21 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
 	acpi_handle *phandle = (acpi_handle *)context;
 	acpi_status status; 
 	struct acpi_device_info *info;
-	struct acpi_buffer info_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	int retval = 0;
 
-	status = acpi_get_object_info(handle, &info_buffer);
+	status = acpi_get_object_info(handle, &info);
 	if (ACPI_FAILURE(status)) {
 		err("%s:  Failed to get device information status=0x%x\n",
 			__func__, status);
 		return retval;
 	}
-	info = info_buffer.pointer;
-	info->hardware_id.value[sizeof(info->hardware_id.value) - 1] = '\0';
+	info->hardware_id.string[sizeof(info->hardware_id.length) - 1] = '\0';
 
 	if (info->current_status && (info->valid & ACPI_VALID_HID) &&
-			(!strcmp(info->hardware_id.value, IBM_HARDWARE_ID1) ||
-			 !strcmp(info->hardware_id.value, IBM_HARDWARE_ID2))) {
+			(!strcmp(info->hardware_id.string, IBM_HARDWARE_ID1) ||
+			 !strcmp(info->hardware_id.string, IBM_HARDWARE_ID2))) {
 		dbg("found hardware: %s, handle: %p\n",
-			info->hardware_id.value, handle);
+			info->hardware_id.string, handle);
 		*phandle = handle;
 		/* returning non-zero causes the search to stop
 		 * and returns this value to the caller of 
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index dafaa4a..f9f68e0 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -976,15 +976,12 @@ static acpi_status sony_walk_callback(acpi_handle handle, u32 level,
 				      void *context, void **return_value)
 {
 	struct acpi_device_info *info;
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
-
-	if (ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) {
-		info = buffer.pointer;
 
+	if (ACPI_SUCCESS(acpi_get_object_info(handle, &info))) {
 		printk(KERN_WARNING DRV_PFX "method: name: %4.4s, args %X\n",
 			(char *)&info->name, info->param_count);
 
-		kfree(buffer.pointer);
+		kfree(info);
 	}
 
 	return AE_OK;
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index 9496494f..c07fdb9 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -194,13 +194,13 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
 		pnpacpi_parse_resource_option_data(dev);
 
 	if (device->flags.compatible_ids) {
-		struct acpi_compatible_id_list *cid_list = device->pnp.cid_list;
+		struct acpica_device_id_list *cid_list = device->pnp.cid_list;
 		int i;
 
 		for (i = 0; i < cid_list->count; i++) {
-			if (!ispnpidacpi(cid_list->id[i].value))
+			if (!ispnpidacpi(cid_list->ids[i].string))
 				continue;
-			pnp_add_id(dev, cid_list->id[i].value);
+			pnp_add_id(dev, cid_list->ids[i].string);
 		}
 	}
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c65e4ce..b91420b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -173,17 +173,15 @@ struct acpi_device_dir {
 
 typedef char acpi_bus_id[8];
 typedef unsigned long acpi_bus_address;
-typedef char acpi_hardware_id[15];
-typedef char acpi_unique_id[9];
 typedef char acpi_device_name[40];
 typedef char acpi_device_class[20];
 
 struct acpi_device_pnp {
 	acpi_bus_id bus_id;	/* Object name */
 	acpi_bus_address bus_address;	/* _ADR */
-	acpi_hardware_id hardware_id;	/* _HID */
-	struct acpi_compatible_id_list *cid_list;	/* _CIDs */
-	acpi_unique_id unique_id;	/* _UID */
+	char *hardware_id;	/* _HID */
+	struct acpica_device_id_list *cid_list;	/* _CIDs */
+	char *unique_id;	/* _UID */
 	acpi_device_name device_name;	/* Driver-determined */
 	acpi_device_class device_class;	/*        "          */
 };
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index b450a19..04904c7 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -200,7 +200,8 @@ acpi_evaluate_object_typed(acpi_handle object,
 			   acpi_object_type return_type);
 
 acpi_status
-acpi_get_object_info(acpi_handle handle, struct acpi_buffer *return_buffer);
+acpi_get_object_info(acpi_handle handle,
+		     struct acpi_device_info **return_buffer);
 
 acpi_status acpi_install_method(u8 *buffer);
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 37ba576..7a4ff79 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -338,7 +338,7 @@ typedef u32 acpi_physical_address;
 
 /* PM Timer ticks per second (HZ) */
 
-#define PM_TIMER_FREQUENCY  3579545
+#define PM_TIMER_FREQUENCY              3579545
 
 /*******************************************************************************
  *
@@ -969,38 +969,60 @@ acpi_status(*acpi_walk_callback) (acpi_handle obj_handle,
 #define ACPI_INTERRUPT_NOT_HANDLED      0x00
 #define ACPI_INTERRUPT_HANDLED          0x01
 
-/* Length of _HID, _UID, _CID, and UUID values */
+/* Length of 32-bit EISAID values when converted back to a string */
+
+#define ACPI_EISAID_STRING_SIZE         8	/* Includes null terminator */
+
+/* Length of UUID (string) values */
 
-#define ACPI_DEVICE_ID_LENGTH           0x09
-#define ACPI_MAX_CID_LENGTH             48
 #define ACPI_UUID_LENGTH                16
 
-/* Common string version of device HIDs and UIDs */
+/* Structures used for device/processor HID, UID, CID */
 
 struct acpica_device_id {
-	char value[ACPI_DEVICE_ID_LENGTH];
+	u32 length;		/* Length of string + null */
+	char *string;
 };
 
-/* Common string version of device CIDs */
-
-struct acpi_compatible_id {
-	char value[ACPI_MAX_CID_LENGTH];
+struct acpica_device_id_list {
+	u32 count;		/* Number of IDs in Ids array */
+	u32 list_size;		/* Size of list, including ID strings */
+	struct acpica_device_id ids[1];	/* ID array */
 };
 
-struct acpi_compatible_id_list {
-	u32 count;
-	u32 size;
-	struct acpi_compatible_id id[1];
+/*
+ * Structure returned from acpi_get_object_info.
+ * Optimized for both 32- and 64-bit builds
+ */
+struct acpi_device_info {
+	u32 info_size;		/* Size of info, including ID strings */
+	u32 name;		/* ACPI object Name */
+	acpi_object_type type;	/* ACPI object Type */
+	u8 param_count;		/* If a method, required parameter count */
+	u8 valid;		/* Indicates which optional fields are valid */
+	u8 flags;		/* Miscellaneous info */
+	u8 highest_dstates[4];	/* _sx_d values: 0xFF indicates not valid */
+	u8 lowest_dstates[5];	/* _sx_w values: 0xFF indicates not valid */
+	u32 current_status;	/* _STA value */
+	acpi_integer address;	/* _ADR value */
+	struct acpica_device_id hardware_id;	/* _HID value */
+	struct acpica_device_id unique_id;	/* _UID value */
+	struct acpica_device_id_list compatible_id_list;	/* _CID list <must be last> */
 };
 
-/* Structure and flags for acpi_get_object_info */
+/* Values for Flags field above (acpi_get_object_info) */
+
+#define ACPI_PCI_ROOT_BRIDGE            0x01
 
-#define ACPI_VALID_STA                  0x0001
-#define ACPI_VALID_ADR                  0x0002
-#define ACPI_VALID_HID                  0x0004
-#define ACPI_VALID_UID                  0x0008
-#define ACPI_VALID_CID                  0x0010
-#define ACPI_VALID_SXDS                 0x0020
+/* Flags for Valid field above (acpi_get_object_info) */
+
+#define ACPI_VALID_STA                  0x01
+#define ACPI_VALID_ADR                  0x02
+#define ACPI_VALID_HID                  0x04
+#define ACPI_VALID_UID                  0x08
+#define ACPI_VALID_CID                  0x10
+#define ACPI_VALID_SXDS                 0x20
+#define ACPI_VALID_SXWS                 0x40
 
 /* Flags for _STA method */
 
@@ -1011,29 +1033,6 @@ struct acpi_compatible_id_list {
 #define ACPI_STA_DEVICE_OK              0x08	/* Synonym */
 #define ACPI_STA_BATTERY_PRESENT        0x10
 
-#define ACPI_COMMON_OBJ_INFO \
-	acpi_object_type                type;           /* ACPI object type */ \
-	acpi_name                       name	/* ACPI object Name */
-
-struct acpi_obj_info_header {
-	ACPI_COMMON_OBJ_INFO;
-};
-
-/* Structure returned from Get Object Info */
-
-struct acpi_device_info {
-	ACPI_COMMON_OBJ_INFO;
-
-	u32 param_count;	/* If a method, required parameter count */
-	u32 valid;		/* Indicates which fields below are valid */
-	u32 current_status;	/* _STA value */
-	acpi_integer address;	/* _ADR value if any */
-	struct acpica_device_id hardware_id;	/* _HID value if any */
-	struct acpica_device_id unique_id;	/* _UID value if any */
-	u8 highest_dstates[4];	/* _sx_d values: 0xFF indicates not valid */
-	struct acpi_compatible_id_list compatibility_id;	/* List of _CIDs if any */
-};
-
 /* Context structs for address space handlers */
 
 struct acpi_pci_id {
-- 
cgit v0.10.2


From dbdc8f02fe8339686623c84745ba15b0f4f889a1 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 24 Jun 2009 11:22:22 +0800
Subject: ACPICA: Fix possible memory leak in nspredef

Fixed a possible leak when an attempt is made to repair a return
object.  The only current repair is an automatic buffer to string
conversion.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 7f8e066..abbb855 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -1046,22 +1046,25 @@ acpi_ns_repair_object(u32 expected_btypes,
 		ACPI_MEMCPY(new_object->string.pointer,
 			    return_object->buffer.pointer, length);
 
-		/* Install the new return object */
-
-		acpi_ut_remove_reference(return_object);
-		*return_object_ptr = new_object;
-
 		/*
-		 * If the object is a package element, we need to:
-		 * 1. Decrement the reference count of the orignal object, it was
-		 *    incremented when building the package
-		 * 2. Increment the reference count of the new object, it will be
-		 *    decremented when releasing the package
+		 * If the original object is a package element, we need to:
+		 * 1. Set the reference count of the new object to match the
+		 *    reference count of the old object.
+		 * 2. Decrement the reference count of the original object.
 		 */
 		if (package_index != ACPI_NOT_PACKAGE) {
-			acpi_ut_remove_reference(return_object);
-			acpi_ut_add_reference(new_object);
+			new_object->common.reference_count =
+			    return_object->common.reference_count;
+
+			if (return_object->common.reference_count > 1) {
+				return_object->common.reference_count--;
+			}
 		}
+
+		/* Delete old object, install the new return object */
+
+		acpi_ut_remove_reference(return_object);
+		*return_object_ptr = new_object;
 		return (AE_OK);
 
 	default:
-- 
cgit v0.10.2


From 3db20bed579bc4e7fe581c48ad1bde853aa9ff68 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Wed, 24 Jun 2009 11:25:17 +0800
Subject: ACPICA: ACPI 4.0: iASL/Disassembler - IPMI keyword support.

Adds support for the new IPMI operation region keyword.
ACPICA BZ 771, 772.

http://acpica.org/bugzilla/show_bug.cgi?id=771
http://acpica.org/bugzilla/show_bug.cgi?id=772

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h
index 6c1fb2d..9123d5a 100644
--- a/drivers/acpi/acpica/acconfig.h
+++ b/drivers/acpi/acpica/acconfig.h
@@ -183,7 +183,7 @@
 
 /* Operation regions */
 
-#define ACPI_NUM_PREDEFINED_REGIONS     8
+#define ACPI_NUM_PREDEFINED_REGIONS     9
 #define ACPI_USER_REGION_BEGIN          0x80
 
 /* Maximum space_ids for Operation Regions */
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index 067f967..4940249 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -404,6 +404,7 @@ typedef enum {
 	REGION_SMBUS,
 	REGION_CMOS,
 	REGION_PCI_BAR,
+	REGION_IPMI,
 	REGION_DATA_TABLE,	/* Internal use only */
 	REGION_FIXED_HW = 0x7F
 } AML_REGION_TYPES;
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index ed7a33c..9e33b62 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -359,6 +359,7 @@ const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS] = {
 	"SMBus",
 	"SystemCMOS",
 	"PCIBARTarget",
+	"IPMI",
 	"DataTable"
 };
 
-- 
cgit v0.10.2


From 6557a49a443a347d24aed58076365432ded30edc Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Wed, 24 Jun 2009 11:32:04 +0800
Subject: ACPICA: ACPI 4.0: Interpreter support for IPMI.

Adds support for IPMI which is similar to SMBus and uses a bi-directional data buffer.
ACPICA BZ 773.

http://acpica.org/bugzilla/show_bug.cgi?id=773

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h
index 9123d5a..8e679ef 100644
--- a/drivers/acpi/acpica/acconfig.h
+++ b/drivers/acpi/acpica/acconfig.h
@@ -199,9 +199,10 @@
 #define ACPI_RSDP_CHECKSUM_LENGTH       20
 #define ACPI_RSDP_XCHECKSUM_LENGTH      36
 
-/* SMBus bidirectional buffer size */
+/* SMBus and IPMI bidirectional buffer size */
 
 #define ACPI_SMBUS_BUFFER_SIZE          34
+#define ACPI_IPMI_BUFFER_SIZE           66
 
 /* _sx_d and _sx_w control methods */
 
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index 546dcdd..0b33d6c 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -72,6 +72,7 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
 	union acpi_operand_object *buffer_desc;
 	acpi_size length;
 	void *buffer;
+	u32 function;
 
 	ACPI_FUNCTION_TRACE_PTR(ex_read_data_from_field, obj_desc);
 
@@ -97,13 +98,27 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
 		}
 	} else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
 		   (obj_desc->field.region_obj->region.space_id ==
-		    ACPI_ADR_SPACE_SMBUS)) {
+		    ACPI_ADR_SPACE_SMBUS
+		    || obj_desc->field.region_obj->region.space_id ==
+		    ACPI_ADR_SPACE_IPMI)) {
 		/*
-		 * This is an SMBus read.  We must create a buffer to hold the data
-		 * and directly access the region handler.
+		 * This is an SMBus or IPMI read. We must create a buffer to hold
+		 * the data and then directly access the region handler.
+		 *
+		 * Note: Smbus protocol value is passed in upper 16-bits of Function
 		 */
-		buffer_desc =
-		    acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE);
+		if (obj_desc->field.region_obj->region.space_id ==
+		    ACPI_ADR_SPACE_SMBUS) {
+			length = ACPI_SMBUS_BUFFER_SIZE;
+			function =
+			    ACPI_READ | (obj_desc->field.attribute << 16);
+		} else {	/* IPMI */
+
+			length = ACPI_IPMI_BUFFER_SIZE;
+			function = ACPI_READ;
+		}
+
+		buffer_desc = acpi_ut_create_buffer_object(length);
 		if (!buffer_desc) {
 			return_ACPI_STATUS(AE_NO_MEMORY);
 		}
@@ -112,16 +127,13 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
 
 		acpi_ex_acquire_global_lock(obj_desc->common_field.field_flags);
 
-		/*
-		 * Perform the read.
-		 * Note: Smbus protocol value is passed in upper 16-bits of Function
-		 */
+		/* Call the region handler for the read */
+
 		status = acpi_ex_access_region(obj_desc, 0,
 					       ACPI_CAST_PTR(acpi_integer,
 							     buffer_desc->
 							     buffer.pointer),
-					       ACPI_READ | (obj_desc->field.
-							    attribute << 16));
+					       function);
 		acpi_ex_release_global_lock(obj_desc->common_field.field_flags);
 		goto exit;
 	}
@@ -212,6 +224,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
 	u32 length;
 	void *buffer;
 	union acpi_operand_object *buffer_desc;
+	u32 function;
 
 	ACPI_FUNCTION_TRACE_PTR(ex_write_data_to_field, obj_desc);
 
@@ -234,39 +247,56 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
 		}
 	} else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
 		   (obj_desc->field.region_obj->region.space_id ==
-		    ACPI_ADR_SPACE_SMBUS)) {
+		    ACPI_ADR_SPACE_SMBUS
+		    || obj_desc->field.region_obj->region.space_id ==
+		    ACPI_ADR_SPACE_IPMI)) {
 		/*
-		 * This is an SMBus write.  We will bypass the entire field mechanism
-		 * and handoff the buffer directly to the handler.
+		 * This is an SMBus or IPMI write. We will bypass the entire field
+		 * mechanism and handoff the buffer directly to the handler. For
+		 * these address spaces, the buffer is bi-directional; on a write,
+		 * return data is returned in the same buffer.
+		 *
+		 * Source must be a buffer of sufficient size:
+		 * ACPI_SMBUS_BUFFER_SIZE or ACPI_IPMI_BUFFER_SIZE.
 		 *
-		 * Source must be a buffer of sufficient size (ACPI_SMBUS_BUFFER_SIZE).
+		 * Note: SMBus protocol type is passed in upper 16-bits of Function
 		 */
 		if (source_desc->common.type != ACPI_TYPE_BUFFER) {
 			ACPI_ERROR((AE_INFO,
-				    "SMBus write requires Buffer, found type %s",
+				    "SMBus or IPMI write requires Buffer, found type %s",
 				    acpi_ut_get_object_type_name(source_desc)));
 
 			return_ACPI_STATUS(AE_AML_OPERAND_TYPE);
 		}
 
-		if (source_desc->buffer.length < ACPI_SMBUS_BUFFER_SIZE) {
+		if (obj_desc->field.region_obj->region.space_id ==
+		    ACPI_ADR_SPACE_SMBUS) {
+			length = ACPI_SMBUS_BUFFER_SIZE;
+			function =
+			    ACPI_WRITE | (obj_desc->field.attribute << 16);
+		} else {	/* IPMI */
+
+			length = ACPI_IPMI_BUFFER_SIZE;
+			function = ACPI_WRITE;
+		}
+
+		if (source_desc->buffer.length < length) {
 			ACPI_ERROR((AE_INFO,
-				    "SMBus write requires Buffer of length %X, found length %X",
-				    ACPI_SMBUS_BUFFER_SIZE,
-				    source_desc->buffer.length));
+				    "SMBus or IPMI write requires Buffer of length %X, found length %X",
+				    length, source_desc->buffer.length));
 
 			return_ACPI_STATUS(AE_AML_BUFFER_LIMIT);
 		}
 
-		buffer_desc =
-		    acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE);
+		/* Create the bi-directional buffer */
+
+		buffer_desc = acpi_ut_create_buffer_object(length);
 		if (!buffer_desc) {
 			return_ACPI_STATUS(AE_NO_MEMORY);
 		}
 
 		buffer = buffer_desc->buffer.pointer;
-		ACPI_MEMCPY(buffer, source_desc->buffer.pointer,
-			    ACPI_SMBUS_BUFFER_SIZE);
+		ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length);
 
 		/* Lock entire transaction if requested */
 
@@ -275,12 +305,10 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
 		/*
 		 * Perform the write (returns status and perhaps data in the
 		 * same buffer)
-		 * Note: SMBus protocol type is passed in upper 16-bits of Function.
 		 */
 		status = acpi_ex_access_region(obj_desc, 0,
 					       (acpi_integer *) buffer,
-					       ACPI_WRITE | (obj_desc->field.
-							     attribute << 16));
+					       function);
 		acpi_ex_release_global_lock(obj_desc->common_field.field_flags);
 
 		*result_desc = buffer_desc;
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c
index 6687be1..d7b3b41 100644
--- a/drivers/acpi/acpica/exfldio.c
+++ b/drivers/acpi/acpica/exfldio.c
@@ -120,12 +120,13 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc,
 	}
 
 	/*
-	 * Exit now for SMBus address space, it has a non-linear address space
+	 * Exit now for SMBus or IPMI address space, it has a non-linear address space
 	 * and the request cannot be directly validated
 	 */
-	if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS) {
+	if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS ||
+	    rgn_desc->region.space_id == ACPI_ADR_SPACE_IPMI) {
 
-		/* SMBus has a non-linear address space */
+		/* SMBus or IPMI has a non-linear address space */
 
 		return_ACPI_STATUS(AE_OK);
 	}
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 7a4ff79..4371805 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -732,7 +732,8 @@ typedef u8 acpi_adr_space_type;
 #define ACPI_ADR_SPACE_SMBUS            (acpi_adr_space_type) 4
 #define ACPI_ADR_SPACE_CMOS             (acpi_adr_space_type) 5
 #define ACPI_ADR_SPACE_PCI_BAR_TARGET   (acpi_adr_space_type) 6
-#define ACPI_ADR_SPACE_DATA_TABLE       (acpi_adr_space_type) 7
+#define ACPI_ADR_SPACE_IPMI             (acpi_adr_space_type) 7
+#define ACPI_ADR_SPACE_DATA_TABLE       (acpi_adr_space_type) 8
 #define ACPI_ADR_SPACE_FIXED_HARDWARE   (acpi_adr_space_type) 127
 
 /*
-- 
cgit v0.10.2


From 8e4319c425077c4cc540696a5bb6c4d12f017dcd Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 29 Jun 2009 13:43:27 +0800
Subject: ACPICA: Fix several acpi_attach_data problems

Handler was never invoked. Now invoked if/when host node is deleted.
Data object was not automatically deleted when host node was deleted.
Interface to handler had an unused parameter, removed it.
ACPICA BZ 778.

http://acpica.org/bugzilla/show_bug.cgi?id=778

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 94cdc2b..a78e02f 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -144,6 +144,8 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name);
 
 void acpi_ns_delete_node(struct acpi_namespace_node *node);
 
+void acpi_ns_remove_node(struct acpi_namespace_node *node);
+
 void
 acpi_ns_delete_namespace_subtree(struct acpi_namespace_node *parent_handle);
 
diff --git a/drivers/acpi/acpica/nsalloc.c b/drivers/acpi/acpica/nsalloc.c
index efc971a..8a58a1b 100644
--- a/drivers/acpi/acpica/nsalloc.c
+++ b/drivers/acpi/acpica/nsalloc.c
@@ -96,17 +96,68 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name)
  *
  * RETURN:      None
  *
- * DESCRIPTION: Delete a namespace node
+ * DESCRIPTION: Delete a namespace node. All node deletions must come through
+ *              here. Detaches any attached objects, including any attached
+ *              data. If a handler is associated with attached data, it is
+ *              invoked before the node is deleted.
  *
  ******************************************************************************/
 
 void acpi_ns_delete_node(struct acpi_namespace_node *node)
 {
+	union acpi_operand_object *obj_desc;
+
+	ACPI_FUNCTION_NAME(ns_delete_node);
+
+	/* Detach an object if there is one */
+
+	acpi_ns_detach_object(node);
+
+	/*
+	 * Delete an attached data object if present (an object that was created
+	 * and attached via acpi_attach_data). Note: After any normal object is
+	 * detached above, the only possible remaining object is a data object.
+	 */
+	obj_desc = node->object;
+	if (obj_desc && (obj_desc->common.type == ACPI_TYPE_LOCAL_DATA)) {
+
+		/* Invoke the attached data deletion handler if present */
+
+		if (obj_desc->data.handler) {
+			obj_desc->data.handler(node, obj_desc->data.pointer);
+		}
+
+		acpi_ut_remove_reference(obj_desc);
+	}
+
+	/* Now we can delete the node */
+
+	(void)acpi_os_release_object(acpi_gbl_namespace_cache, node);
+
+	ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++);
+	ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, "Node %p, Remaining %X\n",
+			  node, acpi_gbl_current_node_count));
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_remove_node
+ *
+ * PARAMETERS:  Node            - Node to be removed/deleted
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Remove (unlink) and delete a namespace node
+ *
+ ******************************************************************************/
+
+void acpi_ns_remove_node(struct acpi_namespace_node *node)
+{
 	struct acpi_namespace_node *parent_node;
 	struct acpi_namespace_node *prev_node;
 	struct acpi_namespace_node *next_node;
 
-	ACPI_FUNCTION_TRACE_PTR(ns_delete_node, node);
+	ACPI_FUNCTION_TRACE_PTR(ns_remove_node, node);
 
 	parent_node = acpi_ns_get_parent_node(node);
 
@@ -142,12 +193,9 @@ void acpi_ns_delete_node(struct acpi_namespace_node *node)
 		}
 	}
 
-	ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++);
-
-	/* Detach an object if there is one, then delete the node */
+	/* Delete the node and any attached objects */
 
-	acpi_ns_detach_object(node);
-	(void)acpi_os_release_object(acpi_gbl_namespace_cache, node);
+	acpi_ns_delete_node(node);
 	return_VOID;
 }
 
@@ -273,25 +321,11 @@ void acpi_ns_delete_children(struct acpi_namespace_node *parent_node)
 				    parent_node, child_node));
 		}
 
-		/* Now we can free this child object */
-
-		ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++);
-
-		ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS,
-				  "Object %p, Remaining %X\n", child_node,
-				  acpi_gbl_current_node_count));
-
-		/* Detach an object if there is one, then free the child node */
-
-		acpi_ns_detach_object(child_node);
-
-		/* Now we can delete the node */
-
-		(void)acpi_os_release_object(acpi_gbl_namespace_cache,
-					     child_node);
-
-		/* And move on to the next child in the list */
-
+		/*
+		 * Delete this child node and move on to the next child in the list.
+		 * No need to unlink the node since we are deleting the entire branch.
+		 */
+		acpi_ns_delete_node(child_node);
 		child_node = next_node;
 
 	} while (!(flags & ANOBJ_END_OF_PEER_LIST));
@@ -433,7 +467,7 @@ void acpi_ns_delete_namespace_by_owner(acpi_owner_id owner_id)
 
 		if (deletion_node) {
 			acpi_ns_delete_children(deletion_node);
-			acpi_ns_delete_node(deletion_node);
+			acpi_ns_remove_node(deletion_node);
 			deletion_node = NULL;
 		}
 
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index dcd7a6a..a7234e6 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -270,8 +270,7 @@ static acpi_status acpi_ns_delete_subtree(acpi_handle start_handle)
 
 	/* Now delete the starting object, and we are done */
 
-	acpi_ns_delete_node(child_handle);
-
+	acpi_ns_remove_node(child_handle);
 	return_ACPI_STATUS(AE_OK);
 }
 
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 2876fc7..620183f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -141,7 +141,7 @@ int acpi_bus_get_status(struct acpi_device *device)
 EXPORT_SYMBOL(acpi_bus_get_status);
 
 void acpi_bus_private_data_handler(acpi_handle handle,
-				   u32 function, void *context)
+				   void *context)
 {
 	return;
 }
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 27a7072..9a4ce33 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(acpi_get_child);
 
 /* Link ACPI devices with physical devices */
 static void acpi_glue_data_handler(acpi_handle handle,
-				   u32 function, void *context)
+				   void *context)
 {
 	/* we provide an empty handler */
 }
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 0ab526d..9606af1 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -687,7 +687,7 @@ acpi_bus_get_ejd(acpi_handle handle, acpi_handle *ejd)
 }
 EXPORT_SYMBOL_GPL(acpi_bus_get_ejd);
 
-void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context)
+void acpi_bus_data_handler(acpi_handle handle, void *context)
 {
 
 	/* TBD */
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index b91420b..6e83a68 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -312,7 +312,7 @@ struct acpi_bus_event {
 
 extern struct kobject *acpi_kobj;
 extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int);
-void acpi_bus_private_data_handler(acpi_handle, u32, void *);
+void acpi_bus_private_data_handler(acpi_handle, void *);
 int acpi_bus_get_private_data(acpi_handle, void **);
 extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32);
 extern int register_acpi_notifier(struct notifier_block *);
@@ -325,7 +325,7 @@ extern void unregister_acpi_bus_notifier(struct notifier_block *nb);
  */
 
 int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device);
-void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context);
+void acpi_bus_data_handler(acpi_handle handle, void *context);
 int acpi_bus_get_status(struct acpi_device *device);
 int acpi_bus_get_power(acpi_handle handle, int *state);
 int acpi_bus_set_power(acpi_handle handle, int state);
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 4371805..ef46011 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -922,7 +922,7 @@ typedef
 void (*acpi_notify_handler) (acpi_handle device, u32 value, void *context);
 
 typedef
-void (*acpi_object_handler) (acpi_handle object, u32 function, void *data);
+void (*acpi_object_handler) (acpi_handle object, void *data);
 
 typedef acpi_status(*acpi_init_handler) (acpi_handle object, u32 function);
 
-- 
cgit v0.10.2


From cf02cd47d4747abf8ff0617e15fc05a00202e6d5 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 24 Jun 2009 11:38:46 +0800
Subject: ACPICA: Dump table header - suppress output of non-printable
 characters

Function acpi_tb_print_table_header. Some ACPI tables contain
non-printable characters in one of the string fields of the the
header - Signature, OemId, OemTableId, or CompilerId. Invalid
characters are replaced by '?'. ACPICA BZ 788.

http://acpica.org/bugzilla/show_bug.cgi?id=788

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index ef7d2c2..1f15497 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -49,6 +49,12 @@
 ACPI_MODULE_NAME("tbutils")
 
 /* Local prototypes */
+static void acpi_tb_fix_string(char *string, acpi_size length);
+
+static void
+acpi_tb_cleanup_table_header(struct acpi_table_header *out_header,
+			     struct acpi_table_header *header);
+
 static acpi_physical_address
 acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size);
 
@@ -161,6 +167,59 @@ u8 acpi_tb_tables_loaded(void)
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_tb_fix_string
+ *
+ * PARAMETERS:  String              - String to be repaired
+ *              Length              - Maximum length
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Replace every non-printable or non-ascii byte in the string
+ *              with a question mark '?'.
+ *
+ ******************************************************************************/
+
+static void acpi_tb_fix_string(char *string, acpi_size length)
+{
+
+	while (length && *string) {
+		if (!ACPI_IS_PRINT(*string)) {
+			*string = '?';
+		}
+		string++;
+		length--;
+	}
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_tb_cleanup_table_header
+ *
+ * PARAMETERS:  out_header          - Where the cleaned header is returned
+ *              Header              - Input ACPI table header
+ *
+ * RETURN:      Returns the cleaned header in out_header
+ *
+ * DESCRIPTION: Copy the table header and ensure that all "string" fields in
+ *              the header consist of printable characters.
+ *
+ ******************************************************************************/
+
+static void
+acpi_tb_cleanup_table_header(struct acpi_table_header *out_header,
+			     struct acpi_table_header *header)
+{
+
+	ACPI_MEMCPY(out_header, header, sizeof(struct acpi_table_header));
+
+	acpi_tb_fix_string(out_header->signature, ACPI_NAME_SIZE);
+	acpi_tb_fix_string(out_header->oem_id, ACPI_OEM_ID_SIZE);
+	acpi_tb_fix_string(out_header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
+	acpi_tb_fix_string(out_header->asl_compiler_id, ACPI_NAME_SIZE);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_tb_print_table_header
  *
  * PARAMETERS:  Address             - Table physical address
@@ -176,6 +235,7 @@ void
 acpi_tb_print_table_header(acpi_physical_address address,
 			   struct acpi_table_header *header)
 {
+	struct acpi_table_header local_header;
 
 	/*
 	 * The reason that the Address is cast to a void pointer is so that we
@@ -192,6 +252,11 @@ acpi_tb_print_table_header(acpi_physical_address address,
 
 		/* RSDP has no common fields */
 
+		ACPI_MEMCPY(local_header.oem_id,
+			    ACPI_CAST_PTR(struct acpi_table_rsdp,
+					  header)->oem_id, ACPI_OEM_ID_SIZE);
+		acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE);
+
 		ACPI_INFO((AE_INFO, "RSDP %p %05X (v%.2d %6.6s)",
 			   ACPI_CAST_PTR (void, address),
 			   (ACPI_CAST_PTR(struct acpi_table_rsdp, header)->
@@ -200,18 +265,21 @@ acpi_tb_print_table_header(acpi_physical_address address,
 					       header)->length : 20,
 			   ACPI_CAST_PTR(struct acpi_table_rsdp,
 					 header)->revision,
-			   ACPI_CAST_PTR(struct acpi_table_rsdp,
-					 header)->oem_id));
+			   local_header.oem_id));
 	} else {
 		/* Standard ACPI table with full common header */
 
+		acpi_tb_cleanup_table_header(&local_header, header);
+
 		ACPI_INFO((AE_INFO,
 			   "%4.4s %p %05X (v%.2d %6.6s %8.8s %08X %4.4s %08X)",
-			   header->signature, ACPI_CAST_PTR (void, address),
-			   header->length, header->revision, header->oem_id,
-			   header->oem_table_id, header->oem_revision,
-			   header->asl_compiler_id,
-			   header->asl_compiler_revision));
+			   local_header.signature, ACPI_CAST_PTR(void, address),
+			   local_header.length, local_header.revision,
+			   local_header.oem_id, local_header.oem_table_id,
+			   local_header.oem_revision,
+			   local_header.asl_compiler_id,
+			   local_header.asl_compiler_revision));
+
 	}
 }
 
-- 
cgit v0.10.2


From 0444e8f6d72d6e38f92d48884bc90bbc6c22fd5a Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 24 Jun 2009 13:38:02 +0800
Subject: ACPICA: Fix: Predefined object repair executed only once

This fixes a problem where the code that attempts to repair/convert
an object of incorrect type is only executed on the first time the
predefined method is called. The mechanism that disables warnings
on subsequent calls was interfering with the repair mechanism.
ACPICA BZ 781.

http://acpica.org/bugzilla/show_bug.cgi?id=781

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index ee986ed..ff6689e 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -369,6 +369,19 @@ union acpi_predefined_info {
 	struct acpi_package_info3 ret_info3;
 };
 
+/* Data block used during object validation */
+
+struct acpi_predefined_data {
+	char *pathname;
+	const union acpi_predefined_info *predefined;
+	u32 flags;
+	u8 node_flags;
+};
+
+/* Defines for Flags field above */
+
+#define ACPI_OBJECT_REPAIRED    1
+
 /*
  * Bitmapped return value types
  * Note: the actual data types must be contiguous, a loop in nspredef.c
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index 91ac7d7..3acd9c6 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -340,6 +340,7 @@
  */
 #define ACPI_ERROR_NAMESPACE(s, e)      acpi_ns_report_error (AE_INFO, s, e);
 #define ACPI_ERROR_METHOD(s, n, p, e)   acpi_ns_report_method_error (AE_INFO, s, n, p, e);
+#define ACPI_WARN_PREDEFINED(plist)     acpi_ut_predefined_warning plist
 
 #else
 
@@ -347,6 +348,7 @@
 
 #define ACPI_ERROR_NAMESPACE(s, e)
 #define ACPI_ERROR_METHOD(s, n, p, e)
+#define ACPI_WARN_PREDEFINED(plist)
 #endif		/* ACPI_NO_ERROR_MESSAGES */
 
 /*
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index b0add85..863a264 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -475,6 +475,12 @@ u8 acpi_ut_valid_acpi_char(char character, u32 position);
 acpi_status
 acpi_ut_strtoul64(char *string, u32 base, acpi_integer * ret_integer);
 
+void ACPI_INTERNAL_VAR_XFACE
+acpi_ut_predefined_warning(const char *module_name,
+			   u32 line_number,
+			   char *pathname,
+			   u8 node_flags, const char *format, ...);
+
 /* Values for Base above (16=Hex, 10=Decimal) */
 
 #define ACPI_ANY_BASE        0
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index abbb855..1dc1a47 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -72,31 +72,33 @@ ACPI_MODULE_NAME("nspredef")
  ******************************************************************************/
 /* Local prototypes */
 static acpi_status
-acpi_ns_check_package(char *pathname,
-		      union acpi_operand_object **return_object_ptr,
-		      const union acpi_predefined_info *predefined);
+acpi_ns_check_package(struct acpi_predefined_data *data,
+		      union acpi_operand_object **return_object_ptr);
 
 static acpi_status
-acpi_ns_check_package_elements(char *pathname,
+acpi_ns_check_package_elements(struct acpi_predefined_data *data,
 			       union acpi_operand_object **elements,
 			       u8 type1,
 			       u32 count1,
 			       u8 type2, u32 count2, u32 start_index);
 
 static acpi_status
-acpi_ns_check_object_type(char *pathname,
+acpi_ns_check_object_type(struct acpi_predefined_data *data,
 			  union acpi_operand_object **return_object_ptr,
 			  u32 expected_btypes, u32 package_index);
 
 static acpi_status
-acpi_ns_check_reference(char *pathname,
+acpi_ns_check_reference(struct acpi_predefined_data *data,
 			union acpi_operand_object *return_object);
 
 static acpi_status
-acpi_ns_repair_object(u32 expected_btypes,
+acpi_ns_repair_object(struct acpi_predefined_data *data,
+		      u32 expected_btypes,
 		      u32 package_index,
 		      union acpi_operand_object **return_object_ptr);
 
+static void acpi_ns_get_expected_types(char *buffer, u32 expected_btypes);
+
 /*
  * Names for the types that can be returned by the predefined objects.
  * Used for warning messages. Must be in the same order as the ACPI_RTYPEs
@@ -109,13 +111,21 @@ static const char *acpi_rtype_names[] = {
 	"/Reference",
 };
 
-#define ACPI_NOT_PACKAGE    ACPI_UINT32_MAX
+/* Object is not a package element */
+
+#define ACPI_NOT_PACKAGE_ELEMENT    ACPI_UINT32_MAX
+
+/* Always emit warning message, not dependent on node flags */
+
+#define ACPI_WARN_ALWAYS            0
 
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ns_check_predefined_names
  *
  * PARAMETERS:  Node            - Namespace node for the method/object
+ *              user_param_count - Number of parameters actually passed
+ *              return_status   - Status from the object evaluation
  *              return_object_ptr - Pointer to the object returned from the
  *                                evaluation of a method or object
  *
@@ -135,12 +145,13 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
 	acpi_status status = AE_OK;
 	const union acpi_predefined_info *predefined;
 	char *pathname;
+	struct acpi_predefined_data *data;
 
 	/* Match the name for this method/object against the predefined list */
 
 	predefined = acpi_ns_check_for_predefined_name(node);
 
-	/* Get the full pathname to the object, for use in error messages */
+	/* Get the full pathname to the object, for use in warning messages */
 
 	pathname = acpi_ns_get_external_pathname(node);
 	if (!pathname) {
@@ -158,28 +169,17 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
 	/* If not a predefined name, we cannot validate the return object */
 
 	if (!predefined) {
-		goto exit;
-	}
-
-	/* If the method failed, we cannot validate the return object */
-
-	if ((return_status != AE_OK) && (return_status != AE_CTRL_RETURN_VALUE)) {
-		goto exit;
+		goto cleanup;
 	}
 
 	/*
-	 * Only validate the return value on the first successful evaluation of
-	 * the method. This ensures that any warnings will only be emitted during
-	 * the very first evaluation of the method/object.
+	 * If the method failed or did not actually return an object, we cannot
+	 * validate the return object
 	 */
-	if (node->flags & ANOBJ_EVALUATED) {
-		goto exit;
+	if ((return_status != AE_OK) && (return_status != AE_CTRL_RETURN_VALUE)) {
+		goto cleanup;
 	}
 
-	/* Mark the node as having been successfully evaluated */
-
-	node->flags |= ANOBJ_EVALUATED;
-
 	/*
 	 * If there is no return value, check if we require a return value for
 	 * this predefined name. Either one return value is expected, or none,
@@ -190,46 +190,63 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
 	if (!return_object) {
 		if ((predefined->info.expected_btypes) &&
 		    (!(predefined->info.expected_btypes & ACPI_RTYPE_NONE))) {
-			ACPI_ERROR((AE_INFO,
-				    "%s: Missing expected return value",
-				    pathname));
+			ACPI_WARN_PREDEFINED((AE_INFO, pathname,
+					      ACPI_WARN_ALWAYS,
+					      "Missing expected return value"));
 
 			status = AE_AML_NO_RETURN_VALUE;
 		}
-		goto exit;
+		goto cleanup;
 	}
 
 	/*
 	 * We have a return value, but if one wasn't expected, just exit, this is
-	 * not a problem
-	 *
-	 * For example, if the "Implicit Return" feature is enabled, methods will
-	 * always return a value
+	 * not a problem. For example, if the "Implicit Return" feature is
+	 * enabled, methods will always return a value.
 	 */
 	if (!predefined->info.expected_btypes) {
-		goto exit;
+		goto cleanup;
+	}
+
+	/* Create the parameter data block for object validation */
+
+	data = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_predefined_data));
+	if (!data) {
+		goto cleanup;
 	}
+	data->predefined = predefined;
+	data->node_flags = node->flags;
+	data->pathname = pathname;
 
 	/*
 	 * Check that the type of the return object is what is expected for
 	 * this predefined name
 	 */
-	status = acpi_ns_check_object_type(pathname, return_object_ptr,
+	status = acpi_ns_check_object_type(data, return_object_ptr,
 					   predefined->info.expected_btypes,
-					   ACPI_NOT_PACKAGE);
+					   ACPI_NOT_PACKAGE_ELEMENT);
 	if (ACPI_FAILURE(status)) {
-		goto exit;
+		goto check_validation_status;
 	}
 
 	/* For returned Package objects, check the type of all sub-objects */
 
 	if (return_object->common.type == ACPI_TYPE_PACKAGE) {
-		status =
-		    acpi_ns_check_package(pathname, return_object_ptr,
-					  predefined);
+		status = acpi_ns_check_package(data, return_object_ptr);
 	}
 
-      exit:
+check_validation_status:
+	/*
+	 * If the object validation failed or if we successfully repaired one
+	 * or more objects, mark the parent node to suppress further warning
+	 * messages during the next evaluation of the same method/object.
+	 */
+	if (ACPI_FAILURE(status) || (data->flags & ACPI_OBJECT_REPAIRED)) {
+		node->flags |= ANOBJ_EVALUATED;
+	}
+	ACPI_FREE(data);
+
+cleanup:
 	ACPI_FREE(pathname);
 	return (status);
 }
@@ -268,64 +285,58 @@ acpi_ns_check_parameter_count(char *pathname,
 		param_count = node->object->method.param_count;
 	}
 
-	/* Argument count check for non-predefined methods/objects */
-
 	if (!predefined) {
 		/*
+		 * Check the parameter count for non-predefined methods/objects.
+		 *
 		 * Warning if too few or too many arguments have been passed by the
 		 * caller. An incorrect number of arguments may not cause the method
 		 * to fail. However, the method will fail if there are too few
 		 * arguments and the method attempts to use one of the missing ones.
 		 */
 		if (user_param_count < param_count) {
-			ACPI_WARNING((AE_INFO,
-				      "%s: Insufficient arguments - needs %d, found %d",
-				      pathname, param_count, user_param_count));
+			ACPI_WARN_PREDEFINED((AE_INFO, pathname,
+					      ACPI_WARN_ALWAYS,
+					      "Insufficient arguments - needs %u, found %u",
+					      param_count, user_param_count));
 		} else if (user_param_count > param_count) {
-			ACPI_WARNING((AE_INFO,
-				      "%s: Excess arguments - needs %d, found %d",
-				      pathname, param_count, user_param_count));
+			ACPI_WARN_PREDEFINED((AE_INFO, pathname,
+					      ACPI_WARN_ALWAYS,
+					      "Excess arguments - needs %u, found %u",
+					      param_count, user_param_count));
 		}
 		return;
 	}
 
-	/* Allow two different legal argument counts (_SCP, etc.) */
-
+	/*
+	 * Validate the user-supplied parameter count.
+	 * Allow two different legal argument counts (_SCP, etc.)
+	 */
 	required_params_current = predefined->info.param_count & 0x0F;
 	required_params_old = predefined->info.param_count >> 4;
 
 	if (user_param_count != ACPI_UINT32_MAX) {
-
-		/* Validate the user-supplied parameter count */
-
 		if ((user_param_count != required_params_current) &&
 		    (user_param_count != required_params_old)) {
-			ACPI_WARNING((AE_INFO,
-				      "%s: Parameter count mismatch - "
-				      "caller passed %d, ACPI requires %d",
-				      pathname, user_param_count,
-				      required_params_current));
+			ACPI_WARN_PREDEFINED((AE_INFO, pathname,
+					      ACPI_WARN_ALWAYS,
+					      "Parameter count mismatch - "
+					      "caller passed %u, ACPI requires %u",
+					      user_param_count,
+					      required_params_current));
 		}
 	}
 
 	/*
-	 * Only validate the argument count on the first successful evaluation of
-	 * the method. This ensures that any warnings will only be emitted during
-	 * the very first evaluation of the method/object.
-	 */
-	if (node->flags & ANOBJ_EVALUATED) {
-		return;
-	}
-
-	/*
 	 * Check that the ASL-defined parameter count is what is expected for
-	 * this predefined name.
+	 * this predefined name (parameter count as defined by the ACPI
+	 * specification)
 	 */
 	if ((param_count != required_params_current) &&
 	    (param_count != required_params_old)) {
-		ACPI_WARNING((AE_INFO,
-			      "%s: Parameter count mismatch - ASL declared %d, ACPI requires %d",
-			      pathname, param_count, required_params_current));
+		ACPI_WARN_PREDEFINED((AE_INFO, pathname, node->flags,
+				      "Parameter count mismatch - ASL declared %u, ACPI requires %u",
+				      param_count, required_params_current));
 	}
 }
 
@@ -358,9 +369,6 @@ const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct
 	this_name = predefined_names;
 	while (this_name->info.name[0]) {
 		if (ACPI_COMPARE_NAME(node->name.ascii, this_name->info.name)) {
-
-			/* Return pointer to this table entry */
-
 			return (this_name);
 		}
 
@@ -375,17 +383,16 @@ const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct
 		this_name++;
 	}
 
-	return (NULL);
+	return (NULL);		/* Not found */
 }
 
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ns_check_package
  *
- * PARAMETERS:  Pathname        - Full pathname to the node (for error msgs)
+ * PARAMETERS:  Data            - Pointer to validation data structure
  *              return_object_ptr - Pointer to the object returned from the
  *                                evaluation of a method or object
- *              Predefined      - Pointer to entry in predefined name table
  *
  * RETURN:      Status
  *
@@ -395,9 +402,8 @@ const union acpi_predefined_info *acpi_ns_check_for_predefined_name(struct
  ******************************************************************************/
 
 static acpi_status
-acpi_ns_check_package(char *pathname,
-		      union acpi_operand_object **return_object_ptr,
-		      const union acpi_predefined_info *predefined)
+acpi_ns_check_package(struct acpi_predefined_data *data,
+		      union acpi_operand_object **return_object_ptr)
 {
 	union acpi_operand_object *return_object = *return_object_ptr;
 	const union acpi_predefined_info *package;
@@ -414,11 +420,11 @@ acpi_ns_check_package(char *pathname,
 
 	/* The package info for this name is in the next table entry */
 
-	package = predefined + 1;
+	package = data->predefined + 1;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_NAMES,
 			  "%s Validating return Package of Type %X, Count %X\n",
-			  pathname, package->ret_info.type,
+			  data->pathname, package->ret_info.type,
 			  return_object->package.count));
 
 	/* Extract package count and elements array */
@@ -429,9 +435,8 @@ acpi_ns_check_package(char *pathname,
 	/* The package must have at least one element, else invalid */
 
 	if (!count) {
-		ACPI_WARNING((AE_INFO,
-			      "%s: Return Package has no elements (empty)",
-			      pathname));
+		ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+				      "Return Package has no elements (empty)"));
 
 		return (AE_AML_OPERAND_VALUE);
 	}
@@ -456,15 +461,16 @@ acpi_ns_check_package(char *pathname,
 		if (count < expected_count) {
 			goto package_too_small;
 		} else if (count > expected_count) {
-			ACPI_WARNING((AE_INFO,
-				      "%s: Return Package is larger than needed - "
-				      "found %u, expected %u", pathname, count,
-				      expected_count));
+			ACPI_WARN_PREDEFINED((AE_INFO, data->pathname,
+					      data->node_flags,
+					      "Return Package is larger than needed - "
+					      "found %u, expected %u", count,
+					      expected_count));
 		}
 
 		/* Validate all elements of the returned package */
 
-		status = acpi_ns_check_package_elements(pathname, elements,
+		status = acpi_ns_check_package_elements(data, elements,
 							package->ret_info.
 							object_type1,
 							package->ret_info.
@@ -485,7 +491,7 @@ acpi_ns_check_package(char *pathname,
 		 * elements must be of the same type
 		 */
 		for (i = 0; i < count; i++) {
-			status = acpi_ns_check_object_type(pathname, elements,
+			status = acpi_ns_check_object_type(data, elements,
 							   package->ret_info.
 							   object_type1, i);
 			if (ACPI_FAILURE(status)) {
@@ -517,8 +523,7 @@ acpi_ns_check_package(char *pathname,
 				/* These are the required package elements (0, 1, or 2) */
 
 				status =
-				    acpi_ns_check_object_type(pathname,
-							      elements,
+				    acpi_ns_check_object_type(data, elements,
 							      package->
 							      ret_info3.
 							      object_type[i],
@@ -530,8 +535,7 @@ acpi_ns_check_package(char *pathname,
 				/* These are the optional package elements */
 
 				status =
-				    acpi_ns_check_object_type(pathname,
-							      elements,
+				    acpi_ns_check_object_type(data, elements,
 							      package->
 							      ret_info3.
 							      tail_object_type,
@@ -548,7 +552,7 @@ acpi_ns_check_package(char *pathname,
 
 		/* First element is the (Integer) count of sub-packages to follow */
 
-		status = acpi_ns_check_object_type(pathname, elements,
+		status = acpi_ns_check_object_type(data, elements,
 						   ACPI_RTYPE_INTEGER, 0);
 		if (ACPI_FAILURE(status)) {
 			return (status);
@@ -585,9 +589,9 @@ acpi_ns_check_package(char *pathname,
 
 			/* Each sub-object must be of type Package */
 
-			status =
-			    acpi_ns_check_object_type(pathname, &sub_package,
-						      ACPI_RTYPE_PACKAGE, i);
+			status = acpi_ns_check_object_type(data, &sub_package,
+							   ACPI_RTYPE_PACKAGE,
+							   i);
 			if (ACPI_FAILURE(status)) {
 				return (status);
 			}
@@ -610,7 +614,7 @@ acpi_ns_check_package(char *pathname,
 				}
 
 				status =
-				    acpi_ns_check_package_elements(pathname,
+				    acpi_ns_check_package_elements(data,
 								   sub_elements,
 								   package->
 								   ret_info.
@@ -643,7 +647,7 @@ acpi_ns_check_package(char *pathname,
 
 				for (j = 0; j < expected_count; j++) {
 					status =
-					    acpi_ns_check_object_type(pathname,
+					    acpi_ns_check_object_type(data,
 						&sub_elements[j],
 						package->ret_info2.object_type[j], j);
 					if (ACPI_FAILURE(status)) {
@@ -665,7 +669,7 @@ acpi_ns_check_package(char *pathname,
 				/* Check the type of each sub-package element */
 
 				status =
-				    acpi_ns_check_package_elements(pathname,
+				    acpi_ns_check_package_elements(data,
 								   sub_elements,
 								   package->
 								   ret_info.
@@ -684,7 +688,7 @@ acpi_ns_check_package(char *pathname,
 				/* First element is the (Integer) count of elements to follow */
 
 				status =
-				    acpi_ns_check_object_type(pathname,
+				    acpi_ns_check_object_type(data,
 							      sub_elements,
 							      ACPI_RTYPE_INTEGER,
 							      0);
@@ -704,7 +708,7 @@ acpi_ns_check_package(char *pathname,
 				/* Check the type of each sub-package element */
 
 				status =
-				    acpi_ns_check_package_elements(pathname,
+				    acpi_ns_check_package_elements(data,
 								   (sub_elements
 								    + 1),
 								   package->
@@ -730,9 +734,9 @@ acpi_ns_check_package(char *pathname,
 
 		/* Should not get here if predefined info table is correct */
 
-		ACPI_WARNING((AE_INFO,
-			      "%s: Invalid internal return type in table entry: %X",
-			      pathname, package->ret_info.type));
+		ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+				      "Invalid internal return type in table entry: %X",
+				      package->ret_info.type));
 
 		return (AE_AML_INTERNAL);
 	}
@@ -743,9 +747,9 @@ acpi_ns_check_package(char *pathname,
 
 	/* Error exit for the case with an incorrect package count */
 
-	ACPI_WARNING((AE_INFO, "%s: Return Package is too small - "
-		      "found %u, expected %u", pathname, count,
-		      expected_count));
+	ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+			      "Return Package is too small - found %u, expected %u",
+			      count, expected_count));
 
 	return (AE_AML_OPERAND_VALUE);
 }
@@ -754,7 +758,7 @@ acpi_ns_check_package(char *pathname,
  *
  * FUNCTION:    acpi_ns_check_package_elements
  *
- * PARAMETERS:  Pathname        - Full pathname to the node (for error msgs)
+ * PARAMETERS:  Data            - Pointer to validation data structure
  *              Elements        - Pointer to the package elements array
  *              Type1           - Object type for first group
  *              Count1          - Count for first group
@@ -770,7 +774,7 @@ acpi_ns_check_package(char *pathname,
  ******************************************************************************/
 
 static acpi_status
-acpi_ns_check_package_elements(char *pathname,
+acpi_ns_check_package_elements(struct acpi_predefined_data *data,
 			       union acpi_operand_object **elements,
 			       u8 type1,
 			       u32 count1,
@@ -786,7 +790,7 @@ acpi_ns_check_package_elements(char *pathname,
 	 * The second group can have a count of zero.
 	 */
 	for (i = 0; i < count1; i++) {
-		status = acpi_ns_check_object_type(pathname, this_element,
+		status = acpi_ns_check_object_type(data, this_element,
 						   type1, i + start_index);
 		if (ACPI_FAILURE(status)) {
 			return (status);
@@ -795,7 +799,7 @@ acpi_ns_check_package_elements(char *pathname,
 	}
 
 	for (i = 0; i < count2; i++) {
-		status = acpi_ns_check_object_type(pathname, this_element,
+		status = acpi_ns_check_object_type(data, this_element,
 						   type2,
 						   (i + count1 + start_index));
 		if (ACPI_FAILURE(status)) {
@@ -811,12 +815,13 @@ acpi_ns_check_package_elements(char *pathname,
  *
  * FUNCTION:    acpi_ns_check_object_type
  *
- * PARAMETERS:  Pathname        - Full pathname to the node (for error msgs)
+ * PARAMETERS:  Data            - Pointer to validation data structure
  *              return_object_ptr - Pointer to the object returned from the
  *                                evaluation of a method or object
  *              expected_btypes - Bitmap of expected return type(s)
  *              package_index   - Index of object within parent package (if
- *                                applicable - ACPI_NOT_PACKAGE otherwise)
+ *                                applicable - ACPI_NOT_PACKAGE_ELEMENT
+ *                                otherwise)
  *
  * RETURN:      Status
  *
@@ -826,7 +831,7 @@ acpi_ns_check_package_elements(char *pathname,
  ******************************************************************************/
 
 static acpi_status
-acpi_ns_check_object_type(char *pathname,
+acpi_ns_check_object_type(struct acpi_predefined_data *data,
 			  union acpi_operand_object **return_object_ptr,
 			  u32 expected_btypes, u32 package_index)
 {
@@ -834,9 +839,6 @@ acpi_ns_check_object_type(char *pathname,
 	acpi_status status = AE_OK;
 	u32 return_btype;
 	char type_buffer[48];	/* Room for 5 types */
-	u32 this_rtype;
-	u32 i;
-	u32 j;
 
 	/*
 	 * If we get a NULL return_object here, it is a NULL package element,
@@ -849,10 +851,11 @@ acpi_ns_check_object_type(char *pathname,
 	/* A Namespace node should not get here, but make sure */
 
 	if (ACPI_GET_DESCRIPTOR_TYPE(return_object) == ACPI_DESC_TYPE_NAMED) {
-		ACPI_WARNING((AE_INFO,
-			      "%s: Invalid return type - Found a Namespace node [%4.4s] type %s",
-			      pathname, return_object->node.name.ascii,
-			      acpi_ut_get_type_name(return_object->node.type)));
+		ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+				      "Invalid return type - Found a Namespace node [%4.4s] type %s",
+				      return_object->node.name.ascii,
+				      acpi_ut_get_type_name(return_object->node.
+							    type)));
 		return (AE_AML_OPERAND_TYPE);
 	}
 
@@ -897,10 +900,11 @@ acpi_ns_check_object_type(char *pathname,
 
 		/* Type mismatch -- attempt repair of the returned object */
 
-		status = acpi_ns_repair_object(expected_btypes, package_index,
+		status = acpi_ns_repair_object(data, expected_btypes,
+					       package_index,
 					       return_object_ptr);
 		if (ACPI_SUCCESS(status)) {
-			return (status);
+			return (AE_OK);	/* Repair was successful */
 		}
 		goto type_error_exit;
 	}
@@ -908,7 +912,7 @@ acpi_ns_check_object_type(char *pathname,
 	/* For reference objects, check that the reference type is correct */
 
 	if (return_object->common.type == ACPI_TYPE_LOCAL_REFERENCE) {
-		status = acpi_ns_check_reference(pathname, return_object);
+		status = acpi_ns_check_reference(data, return_object);
 	}
 
 	return (status);
@@ -917,33 +921,19 @@ acpi_ns_check_object_type(char *pathname,
 
 	/* Create a string with all expected types for this predefined object */
 
-	j = 1;
-	type_buffer[0] = 0;
-	this_rtype = ACPI_RTYPE_INTEGER;
-
-	for (i = 0; i < ACPI_NUM_RTYPES; i++) {
-
-		/* If one of the expected types, concatenate the name of this type */
-
-		if (expected_btypes & this_rtype) {
-			ACPI_STRCAT(type_buffer, &acpi_rtype_names[i][j]);
-			j = 0;	/* Use name separator from now on */
-		}
-		this_rtype <<= 1;	/* Next Rtype */
-	}
+	acpi_ns_get_expected_types(type_buffer, expected_btypes);
 
-	if (package_index == ACPI_NOT_PACKAGE) {
-		ACPI_WARNING((AE_INFO,
-			      "%s: Return type mismatch - found %s, expected %s",
-			      pathname,
-			      acpi_ut_get_object_type_name(return_object),
-			      type_buffer));
+	if (package_index == ACPI_NOT_PACKAGE_ELEMENT) {
+		ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+				      "Return type mismatch - found %s, expected %s",
+				      acpi_ut_get_object_type_name
+				      (return_object), type_buffer));
 	} else {
-		ACPI_WARNING((AE_INFO,
-			      "%s: Return Package type mismatch at index %u - "
-			      "found %s, expected %s", pathname, package_index,
-			      acpi_ut_get_object_type_name(return_object),
-			      type_buffer));
+		ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+				      "Return Package type mismatch at index %u - "
+				      "found %s, expected %s", package_index,
+				      acpi_ut_get_object_type_name
+				      (return_object), type_buffer));
 	}
 
 	return (AE_AML_OPERAND_TYPE);
@@ -953,7 +943,7 @@ acpi_ns_check_object_type(char *pathname,
  *
  * FUNCTION:    acpi_ns_check_reference
  *
- * PARAMETERS:  Pathname        - Full pathname to the node (for error msgs)
+ * PARAMETERS:  Data            - Pointer to validation data structure
  *              return_object   - Object returned from the evaluation of a
  *                                method or object
  *
@@ -966,7 +956,7 @@ acpi_ns_check_object_type(char *pathname,
  ******************************************************************************/
 
 static acpi_status
-acpi_ns_check_reference(char *pathname,
+acpi_ns_check_reference(struct acpi_predefined_data *data,
 			union acpi_operand_object *return_object)
 {
 
@@ -979,11 +969,10 @@ acpi_ns_check_reference(char *pathname,
 		return (AE_OK);
 	}
 
-	ACPI_WARNING((AE_INFO,
-		      "%s: Return type mismatch - "
-		      "unexpected reference object type [%s] %2.2X",
-		      pathname, acpi_ut_get_reference_name(return_object),
-		      return_object->reference.class));
+	ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+			      "Return type mismatch - unexpected reference object type [%s] %2.2X",
+			      acpi_ut_get_reference_name(return_object),
+			      return_object->reference.class));
 
 	return (AE_AML_OPERAND_TYPE);
 }
@@ -992,8 +981,11 @@ acpi_ns_check_reference(char *pathname,
  *
  * FUNCTION:    acpi_ns_repair_object
  *
- * PARAMETERS:  Pathname        - Full pathname to the node (for error msgs)
- *              package_index   - Used to determine if target is in a package
+ * PARAMETERS:  Data            - Pointer to validation data structure
+ *              expected_btypes - Object types expected
+ *              package_index   - Index of object within parent package (if
+ *                                applicable - ACPI_NOT_PACKAGE_ELEMENT
+ *                                otherwise)
  *              return_object_ptr - Pointer to the object returned from the
  *                                evaluation of a method or object
  *
@@ -1005,7 +997,8 @@ acpi_ns_check_reference(char *pathname,
  ******************************************************************************/
 
 static acpi_status
-acpi_ns_repair_object(u32 expected_btypes,
+acpi_ns_repair_object(struct acpi_predefined_data *data,
+		      u32 expected_btypes,
 		      u32 package_index,
 		      union acpi_operand_object **return_object_ptr)
 {
@@ -1016,6 +1009,8 @@ acpi_ns_repair_object(u32 expected_btypes,
 	switch (return_object->common.type) {
 	case ACPI_TYPE_BUFFER:
 
+		/* Does the method/object legally return a string? */
+
 		if (!(expected_btypes & ACPI_RTYPE_STRING)) {
 			return (AE_AML_OPERAND_TYPE);
 		}
@@ -1052,19 +1047,29 @@ acpi_ns_repair_object(u32 expected_btypes,
 		 *    reference count of the old object.
 		 * 2. Decrement the reference count of the original object.
 		 */
-		if (package_index != ACPI_NOT_PACKAGE) {
+		if (package_index != ACPI_NOT_PACKAGE_ELEMENT) {
 			new_object->common.reference_count =
 			    return_object->common.reference_count;
 
 			if (return_object->common.reference_count > 1) {
 				return_object->common.reference_count--;
 			}
+
+			ACPI_WARN_PREDEFINED((AE_INFO, data->pathname,
+					      data->node_flags,
+					      "Converted Buffer to expected String at index %u",
+					      package_index));
+		} else {
+			ACPI_WARN_PREDEFINED((AE_INFO, data->pathname,
+					      data->node_flags,
+					      "Converted Buffer to expected String"));
 		}
 
 		/* Delete old object, install the new return object */
 
 		acpi_ut_remove_reference(return_object);
 		*return_object_ptr = new_object;
+		data->flags |= ACPI_OBJECT_REPAIRED;
 		return (AE_OK);
 
 	default:
@@ -1073,3 +1078,39 @@ acpi_ns_repair_object(u32 expected_btypes,
 
 	return (AE_AML_OPERAND_TYPE);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_get_expected_types
+ *
+ * PARAMETERS:  Buffer          - Pointer to where the string is returned
+ *              expected_btypes - Bitmap of expected return type(s)
+ *
+ * RETURN:      Buffer is populated with type names.
+ *
+ * DESCRIPTION: Translate the expected types bitmap into a string of ascii
+ *              names of expected types, for use in warning messages.
+ *
+ ******************************************************************************/
+
+static void acpi_ns_get_expected_types(char *buffer, u32 expected_btypes)
+{
+	u32 this_rtype;
+	u32 i;
+	u32 j;
+
+	j = 1;
+	buffer[0] = 0;
+	this_rtype = ACPI_RTYPE_INTEGER;
+
+	for (i = 0; i < ACPI_NUM_RTYPES; i++) {
+
+		/* If one of the expected types, concatenate the name of this type */
+
+		if (expected_btypes & this_rtype) {
+			ACPI_STRCAT(buffer, &acpi_rtype_names[i][j]);
+			j = 0;	/* Use name separator from now on */
+		}
+		this_rtype <<= 1;	/* Next Rtype */
+	}
+}
diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c
index 9cd6533..5bd2960 100644
--- a/drivers/acpi/acpica/utmisc.c
+++ b/drivers/acpi/acpica/utmisc.c
@@ -50,6 +50,11 @@
 #define _COMPONENT          ACPI_UTILITIES
 ACPI_MODULE_NAME("utmisc")
 
+/*
+ * Common suffix for messages
+ */
+#define ACPI_COMMON_MSG_SUFFIX \
+	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name, line_number)
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ut_validate_exception
@@ -1065,8 +1070,7 @@ acpi_error(const char *module_name, u32 line_number, const char *format, ...)
 
 	va_start(args, format);
 	acpi_os_vprintf(format, args);
-	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name,
-		       line_number);
+	ACPI_COMMON_MSG_SUFFIX;
 	va_end(args);
 }
 
@@ -1080,8 +1084,7 @@ acpi_exception(const char *module_name,
 
 	va_start(args, format);
 	acpi_os_vprintf(format, args);
-	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name,
-		       line_number);
+	ACPI_COMMON_MSG_SUFFIX;
 	va_end(args);
 }
 
@@ -1094,8 +1097,7 @@ acpi_warning(const char *module_name, u32 line_number, const char *format, ...)
 
 	va_start(args, format);
 	acpi_os_vprintf(format, args);
-	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name,
-		       line_number);
+	ACPI_COMMON_MSG_SUFFIX;
 	va_end(args);
 }
 
@@ -1116,3 +1118,46 @@ ACPI_EXPORT_SYMBOL(acpi_error)
 ACPI_EXPORT_SYMBOL(acpi_exception)
 ACPI_EXPORT_SYMBOL(acpi_warning)
 ACPI_EXPORT_SYMBOL(acpi_info)
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_predefined_warning
+ *
+ * PARAMETERS:  module_name     - Caller's module name (for error output)
+ *              line_number     - Caller's line number (for error output)
+ *              Pathname        - Full pathname to the node
+ *              node_flags      - From Namespace node for the method/object
+ *              Format          - Printf format string + additional args
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Warnings for the predefined validation module. Messages are
+ *              only emitted the first time a problem with a particular
+ *              method/object is detected. This prevents a flood of error
+ *              messages for methods that are repeatedly evaluated.
+ *
+******************************************************************************/
+
+void ACPI_INTERNAL_VAR_XFACE
+acpi_ut_predefined_warning(const char *module_name,
+			   u32 line_number,
+			   char *pathname,
+			   u8 node_flags, const char *format, ...)
+{
+	va_list args;
+
+	/*
+	 * Warning messages for this method/object will be disabled after the
+	 * first time a validation fails or an object is successfully repaired.
+	 */
+	if (node_flags & ANOBJ_EVALUATED) {
+		return;
+	}
+
+	acpi_os_printf("ACPI Warning for %s: ", pathname);
+
+	va_start(args, format);
+	acpi_os_vprintf(format, args);
+	ACPI_COMMON_MSG_SUFFIX;
+	va_end(args);
+}
-- 
cgit v0.10.2


From 8d590c7af1152685efcf302905baeb6dda3c2d2f Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 24 Jun 2009 13:39:29 +0800
Subject: ACPICA: Clarify common suffix for error/warning messages

Added parens around the acpica version/modulename/linenumber to
clearly differentiate this group from the rest of the message.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c
index 5bd2960..61f6315 100644
--- a/drivers/acpi/acpica/utmisc.c
+++ b/drivers/acpi/acpica/utmisc.c
@@ -54,7 +54,7 @@ ACPI_MODULE_NAME("utmisc")
  * Common suffix for messages
  */
 #define ACPI_COMMON_MSG_SUFFIX \
-	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name, line_number)
+	acpi_os_printf(" (%8.8X/%s-%u)\n", ACPI_CA_VERSION, module_name, line_number)
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ut_validate_exception
-- 
cgit v0.10.2


From eb2289ba1ba994de25af0d94b5e80ba93d2c1c3c Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 24 Jun 2009 13:42:00 +0800
Subject: ACPICA: ACPI 4.0: Changes for existing ACPI tables.

FACS: new flag and new OspmFlags field.
SRAT: x2APIC - add ClockDomain field to descriptor #2

Includes header and disassembler support.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 222733d..0649a56 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -161,17 +161,24 @@ struct acpi_table_facs {
 	u32 flags;
 	u64 xfirmware_waking_vector;	/* 64-bit version of the Firmware Waking Vector (ACPI 2.0+) */
 	u8 version;		/* Version of this table (ACPI 2.0+) */
-	u8 reserved[31];	/* Reserved, must be zero */
+	u8 reserved[3];		/* Reserved, must be zero */
+	u32 ospm_flags;		/* Flags to be set by OSPM (ACPI 4.0) */
+	u8 reserved1[24];	/* Reserved, must be zero */
 };
 
-/* Flag macros */
+/* global_lock flags */
+
+#define ACPI_GLOCK_PENDING          (1)	/* 00: Pending global lock ownership */
+#define ACPI_GLOCK_OWNED            (1<<1)	/* 01: Global lock is owned */
+
+/* Flags  */
 
-#define ACPI_FACS_S4_BIOS_PRESENT (1)	/* 00: S4BIOS support is present */
+#define ACPI_FACS_S4_BIOS_PRESENT   (1)	/* 00: S4BIOS support is present */
+#define ACPI_FACS_64BIT_WAKE        (1<<1)	/* 01: 64-bit wake vector supported (ACPI 4.0) */
 
-/* Global lock flags */
+/* ospm_flags */
 
-#define ACPI_GLOCK_PENDING      0x01	/* 00: Pending global lock ownership */
-#define ACPI_GLOCK_OWNED        0x02	/* 01: Global lock is owned */
+#define ACPI_FACS_64BIT_ENVIRONMENT (1)	/* 00: 64-bit wake environment is required (ACPI 4.0) */
 
 /*******************************************************************************
  *
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 59ade07..ec36693 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -1011,7 +1011,7 @@ struct acpi_madt_interrupt_source {
 
 #define ACPI_MADT_CPEI_OVERRIDE     (1)
 
-/* 9: Processor Local X2_APIC (07/2008) */
+/* 9: Processor Local X2APIC (ACPI 4.0) */
 
 struct acpi_madt_local_x2apic {
 	struct acpi_subtable_header header;
@@ -1021,7 +1021,7 @@ struct acpi_madt_local_x2apic {
 	u32 uid;		/* ACPI processor UID */
 };
 
-/* 10: Local X2APIC NMI (07/2008) */
+/* 10: Local X2APIC NMI (ACPI 4.0) */
 
 struct acpi_madt_local_x2apic_nmi {
 	struct acpi_subtable_header header;
@@ -1211,7 +1211,7 @@ struct acpi_srat_mem_affinity {
 #define ACPI_SRAT_MEM_HOT_PLUGGABLE (1<<1)	/* 01: Memory region is hot pluggable */
 #define ACPI_SRAT_MEM_NON_VOLATILE  (1<<2)	/* 02: Memory region is non-volatile */
 
-/* 2: Processor Local X2_APIC Affinity (07/2008) */
+/* 2: Processor Local X2_APIC Affinity (ACPI 4.0) */
 
 struct acpi_srat_x2apic_cpu_affinity {
 	struct acpi_subtable_header header;
@@ -1219,6 +1219,8 @@ struct acpi_srat_x2apic_cpu_affinity {
 	u32 proximity_domain;
 	u32 apic_id;
 	u32 flags;
+	u32 clock_domain;
+	u32 reserved2;
 };
 
 /* Flags for struct acpi_srat_cpu_affinity and struct acpi_srat_x2apic_cpu_affinity */
-- 
cgit v0.10.2


From 3ce804ed83827a7fd27190836f9421b29ac64512 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 25 Jun 2009 10:31:32 -0700
Subject: ACPICA: Update version to 20090625

Update version number.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 04904c7..063e577 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20090521
+#define ACPI_CA_VERSION                 0x20090625
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v0.10.2


From a5fe1a03f7720b8da8364a1737e1e5a357904e99 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Thu, 13 Aug 2009 10:43:27 +0800
Subject: ACPICA: fix leak of acpi_os_validate_address

http://bugzilla.kernel.org/show_bug.cgi?id=13620

If the dynamic region is created and added to resource list over and over again,
it has the potential to be a memory leak by growing the list every time.

This patch fixes the memory leak, as below

1) add a new field "count" to struct acpi_res_list.

   When inserting, if the region(addr, len) is already in the resource
   list, we just increase "count", otherwise, the region is inserted
   with count=1.

   When deleting, the "count" is decreased, if it's decreased to 0,
   the region is deleted from the resource list.

   With "count", the region with same address and length can only be
   inserted to the resource list once, so prevent potential memory leak.

2) add a new function acpi_os_invalidate_address, which is called when
   region is deleted.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c
index bc17103..96e26e7 100644
--- a/drivers/acpi/acpica/utdelete.c
+++ b/drivers/acpi/acpica/utdelete.c
@@ -215,6 +215,12 @@ static void acpi_ut_delete_internal_obj(union acpi_operand_object *object)
 		ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS,
 				  "***** Region %p\n", object));
 
+		/* Invalidate the region address/length via the host OS */
+
+		acpi_os_invalidate_address(object->region.space_id,
+					  object->region.address,
+					  (acpi_size) object->region.length);
+
 		second_desc = acpi_ns_get_secondary_object(object);
 		if (second_desc) {
 			/*
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 5691f16..c5b4f1e 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -88,6 +88,7 @@ struct acpi_res_list {
 	char name[5];   /* only can have a length of 4 chars, make use of this
 			   one instead of res->name, no need to kalloc then */
 	struct list_head resource_list;
+	int count;
 };
 
 static LIST_HEAD(resource_list_head);
@@ -1358,6 +1359,89 @@ acpi_os_validate_interface (char *interface)
 	return AE_SUPPORT;
 }
 
+static inline int acpi_res_list_add(struct acpi_res_list *res)
+{
+	struct acpi_res_list *res_list_elem;
+
+	list_for_each_entry(res_list_elem, &resource_list_head,
+			    resource_list) {
+
+		if (res->resource_type == res_list_elem->resource_type &&
+		    res->start == res_list_elem->start &&
+		    res->end == res_list_elem->end) {
+
+			/*
+			 * The Region(addr,len) already exist in the list,
+			 * just increase the count
+			 */
+
+			res_list_elem->count++;
+			return 0;
+		}
+	}
+
+	res->count = 1;
+	list_add(&res->resource_list, &resource_list_head);
+	return 1;
+}
+
+static inline void acpi_res_list_del(struct acpi_res_list *res)
+{
+	struct acpi_res_list *res_list_elem;
+
+	list_for_each_entry(res_list_elem, &resource_list_head,
+			    resource_list) {
+
+		if (res->resource_type == res_list_elem->resource_type &&
+		    res->start == res_list_elem->start &&
+		    res->end == res_list_elem->end) {
+
+			/*
+			 * If the res count is decreased to 0,
+			 * remove and free it
+			 */
+
+			if (--res_list_elem->count == 0) {
+				list_del(&res_list_elem->resource_list);
+				kfree(res_list_elem);
+			}
+			return;
+		}
+	}
+}
+
+acpi_status
+acpi_os_invalidate_address(
+    u8                   space_id,
+    acpi_physical_address   address,
+    acpi_size               length)
+{
+	struct acpi_res_list res;
+
+	switch (space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		/* Only interference checks against SystemIO and SytemMemory
+		   are needed */
+		res.start = address;
+		res.end = address + length - 1;
+		res.resource_type = space_id;
+		spin_lock(&acpi_res_lock);
+		acpi_res_list_del(&res);
+		spin_unlock(&acpi_res_lock);
+		break;
+	case ACPI_ADR_SPACE_PCI_CONFIG:
+	case ACPI_ADR_SPACE_EC:
+	case ACPI_ADR_SPACE_SMBUS:
+	case ACPI_ADR_SPACE_CMOS:
+	case ACPI_ADR_SPACE_PCI_BAR_TARGET:
+	case ACPI_ADR_SPACE_DATA_TABLE:
+	case ACPI_ADR_SPACE_FIXED_HARDWARE:
+		break;
+	}
+	return AE_OK;
+}
+
 /******************************************************************************
  *
  * FUNCTION:    acpi_os_validate_address
@@ -1382,6 +1466,7 @@ acpi_os_validate_address (
     char *name)
 {
 	struct acpi_res_list *res;
+	int added;
 	if (acpi_enforce_resources == ENFORCE_RESOURCES_NO)
 		return AE_OK;
 
@@ -1399,14 +1484,17 @@ acpi_os_validate_address (
 		res->end = address + length - 1;
 		res->resource_type = space_id;
 		spin_lock(&acpi_res_lock);
-		list_add(&res->resource_list, &resource_list_head);
+		added = acpi_res_list_add(res);
 		spin_unlock(&acpi_res_lock);
-		pr_debug("Added %s resource: start: 0x%llx, end: 0x%llx, "
-			 "name: %s\n", (space_id == ACPI_ADR_SPACE_SYSTEM_IO)
+		pr_debug("%s %s resource: start: 0x%llx, end: 0x%llx, "
+			 "name: %s\n", added ? "Added" : "Already exist",
+			 (space_id == ACPI_ADR_SPACE_SYSTEM_IO)
 			 ? "SystemIO" : "System Memory",
 			 (unsigned long long)res->start,
 			 (unsigned long long)res->end,
 			 res->name);
+		if (!added)
+			kfree(res);
 		break;
 	case ACPI_ADR_SPACE_PCI_CONFIG:
 	case ACPI_ADR_SPACE_EC:
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index ab0b85c..eb0e718 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -245,6 +245,9 @@ acpi_status acpi_osi_invalidate(char* interface);
 acpi_status
 acpi_os_validate_address(u8 space_id, acpi_physical_address address,
 			 acpi_size length, char *name);
+acpi_status
+acpi_os_invalidate_address(u8 space_id, acpi_physical_address address,
+			 acpi_size length);
 
 u64 acpi_os_get_timer(void);
 
-- 
cgit v0.10.2


From 3b5e634103a5471d74e55d774e53db3df5c7b650 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Thu, 6 Aug 2009 15:57:54 -0700
Subject: ACPI: video: remove unneeded memsets

device->cap and video->cap are zeroed initially so we don't need to clear
them again.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 8851315..a0cd0c7 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -934,9 +934,6 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 {
 	acpi_handle h_dummy1;
 
-
-	memset(&device->cap, 0, sizeof(device->cap));
-
 	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_ADR", &h_dummy1))) {
 		device->cap._ADR = 1;
 	}
@@ -1039,7 +1036,6 @@ static void acpi_video_bus_find_cap(struct acpi_video_bus *video)
 {
 	acpi_handle h_dummy1;
 
-	memset(&video->cap, 0, sizeof(video->cap));
 	if (ACPI_SUCCESS(acpi_get_handle(video->device->handle, "_DOS", &h_dummy1))) {
 		video->cap._DOS = 1;
 	}
-- 
cgit v0.10.2


From aaf84eb95a9c610c6413cee4836764ea9194eed3 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Thu, 20 Aug 2009 03:21:56 +0300
Subject: nfsd41: renew_client must be called under the state lock

Until we work out the state locking so we can use a spin lock to protect
the cl_lru, we need to take the state_lock to renew the client.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: Do not renew state on error]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: Simplify exit code]
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d2a0524..5f634d2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1481,7 +1481,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 		 * for nfsd4_svc_encode_compoundres processing */
 		status = nfsd4_replay_cache_entry(resp, seq);
 		cstate->status = nfserr_replay_cache;
-		goto replay_cache;
+		goto out;
 	}
 	if (status)
 		goto out;
@@ -1497,15 +1497,18 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	cstate->slot = slot;
 	cstate->session = session;
 
-replay_cache:
-	/* Renew the clientid on success and on replay.
-	 * Hold a session reference until done processing the compound:
+	/* Hold a session reference until done processing the compound:
 	 * nfsd4_put_session called only if the cstate slot is set.
 	 */
-	renew_client(session->se_client);
 	nfsd4_get_session(session);
 out:
 	spin_unlock(&sessionid_lock);
+	/* Renew the clientid on success and on replay */
+	if (cstate->session) {
+		nfs4_lock_state();
+		renew_client(session->se_client);
+		nfs4_unlock_state();
+	}
 	dprintk("%s: return %d\n", __func__, ntohl(status));
 	return status;
 }
-- 
cgit v0.10.2


From b0401d725334a94d57335790b8ac2404144748ee Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Thu, 27 Aug 2009 10:23:39 +0800
Subject: sunrpc: move the close processing after do recvfrom method

sunrpc: "Move close processing to a single place"
(d7979ae4a050a45b78af51832475001b68263d2a) moved the
close processing before the recvfrom method. This may
cause the close processing never to execute. So this
patch moves it to the right place.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index dcd2d1e..912dea5 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -710,10 +710,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	spin_unlock_bh(&pool->sp_lock);
 
 	len = 0;
-	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
-		dprintk("svc_recv: found XPT_CLOSE\n");
-		svc_delete_xprt(xprt);
-	} else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
+	if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
 		struct svc_xprt *newxpt;
 		newxpt = xprt->xpt_ops->xpo_accept(xprt);
 		if (newxpt) {
@@ -739,7 +736,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 			svc_xprt_received(newxpt);
 		}
 		svc_xprt_received(xprt);
-	} else {
+	} else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
 			rqstp, pool->sp_id, xprt,
 			atomic_read(&xprt->xpt_ref.refcount));
@@ -752,6 +749,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		dprintk("svc: got len=%d\n", len);
 	}
 
+	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
+		dprintk("svc_recv: found XPT_CLOSE\n");
+		svc_delete_xprt(xprt);
+	}
+
 	/* No data, incomplete (TCP) read, or accept() */
 	if (len == 0 || len == -EAGAIN) {
 		rqstp->rq_res.len = 0;
-- 
cgit v0.10.2


From d8d0b85b11476ce59684ad2998e91a522df518a0 Mon Sep 17 00:00:00 2001
From: Frank Filz <ffilzlnx@us.ibm.com>
Date: Thu, 27 Aug 2009 17:35:41 -0400
Subject: nfsd4: remove ACE4_IDENTIFIER_GROUP flag from GROUP@ entry

RFC 3530 says "ACE4_IDENTIFIER_GROUP flag MUST be ignored on entries
with these special identifiers.  When encoding entries with these
special identifiers, the ACE4_IDENTIFIER_GROUP flag SHOULD be set to
zero."  It really shouldn't matter either way, but the point is that
this flag is used to distinguish named users from named groups (since
unix allows a group to have the same name as a user), so it doesn't
really make sense to use it on a special identifier such as this.)

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 5320c2b..725d02f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -321,7 +321,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 	deny = ~pas.group & pas.other;
 	if (deny) {
 		ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
-		ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
+		ace->flag = eflag;
 		ace->access_mask = deny_mask_from_posix(deny, flags);
 		ace->whotype = NFS4_ACL_WHO_GROUP;
 		ace++;
-- 
cgit v0.10.2


From 468de9e54a900559b55aa939a4daeaea1915e572 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 27 Aug 2009 12:07:40 -0400
Subject: nfsd41: expand solo sequence check

Compounds consisting of only a sequence operation don't need any
additional caching beyond the sequence information we store in the slot
entry.  Fix nfsd4_is_solo_sequence to identify this case correctly.

The additional check for a failed sequence in nfsd4_store_cache_entry()
is redundant, since the nfsd4_is_solo_sequence call lower down catches
this case.

The final ce_cachethis set in nfsd4_sequence is also redundant.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5f634d2..b44a2cf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -991,16 +991,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 {
 	struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
 	struct svc_rqst *rqstp = resp->rqstp;
-	struct nfsd4_compoundargs *args = rqstp->rq_argp;
-	struct nfsd4_op *op = &args->ops[resp->opcnt];
 	struct kvec *resv = &rqstp->rq_res.head[0];
 
 	dprintk("--> %s entry %p\n", __func__, entry);
 
-	/* Don't cache a failed OP_SEQUENCE. */
-	if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status)
-		return;
-
 	nfsd4_release_respages(entry->ce_respages, entry->ce_resused);
 	entry->ce_opcnt = resp->opcnt;
 	entry->ce_status = resp->cstate.status;
@@ -1490,9 +1484,6 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	slot->sl_inuse = true;
 	slot->sl_seqid = seq->seqid;
 	slot->sl_cache_entry.ce_cachethis = seq->cachethis;
-	/* Always set the cache entry cachethis for solo sequence */
-	if (nfsd4_is_solo_sequence(resp))
-		slot->sl_cache_entry.ce_cachethis = 1;
 
 	cstate->slot = slot;
 	cstate->session = session;
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 5e4beb0..3f71660 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -467,7 +467,7 @@ struct nfsd4_compoundres {
 static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
 {
 	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
-	return args->opcnt == 1;
+	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
 }
 
 static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
-- 
cgit v0.10.2


From c1b5310a957c47d1c66bb53035c6ba6aa20a150f Mon Sep 17 00:00:00 2001
From: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Date: Thu, 27 Aug 2009 18:45:33 -0400
Subject: fujitsu-laptop: fix config corner case

This patch is a trivial fix for a config corner case, ensuring that
fujitsu-laptop doesn't get compiled into the kernel when the led class
is a module.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 77c6097..0b5a85b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -99,6 +99,7 @@ config FUJITSU_LAPTOP
 	depends on ACPI
 	depends on INPUT
 	depends on BACKLIGHT_CLASS_DEVICE
+	depends on LEDS_CLASS || LEDS_CLASS=n
 	---help---
 	  This is a driver for laptops built by Fujitsu:
 
-- 
cgit v0.10.2


From 1e384cb0f9a940f2a431d1708f963987e61d71e3 Mon Sep 17 00:00:00 2001
From: Stephen Gildea <stepheng+linux@gildea.com>
Date: Tue, 25 Aug 2009 14:41:52 +0930
Subject: fujitsu-laptop: support led-class as module

Support fujitsu-laptop with led-class built as a module instead of
being compiled in.

Signed-off-by: Stephen Gildea <stepheng+linux@gildea.com>
Acked-by: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 218b9a1..4c8897a 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -66,7 +66,7 @@
 #include <linux/kfifo.h>
 #include <linux/video_output.h>
 #include <linux/platform_device.h>
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 #include <linux/leds.h>
 #endif
 
@@ -96,7 +96,7 @@
 /* FUNC interface - responses */
 #define UNSUPPORTED_CMD 0x80000000
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 /* FUNC interface - LED control */
 #define FUNC_LED_OFF	0x1
 #define FUNC_LED_ON	0x30001
@@ -176,7 +176,7 @@ static struct fujitsu_hotkey_t *fujitsu_hotkey;
 
 static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event);
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 static enum led_brightness logolamp_get(struct led_classdev *cdev);
 static void logolamp_set(struct led_classdev *cdev,
 			       enum led_brightness brightness);
@@ -257,7 +257,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
 	return out_obj.integer.value;
 }
 
-#ifdef CONFIG_LEDS_CLASS
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 /* LED class callbacks */
 
 static void logolamp_set(struct led_classdev *cdev,
@@ -911,7 +911,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	printk(KERN_INFO "fujitsu-laptop: BTNI: [0x%x]\n",
 		call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
 
-	#ifdef CONFIG_LEDS_CLASS
+	#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
 		result = led_classdev_register(&fujitsu->pf_device->dev,
 						&logolamp_led);
@@ -1204,7 +1204,7 @@ fail_acpi:
 
 static void __exit fujitsu_cleanup(void)
 {
-	#ifdef CONFIG_LEDS_CLASS
+	#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	if (fujitsu_hotkey->logolamp_registered != 0)
 		led_classdev_unregister(&logolamp_led);
 
-- 
cgit v0.10.2


From 14485c57270e8f3de2a25abaf93bae5712c97e9e Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 31 Jul 2009 18:12:00 +0930
Subject: fujitsu-laptop: Correct redundant test

device and acpi_driver_data(device) were tested just a few lines above.

A simplified version of the semantic match that finds this problem is as
follows: (http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r exists@
local idexpression x;
expression E;
@@

if (x == NULL || ...) { ... when forall
   return ...; }
.. when != \(x=E\|x--\|x++\|--x\|++x\|x-=E\|x+=E\|x|=E\|x&=E\|&x\)
(
*x == NULL
|
*x != NULL
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 4c8897a..0d42f44 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -745,9 +745,6 @@ static int acpi_fujitsu_remove(struct acpi_device *device, int type)
 
 	fujitsu = acpi_driver_data(device);
 
-	if (!device || !acpi_driver_data(device))
-		return -EINVAL;
-
 	fujitsu->acpi_handle = NULL;
 
 	return 0;
-- 
cgit v0.10.2


From 67059406219d30a36b7ca93f863eb1f3032f05ce Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 31 Jul 2009 08:43:56 +0000
Subject: fujitsu-laptop: remove superfluous NULL pointer checks

This takes care of the following entries from Dan's list:

drivers/platform/x86/fujitsu-laptop.c +327 set_lcd_level(13) warning:
variable derefenced before check 'fujitsu'
drivers/platform/x86/fujitsu-laptop.c +358 set_lcd_level_alt(13) warning:
variable derefenced before check 'fujitsu'

Reported-by: Dan Carpenter <error27@gmail.com>
Cc: corbet@lwn.net
Cc: eteo@redhat.com
Cc: Julia Lawall <julia@diku.dk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Acked-by: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 0d42f44..f9e3e3a 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -324,9 +324,6 @@ static int set_lcd_level(int level)
 	if (level < 0 || level >= fujitsu->max_brightness)
 		return -EINVAL;
 
-	if (!fujitsu)
-		return -EINVAL;
-
 	status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
 	if (ACPI_FAILURE(status)) {
 		vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n");
@@ -355,9 +352,6 @@ static int set_lcd_level_alt(int level)
 	if (level < 0 || level >= fujitsu->max_brightness)
 		return -EINVAL;
 
-	if (!fujitsu)
-		return -EINVAL;
-
 	status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle);
 	if (ACPI_FAILURE(status)) {
 		vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n");
-- 
cgit v0.10.2


From 72afeeafe54853881a4e53dc78d538e249130ad8 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 31 Jul 2009 18:16:02 +0930
Subject: fujitsu-laptop: driver [un]registration fixes

* Move led_classdev_unregister() calls from fujitsu_cleanup() to
  acpi_fujitsu_hotkey_remove().

* Fix ordering in fujitsu_cleanup().

* Fix backlight_device_register() failure handling in fujitsu_init().

* Add missing sysfs group removal on failure to fujitsu_init().

* Add input device unregistering on failure to acpi_fujitsu_add()
  and acpi_fujitsu_hotkey_add().

* Add input device unregistering/freeing to acpi_fujitsu_remove()
  and acpi_fujitsu_hotkey_remove() (also remove superfluous 'device'
  and 'acpi_driver_data(device)' checks while at it).

* Do few minor cleanups.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Acked-by: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index f9e3e3a..b87a5a2 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -691,7 +691,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
 	result = acpi_bus_get_power(fujitsu->acpi_handle, &state);
 	if (result) {
 		printk(KERN_ERR "Error reading power state\n");
-		goto end;
+		goto err_unregister_input_dev;
 	}
 
 	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
@@ -722,22 +722,22 @@ static int acpi_fujitsu_add(struct acpi_device *device)
 
 	return result;
 
-end:
+err_unregister_input_dev:
+	input_unregister_device(input);
 err_free_input_dev:
 	input_free_device(input);
 err_stop:
-
 	return result;
 }
 
 static int acpi_fujitsu_remove(struct acpi_device *device, int type)
 {
-	struct fujitsu_t *fujitsu = NULL;
+	struct fujitsu_t *fujitsu = acpi_driver_data(device);
+	struct input_dev *input = fujitsu->input;
 
-	if (!device || !acpi_driver_data(device))
-		return -EINVAL;
+	input_unregister_device(input);
 
-	fujitsu = acpi_driver_data(device);
+	input_free_device(input);
 
 	fujitsu->acpi_handle = NULL;
 
@@ -862,7 +862,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	result = acpi_bus_get_power(fujitsu_hotkey->acpi_handle, &state);
 	if (result) {
 		printk(KERN_ERR "Error reading power state\n");
-		goto end;
+		goto err_unregister_input_dev;
 	}
 
 	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
@@ -902,7 +902,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	printk(KERN_INFO "fujitsu-laptop: BTNI: [0x%x]\n",
 		call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
 
-	#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
 		result = led_classdev_register(&fujitsu->pf_device->dev,
 						&logolamp_led);
@@ -925,33 +925,41 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 			"LED handler for keyboard lamps, error %i\n", result);
 		}
 	}
-	#endif
+#endif
 
 	return result;
 
-end:
+err_unregister_input_dev:
+	input_unregister_device(input);
 err_free_input_dev:
 	input_free_device(input);
 err_free_fifo:
 	kfifo_free(fujitsu_hotkey->fifo);
 err_stop:
-
 	return result;
 }
 
 static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type)
 {
-	struct fujitsu_hotkey_t *fujitsu_hotkey = NULL;
+	struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device);
+	struct input_dev *input = fujitsu_hotkey->input;
 
-	if (!device || !acpi_driver_data(device))
-		return -EINVAL;
+#ifdef CONFIG_LEDS_CLASS
+	if (fujitsu_hotkey->logolamp_registered)
+		led_classdev_unregister(&logolamp_led);
+
+	if (fujitsu_hotkey->kblamps_registered)
+		led_classdev_unregister(&kblamps_led);
+#endif
 
-	fujitsu_hotkey = acpi_driver_data(device);
+	input_unregister_device(input);
 
-	fujitsu_hotkey->acpi_handle = NULL;
+	input_free_device(input);
 
 	kfifo_free(fujitsu_hotkey->fifo);
 
+	fujitsu_hotkey->acpi_handle = NULL;
+
 	return 0;
 }
 
@@ -1121,8 +1129,11 @@ static int __init fujitsu_init(void)
 		fujitsu->bl_device =
 			backlight_device_register("fujitsu-laptop", NULL, NULL,
 						  &fujitsubl_ops);
-		if (IS_ERR(fujitsu->bl_device))
-			return PTR_ERR(fujitsu->bl_device);
+		if (IS_ERR(fujitsu->bl_device)) {
+			ret = PTR_ERR(fujitsu->bl_device);
+			fujitsu->bl_device = NULL;
+			goto fail_sysfs_group;
+		}
 		max_brightness = fujitsu->max_brightness;
 		fujitsu->bl_device->props.max_brightness = max_brightness - 1;
 		fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
@@ -1162,32 +1173,22 @@ static int __init fujitsu_init(void)
 	return 0;
 
 fail_hotkey1:
-
 	kfree(fujitsu_hotkey);
-
 fail_hotkey:
-
 	platform_driver_unregister(&fujitsupf_driver);
-
 fail_backlight:
-
 	if (fujitsu->bl_device)
 		backlight_device_unregister(fujitsu->bl_device);
-
+fail_sysfs_group:
+	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
+			   &fujitsupf_attribute_group);
 fail_platform_device2:
-
 	platform_device_del(fujitsu->pf_device);
-
 fail_platform_device1:
-
 	platform_device_put(fujitsu->pf_device);
-
 fail_platform_driver:
-
 	acpi_bus_unregister_driver(&acpi_fujitsu_driver);
-
 fail_acpi:
-
 	kfree(fujitsu);
 
 	return ret;
@@ -1195,28 +1196,23 @@ fail_acpi:
 
 static void __exit fujitsu_cleanup(void)
 {
-	#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
-	if (fujitsu_hotkey->logolamp_registered != 0)
-		led_classdev_unregister(&logolamp_led);
+	acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
 
-	if (fujitsu_hotkey->kblamps_registered != 0)
-		led_classdev_unregister(&kblamps_led);
-	#endif
+	kfree(fujitsu_hotkey);
 
-	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
-			   &fujitsupf_attribute_group);
-	platform_device_unregister(fujitsu->pf_device);
 	platform_driver_unregister(&fujitsupf_driver);
+
 	if (fujitsu->bl_device)
 		backlight_device_unregister(fujitsu->bl_device);
 
-	acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
+			   &fujitsupf_attribute_group);
 
-	kfree(fujitsu);
+	platform_device_unregister(fujitsu->pf_device);
 
-	acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
+	acpi_bus_unregister_driver(&acpi_fujitsu_driver);
 
-	kfree(fujitsu_hotkey);
+	kfree(fujitsu);
 
 	printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n");
 }
-- 
cgit v0.10.2


From 84a6ce267296dabdf427ea4aff73dc58164863bb Mon Sep 17 00:00:00 2001
From: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Date: Fri, 31 Jul 2009 18:16:59 +0930
Subject: fujitsu-laptop: increment driver version

Increment driver version to reflect the changes from this patch series.

Signed-off-by: Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index b87a5a2..1ed3513 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -70,7 +70,7 @@
 #include <linux/leds.h>
 #endif
 
-#define FUJITSU_DRIVER_VERSION "0.5.0"
+#define FUJITSU_DRIVER_VERSION "0.6.0"
 
 #define FUJITSU_LCD_N_LEVELS 8
 
-- 
cgit v0.10.2


From 6d41839e762f8b8b03dbb97fd0d41b244d0bc902 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:32 +0000
Subject: eeepc-laptop: don't touch the pci slot if it was claimed by a
 different driver

The whole point of registering as a PCI hotplug driver was to prevent
conflict with pciehp.  At the moment it happens to work because
eeepc-laptop is loaded first, but it doesn't work the other way round.
If pciehp is loaded first then we fail to claim the slot - we need to
respect this and not handle hotplug events.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 222ffb89..69d73ed 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -664,15 +664,20 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot,
 static void eeepc_hotplug_work(struct work_struct *work)
 {
 	struct pci_dev *dev;
-	struct pci_bus *bus = pci_find_bus(0, 1);
-	bool blocked;
+	struct pci_bus *bus;
+	bool blocked = eeepc_wlan_rfkill_blocked();
+
+	rfkill_set_sw_state(ehotk->wlan_rfkill, blocked);
 
+	if (ehotk->hotplug_slot == NULL)
+		return;
+
+	bus = pci_find_bus(0, 1);
 	if (!bus) {
 		pr_warning("Unable to find PCI bus 1?\n");
 		return;
 	}
 
-	blocked = eeepc_wlan_rfkill_blocked();
 	if (!blocked) {
 		dev = pci_get_slot(bus, 0);
 		if (dev) {
@@ -693,8 +698,6 @@ static void eeepc_hotplug_work(struct work_struct *work)
 			pci_dev_put(dev);
 		}
 	}
-
-	rfkill_set_sw_state(ehotk->wlan_rfkill, blocked);
 }
 
 static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
-- 
cgit v0.10.2


From dcf443b5813074031a45b05ad9c57da98bcae329 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:33 +0000
Subject: eeepc-laptop: use a mutex to serialize pci hotplug (resume vs.
 notify)

Commit d0265f0 "eeepc-laptop: fix hot-unplug on resume" used a workqueue
to protect pci hotplug against multiple simultaneous calls during
resume.  It seems to work, but a mutex would be more appropriate.

This is in preparation to fix the potential pci hotplug race on unload.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 69d73ed..1790103 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -143,7 +143,7 @@ struct eeepc_hotk {
 	struct rfkill *bluetooth_rfkill;
 	struct rfkill *wwan3g_rfkill;
 	struct hotplug_slot *hotplug_slot;
-	struct work_struct hotplug_work;
+	struct mutex hotplug_lock;
 };
 
 /* The actual device the driver binds to */
@@ -661,7 +661,7 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-static void eeepc_hotplug_work(struct work_struct *work)
+static void eeepc_rfkill_hotplug(void)
 {
 	struct pci_dev *dev;
 	struct pci_bus *bus;
@@ -669,13 +669,15 @@ static void eeepc_hotplug_work(struct work_struct *work)
 
 	rfkill_set_sw_state(ehotk->wlan_rfkill, blocked);
 
+	mutex_lock(&ehotk->hotplug_lock);
+
 	if (ehotk->hotplug_slot == NULL)
-		return;
+		goto out_unlock;
 
 	bus = pci_find_bus(0, 1);
 	if (!bus) {
 		pr_warning("Unable to find PCI bus 1?\n");
-		return;
+		goto out_unlock;
 	}
 
 	if (!blocked) {
@@ -683,7 +685,7 @@ static void eeepc_hotplug_work(struct work_struct *work)
 		if (dev) {
 			/* Device already present */
 			pci_dev_put(dev);
-			return;
+			goto out_unlock;
 		}
 		dev = pci_scan_single_device(bus, 0);
 		if (dev) {
@@ -698,6 +700,9 @@ static void eeepc_hotplug_work(struct work_struct *work)
 			pci_dev_put(dev);
 		}
 	}
+
+out_unlock:
+	mutex_unlock(&ehotk->hotplug_lock);
 }
 
 static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
@@ -705,7 +710,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
 	if (event != ACPI_NOTIFY_BUS_CHECK)
 		return;
 
-	schedule_work(&ehotk->hotplug_work);
+	eeepc_rfkill_hotplug();
 }
 
 static void eeepc_hotk_notify(struct acpi_device *device, u32 event)
@@ -896,7 +901,7 @@ static int eeepc_hotk_resume(struct acpi_device *device)
 
 		rfkill_set_sw_state(ehotk->wlan_rfkill, wlan != 1);
 
-		schedule_work(&ehotk->hotplug_work);
+		eeepc_rfkill_hotplug();
 	}
 
 	if (ehotk->bluetooth_rfkill)
@@ -1097,7 +1102,7 @@ static int eeepc_rfkill_init(struct device *dev)
 {
 	int result = 0;
 
-	INIT_WORK(&ehotk->hotplug_work, eeepc_hotplug_work);
+	mutex_init(&ehotk->hotplug_lock);
 
 	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6");
 	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7");
-- 
cgit v0.10.2


From 07e84aa98f6b3a7278d3267f6f657955ed3eb973 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:34 +0000
Subject: eeepc-laptop: fix pci hotplug race on load and unload

Wifi rfkill state changes can race with pci hotplug cleanup.  A simple
fix is to refresh the hotplug state just before deregistering the pci
hotplug slot.

There is also potential for a hotplug notification to fire too early
during setup, while the structures it uses are still being initialised.
(This could only happen if the BIOS performs hotplug itself; a bug
triggered by removing the battery while hibernated).  Avoid this by
registering the notifier later.  The same refresh mechanism is used
to handle rfkill state changes which can now race with registration.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 1790103..8dd86f7 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -667,37 +667,37 @@ static void eeepc_rfkill_hotplug(void)
 	struct pci_bus *bus;
 	bool blocked = eeepc_wlan_rfkill_blocked();
 
-	rfkill_set_sw_state(ehotk->wlan_rfkill, blocked);
+	if (ehotk->wlan_rfkill)
+		rfkill_set_sw_state(ehotk->wlan_rfkill, blocked);
 
 	mutex_lock(&ehotk->hotplug_lock);
 
-	if (ehotk->hotplug_slot == NULL)
-		goto out_unlock;
-
-	bus = pci_find_bus(0, 1);
-	if (!bus) {
-		pr_warning("Unable to find PCI bus 1?\n");
-		goto out_unlock;
-	}
-
-	if (!blocked) {
-		dev = pci_get_slot(bus, 0);
-		if (dev) {
-			/* Device already present */
-			pci_dev_put(dev);
+	if (ehotk->hotplug_slot) {
+		bus = pci_find_bus(0, 1);
+		if (!bus) {
+			pr_warning("Unable to find PCI bus 1?\n");
 			goto out_unlock;
 		}
-		dev = pci_scan_single_device(bus, 0);
-		if (dev) {
-			pci_bus_assign_resources(bus);
-			if (pci_bus_add_device(dev))
-				pr_err("Unable to hotplug wifi\n");
-		}
-	} else {
-		dev = pci_get_slot(bus, 0);
-		if (dev) {
-			pci_remove_bus_device(dev);
-			pci_dev_put(dev);
+
+		if (!blocked) {
+			dev = pci_get_slot(bus, 0);
+			if (dev) {
+				/* Device already present */
+				pci_dev_put(dev);
+				goto out_unlock;
+			}
+			dev = pci_scan_single_device(bus, 0);
+			if (dev) {
+				pci_bus_assign_resources(bus);
+				if (pci_bus_add_device(dev))
+					pr_err("Unable to hotplug wifi\n");
+			}
+		} else {
+			dev = pci_get_slot(bus, 0);
+			if (dev) {
+				pci_remove_bus_device(dev);
+				pci_dev_put(dev);
+			}
 		}
 	}
 
@@ -1029,14 +1029,22 @@ static void eeepc_rfkill_exit(void)
 {
 	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6");
 	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7");
-	if (ehotk->wlan_rfkill)
+	if (ehotk->wlan_rfkill) {
 		rfkill_unregister(ehotk->wlan_rfkill);
+		ehotk->wlan_rfkill = NULL;
+	}
+	/*
+	 * Refresh pci hotplug in case the rfkill state was changed after
+	 * eeepc_unregister_rfkill_notifier()
+	 */
+	eeepc_rfkill_hotplug();
+	if (ehotk->hotplug_slot)
+		pci_hp_deregister(ehotk->hotplug_slot);
+
 	if (ehotk->bluetooth_rfkill)
 		rfkill_unregister(ehotk->bluetooth_rfkill);
 	if (ehotk->wwan3g_rfkill)
 		rfkill_unregister(ehotk->wwan3g_rfkill);
-	if (ehotk->hotplug_slot)
-		pci_hp_deregister(ehotk->hotplug_slot);
 }
 
 static void eeepc_input_exit(void)
@@ -1104,9 +1112,6 @@ static int eeepc_rfkill_init(struct device *dev)
 
 	mutex_init(&ehotk->hotplug_lock);
 
-	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6");
-	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7");
-
 	result = eeepc_new_rfkill(&ehotk->wlan_rfkill,
 				  "eeepc-wlan", dev,
 				  RFKILL_TYPE_WLAN, CM_ASL_WLAN);
@@ -1136,6 +1141,14 @@ static int eeepc_rfkill_init(struct device *dev)
 	if (result == -EBUSY)
 		result = 0;
 
+	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6");
+	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7");
+	/*
+	 * Refresh pci hotplug in case the rfkill state was changed during
+	 * setup.
+	 */
+	eeepc_rfkill_hotplug();
+
 exit:
 	if (result && result != -ENODEV)
 		eeepc_rfkill_exit();
-- 
cgit v0.10.2


From 1e7798547fe6920ae27fb92c9202353e9e4c55db Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:35 +0000
Subject: eeepc-laptop: fix ordering of init and exit functions

1. input and backlight devices were registered after acpi notifications
   are enabled.  This left a window where eeepc_hotk_notify() might
   find these devices in an inconsistent (half-initialized) state.

-> Move all device registration into eeepc_hotk_add(), which is called
   before enabling acpi notifications.

2. input and backlight devices were unregistered before acpi
   notifications are disabled.  This left a window where
   eeepc_hotk_notify() might find these devices in an inconsistent
   (half-destroyed) state.

-> Move all device unregistration into eeepc_hotk_remove(), which is
   called after disabling acpi notifications.

3. The acpi driver was not freed if an error occured further down in
   eeepc_laptop_init().

-> The rest of eeepc_laptop_init() has been moved to eeepc_hotk_add(),
   so this is no longer a problem.

4. The acpi driver was unregistered before the platform driver.  This
   left a window where a sysfs access could attempt to read the ehotk
   structure after it had been freed by eeepc_hotk_remove().

-> The acpi driver is now unregistered as the last step in
   eeepc_laptop_exit(), so this is no longer a problem.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 8dd86f7..cf47d1c 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -847,44 +847,6 @@ error_slot:
 	return ret;
 }
 
-static int eeepc_hotk_add(struct acpi_device *device)
-{
-	int result;
-
-	if (!device)
-		 return -EINVAL;
-	pr_notice(EEEPC_HOTK_NAME "\n");
-	ehotk = kzalloc(sizeof(struct eeepc_hotk), GFP_KERNEL);
-	if (!ehotk)
-		return -ENOMEM;
-	ehotk->init_flag = DISABLE_ASL_WLAN | DISABLE_ASL_DISPLAYSWITCH;
-	ehotk->handle = device->handle;
-	strcpy(acpi_device_name(device), EEEPC_HOTK_DEVICE_NAME);
-	strcpy(acpi_device_class(device), EEEPC_HOTK_CLASS);
-	device->driver_data = ehotk;
-	ehotk->device = device;
-	result = eeepc_hotk_check();
-	if (result)
-		goto ehotk_fail;
-
-	return 0;
-
- ehotk_fail:
-	kfree(ehotk);
-	ehotk = NULL;
-
-	return result;
-}
-
-static int eeepc_hotk_remove(struct acpi_device *device, int type)
-{
-	if (!device || !acpi_driver_data(device))
-		 return -EINVAL;
-
-	kfree(ehotk);
-	return 0;
-}
-
 static int eeepc_hotk_resume(struct acpi_device *device)
 {
 	if (ehotk->wlan_rfkill) {
@@ -1066,19 +1028,6 @@ static void eeepc_hwmon_exit(void)
 	eeepc_hwmon_device = NULL;
 }
 
-static void __exit eeepc_laptop_exit(void)
-{
-	eeepc_backlight_exit();
-	eeepc_rfkill_exit();
-	eeepc_input_exit();
-	eeepc_hwmon_exit();
-	acpi_bus_unregister_driver(&eeepc_hotk_driver);
-	sysfs_remove_group(&platform_device->dev.kobj,
-			   &platform_attribute_group);
-	platform_device_unregister(platform_device);
-	platform_driver_unregister(&platform_driver);
-}
-
 static int eeepc_new_rfkill(struct rfkill **rfkill,
 			    const char *name, struct device *dev,
 			    enum rfkill_type type, int cm)
@@ -1193,21 +1142,27 @@ static int eeepc_hwmon_init(struct device *dev)
 	return result;
 }
 
-static int __init eeepc_laptop_init(void)
+static int eeepc_hotk_add(struct acpi_device *device)
 {
 	struct device *dev;
 	int result;
 
-	if (acpi_disabled)
-		return -ENODEV;
-	result = acpi_bus_register_driver(&eeepc_hotk_driver);
-	if (result < 0)
-		return result;
-	if (!ehotk) {
-		acpi_bus_unregister_driver(&eeepc_hotk_driver);
-		return -ENODEV;
-	}
+	if (!device)
+		 return -EINVAL;
+	pr_notice(EEEPC_HOTK_NAME "\n");
+	ehotk = kzalloc(sizeof(struct eeepc_hotk), GFP_KERNEL);
+	if (!ehotk)
+		return -ENOMEM;
+	ehotk->init_flag = DISABLE_ASL_WLAN | DISABLE_ASL_DISPLAYSWITCH;
+	ehotk->handle = device->handle;
+	strcpy(acpi_device_name(device), EEEPC_HOTK_DEVICE_NAME);
+	strcpy(acpi_device_class(device), EEEPC_HOTK_CLASS);
+	device->driver_data = ehotk;
+	ehotk->device = device;
 
+	result = eeepc_hotk_check();
+	if (result)
+		goto fail_check;
 	eeepc_enable_camera();
 
 	/* Register platform stuff */
@@ -1246,6 +1201,7 @@ static int __init eeepc_laptop_init(void)
 		goto fail_rfkill;
 
 	return 0;
+
 fail_rfkill:
 	eeepc_hwmon_exit();
 fail_hwmon:
@@ -1261,8 +1217,50 @@ fail_platform_device1:
 	platform_driver_unregister(&platform_driver);
 fail_platform_driver:
 	eeepc_input_exit();
+fail_check:
+	kfree(ehotk);
+
 	return result;
 }
 
+static int eeepc_hotk_remove(struct acpi_device *device, int type)
+{
+	if (!device || !acpi_driver_data(device))
+		 return -EINVAL;
+
+	eeepc_backlight_exit();
+	eeepc_rfkill_exit();
+	eeepc_input_exit();
+	eeepc_hwmon_exit();
+	sysfs_remove_group(&platform_device->dev.kobj,
+			   &platform_attribute_group);
+	platform_device_unregister(platform_device);
+	platform_driver_unregister(&platform_driver);
+
+	kfree(ehotk);
+	return 0;
+}
+
+static int __init eeepc_laptop_init(void)
+{
+	int result;
+
+	if (acpi_disabled)
+		return -ENODEV;
+	result = acpi_bus_register_driver(&eeepc_hotk_driver);
+	if (result < 0)
+		return result;
+	if (!ehotk) {
+		acpi_bus_unregister_driver(&eeepc_hotk_driver);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static void __exit eeepc_laptop_exit(void)
+{
+	acpi_bus_unregister_driver(&eeepc_hotk_driver);
+}
+
 module_init(eeepc_laptop_init);
 module_exit(eeepc_laptop_exit);
-- 
cgit v0.10.2


From f2a9d5e8a649c606f520b7a7b9f4f46fba79c327 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:36 +0000
Subject: eeepc-laptop: make input device a child of the platform device

Sysfs showed the ehotk input device as a "virtual" device - lies!
The input device is provided by a physical device, the eeepc platform.

This requires that we move the creation of the input device to come
after platform device is created.  Input initialization is moved from
ehotk_check() [sic] to a new function called eeepc_input_init().  This
brings the input device into line with the other eeepc-laptop devices.

Also, refuse to load if we fail to register the input device.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index cf47d1c..298dac9 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -579,7 +579,6 @@ static void cmsg_quirks(void)
 
 static int eeepc_hotk_check(void)
 {
-	const struct key_entry *key;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	int result;
 
@@ -604,31 +603,6 @@ static int eeepc_hotk_check(void)
 			pr_info("Get control methods supported: 0x%x\n",
 				ehotk->cm_supported);
 		}
-		ehotk->inputdev = input_allocate_device();
-		if (!ehotk->inputdev) {
-			pr_info("Unable to allocate input device\n");
-			return 0;
-		}
-		ehotk->inputdev->name = "Asus EeePC extra buttons";
-		ehotk->inputdev->phys = EEEPC_HOTK_FILE "/input0";
-		ehotk->inputdev->id.bustype = BUS_HOST;
-		ehotk->inputdev->getkeycode = eeepc_getkeycode;
-		ehotk->inputdev->setkeycode = eeepc_setkeycode;
-
-		for (key = eeepc_keymap; key->type != KE_END; key++) {
-			switch (key->type) {
-			case KE_KEY:
-				set_bit(EV_KEY, ehotk->inputdev->evbit);
-				set_bit(key->keycode, ehotk->inputdev->keybit);
-				break;
-			}
-		}
-		result = input_register_device(ehotk->inputdev);
-		if (result) {
-			pr_info("Unable to register input device\n");
-			input_free_device(ehotk->inputdev);
-			return 0;
-		}
 	} else {
 		pr_err("Hotkey device not present, aborting\n");
 		return -EINVAL;
@@ -1142,6 +1116,40 @@ static int eeepc_hwmon_init(struct device *dev)
 	return result;
 }
 
+static int eeepc_input_init(struct device *dev)
+{
+	const struct key_entry *key;
+	int result;
+
+	ehotk->inputdev = input_allocate_device();
+	if (!ehotk->inputdev) {
+		pr_info("Unable to allocate input device\n");
+		return -ENOMEM;
+	}
+	ehotk->inputdev->name = "Asus EeePC extra buttons";
+	ehotk->inputdev->dev.parent = dev;
+	ehotk->inputdev->phys = EEEPC_HOTK_FILE "/input0";
+	ehotk->inputdev->id.bustype = BUS_HOST;
+	ehotk->inputdev->getkeycode = eeepc_getkeycode;
+	ehotk->inputdev->setkeycode = eeepc_setkeycode;
+
+	for (key = eeepc_keymap; key->type != KE_END; key++) {
+		switch (key->type) {
+		case KE_KEY:
+			set_bit(EV_KEY, ehotk->inputdev->evbit);
+			set_bit(key->keycode, ehotk->inputdev->keybit);
+			break;
+		}
+	}
+	result = input_register_device(ehotk->inputdev);
+	if (result) {
+		pr_info("Unable to register input device\n");
+		input_free_device(ehotk->inputdev);
+		return result;
+	}
+	return 0;
+}
+
 static int eeepc_hotk_add(struct acpi_device *device)
 {
 	struct device *dev;
@@ -1162,7 +1170,7 @@ static int eeepc_hotk_add(struct acpi_device *device)
 
 	result = eeepc_hotk_check();
 	if (result)
-		goto fail_check;
+		goto fail_platform_driver;
 	eeepc_enable_camera();
 
 	/* Register platform stuff */
@@ -1192,6 +1200,10 @@ static int eeepc_hotk_add(struct acpi_device *device)
 		pr_info("Backlight controlled by ACPI video "
 			"driver\n");
 
+	result = eeepc_input_init(dev);
+	if (result)
+		goto fail_input;
+
 	result = eeepc_hwmon_init(dev);
 	if (result)
 		goto fail_hwmon;
@@ -1205,6 +1217,8 @@ static int eeepc_hotk_add(struct acpi_device *device)
 fail_rfkill:
 	eeepc_hwmon_exit();
 fail_hwmon:
+	eeepc_input_exit();
+fail_input:
 	eeepc_backlight_exit();
 fail_backlight:
 	sysfs_remove_group(&platform_device->dev.kobj,
@@ -1216,8 +1230,6 @@ fail_platform_device2:
 fail_platform_device1:
 	platform_driver_unregister(&platform_driver);
 fail_platform_driver:
-	eeepc_input_exit();
-fail_check:
 	kfree(ehotk);
 
 	return result;
-- 
cgit v0.10.2


From ffb03575284e0f72d7ea001178c793afa265b8b5 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:37 +0000
Subject: eeepc-laptop: remove redundant rfkill_set_sw_state in resume handler

rfkill_set_sw_state() will already be called by eeepc_rfkill_hotplug().

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 298dac9..7f7573a 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -835,8 +835,7 @@ static int eeepc_hotk_resume(struct acpi_device *device)
 		wlan = get_acpi(CM_ASL_WLAN);
 		set_acpi(CM_ASL_WLAN, wlan);
 
-		rfkill_set_sw_state(ehotk->wlan_rfkill, wlan != 1);
-
+		/* Refresh both rfkill state and pci hotplug */
 		eeepc_rfkill_hotplug();
 	}
 
-- 
cgit v0.10.2


From a47461011a0f5110c497b9b163d1125d258418b2 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:38 +0000
Subject: eeepc-laptop: check the 3G rfkill state on resume

All the rfkill devices are treated as "persistent", 3G is no exception.
This means their state may change over hibernation.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 7f7573a..8a32004 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -842,6 +842,9 @@ static int eeepc_hotk_resume(struct acpi_device *device)
 	if (ehotk->bluetooth_rfkill)
 		rfkill_set_sw_state(ehotk->bluetooth_rfkill,
 				    get_acpi(CM_ASL_BLUETOOTH) != 1);
+	if (ehotk->wwan3g_rfkill)
+		rfkill_set_sw_state(ehotk->wwan3g_rfkill,
+				    get_acpi(CM_ASL_3G) != 1);
 
 	return 0;
 }
-- 
cgit v0.10.2


From c1edd99f1c2b0285ce810d217180bf37bbae550e Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:39 +0000
Subject: eeepc-laptop: correct the description of the hibernation abort bug

Actually it is only the LED which is affected.  The bios bug does not
disable the wifi.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 8a32004..df68ae6 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -826,11 +826,10 @@ static int eeepc_hotk_resume(struct acpi_device *device)
 	if (ehotk->wlan_rfkill) {
 		bool wlan;
 
-		/* Workaround - it seems that _PTS disables the wireless
-		   without notification or changing the value read by WLAN.
-		   Normally this is fine because the correct value is restored
-		   from the non-volatile storage on resume, but we need to do
-		   it ourself if case suspend is aborted, or we lose wireless.
+		/*
+		 * Work around bios bug - acpi _PTS turns off the wireless led
+		 * during suspend.  Normally it restores it on resume, but
+		 * we should kick it ourselves in case suspend is aborted.
 		 */
 		wlan = get_acpi(CM_ASL_WLAN);
 		set_acpi(CM_ASL_WLAN, wlan);
-- 
cgit v0.10.2


From c200da5d2900df9c24fb8041870d92a4175bbef3 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Fri, 28 Aug 2009 12:56:40 +0000
Subject: eeepc-laptop: switch to dev_pm_ops

This also involves switching the resume handler from the acpi device
to the platform device.  Using the more fine grained handlers allows
two improvements:

1. We only need to recheck rfkill state after resume from hibernation.

2. The wireless LED workaround accounts for up to 1.1s out of 1.7s
resuming devices (when wireless is enabled).  We can limit the
workaround to thaw(), so that it only delays suspend to disk.

The workaround is only likely to help when hibernation is aborted.
Suspend to ram cannot be aborted by the user.  Device suspend errors may
well happen before eeepc-laptop would even be frozen.  Suspend errors
which happen after that could be pretty funky anyway.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index df68ae6..1c94860 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -150,10 +150,19 @@ struct eeepc_hotk {
 static struct eeepc_hotk *ehotk;
 
 /* Platform device/driver */
+static int eeepc_hotk_thaw(struct device *device);
+static int eeepc_hotk_restore(struct device *device);
+
+static struct dev_pm_ops eeepc_pm_ops = {
+	.thaw = eeepc_hotk_thaw,
+	.restore = eeepc_hotk_restore,
+};
+
 static struct platform_driver platform_driver = {
 	.driver = {
 		.name = EEEPC_HOTK_FILE,
 		.owner = THIS_MODULE,
+		.pm = &eeepc_pm_ops,
 	}
 };
 
@@ -192,7 +201,6 @@ static struct key_entry eeepc_keymap[] = {
  */
 static int eeepc_hotk_add(struct acpi_device *device);
 static int eeepc_hotk_remove(struct acpi_device *device, int type);
-static int eeepc_hotk_resume(struct acpi_device *device);
 static void eeepc_hotk_notify(struct acpi_device *device, u32 event);
 
 static const struct acpi_device_id eeepc_device_ids[] = {
@@ -209,7 +217,6 @@ static struct acpi_driver eeepc_hotk_driver = {
 	.ops = {
 		.add = eeepc_hotk_add,
 		.remove = eeepc_hotk_remove,
-		.resume = eeepc_hotk_resume,
 		.notify = eeepc_hotk_notify,
 	},
 };
@@ -821,7 +828,7 @@ error_slot:
 	return ret;
 }
 
-static int eeepc_hotk_resume(struct acpi_device *device)
+static int eeepc_hotk_thaw(struct device *device)
 {
 	if (ehotk->wlan_rfkill) {
 		bool wlan;
@@ -829,14 +836,20 @@ static int eeepc_hotk_resume(struct acpi_device *device)
 		/*
 		 * Work around bios bug - acpi _PTS turns off the wireless led
 		 * during suspend.  Normally it restores it on resume, but
-		 * we should kick it ourselves in case suspend is aborted.
+		 * we should kick it ourselves in case hibernation is aborted.
 		 */
 		wlan = get_acpi(CM_ASL_WLAN);
 		set_acpi(CM_ASL_WLAN, wlan);
+	}
+
+	return 0;
+}
 
-		/* Refresh both rfkill state and pci hotplug */
+static int eeepc_hotk_restore(struct device *device)
+{
+	/* Refresh both wlan rfkill state and pci hotplug */
+	if (ehotk->wlan_rfkill)
 		eeepc_rfkill_hotplug();
-	}
 
 	if (ehotk->bluetooth_rfkill)
 		rfkill_set_sw_state(ehotk->bluetooth_rfkill,
-- 
cgit v0.10.2


From d1ec9c3d434d94e3674bcf433e8e8e7462b8e1c0 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:41 +0000
Subject: eeepc-laptop: add rfkill support for the Wimax in ASUS Eee PC 1000HG

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 1c94860..c9febf4 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -142,6 +142,7 @@ struct eeepc_hotk {
 	struct rfkill *wlan_rfkill;
 	struct rfkill *bluetooth_rfkill;
 	struct rfkill *wwan3g_rfkill;
+	struct rfkill *wimax_rfkill;
 	struct hotplug_slot *hotplug_slot;
 	struct mutex hotplug_lock;
 };
@@ -857,6 +858,9 @@ static int eeepc_hotk_restore(struct device *device)
 	if (ehotk->wwan3g_rfkill)
 		rfkill_set_sw_state(ehotk->wwan3g_rfkill,
 				    get_acpi(CM_ASL_3G) != 1);
+	if (ehotk->wimax_rfkill)
+		rfkill_set_sw_state(ehotk->wimax_rfkill,
+				    get_acpi(CM_ASL_WIMAX) != 1);
 
 	return 0;
 }
@@ -995,6 +999,8 @@ static void eeepc_rfkill_exit(void)
 		rfkill_unregister(ehotk->bluetooth_rfkill);
 	if (ehotk->wwan3g_rfkill)
 		rfkill_unregister(ehotk->wwan3g_rfkill);
+	if (ehotk->wimax_rfkill)
+		rfkill_unregister(ehotk->wimax_rfkill);
 }
 
 static void eeepc_input_exit(void)
@@ -1070,6 +1076,13 @@ static int eeepc_rfkill_init(struct device *dev)
 	if (result && result != -ENODEV)
 		goto exit;
 
+	result = eeepc_new_rfkill(&ehotk->wimax_rfkill,
+				  "eeepc-wimax", dev,
+				  RFKILL_TYPE_WIMAX, CM_ASL_WIMAX);
+
+	if (result && result != -ENODEV)
+		goto exit;
+
 	result = eeepc_setup_pci_hotplug();
 	/*
 	 * If we get -EBUSY then something else is handling the PCI hotplug -
-- 
cgit v0.10.2


From d0a6825c9217cfc52d39b2b2bedd73bef8019f79 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:42 +0000
Subject: eeepc-laptop: document sysfs interface

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/ABI/testing/sysfs-platform-eeepc-laptop b/Documentation/ABI/testing/sysfs-platform-eeepc-laptop
new file mode 100644
index 0000000..7445dfb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-eeepc-laptop
@@ -0,0 +1,50 @@
+What:		/sys/devices/platform/eeepc-laptop/disp
+Date:		May 2008
+KernelVersion:	2.6.26
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		This file allows display switching.
+		- 1 = LCD
+		- 2 = CRT
+		- 3 = LCD+CRT
+		If you run X11, you should use xrandr instead.
+
+What:		/sys/devices/platform/eeepc-laptop/camera
+Date:		May 2008
+KernelVersion:	2.6.26
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		Control the camera. 1 means on, 0 means off.
+
+What:		/sys/devices/platform/eeepc-laptop/cardr
+Date:		May 2008
+KernelVersion:	2.6.26
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		Control the card reader. 1 means on, 0 means off.
+
+What:		/sys/devices/platform/eeepc-laptop/cpufv
+Date:		Jun 2009
+KernelVersion:	2.6.31
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		Change CPU clock configuration.
+		On the Eee PC 1000H there are three available clock configuration:
+		    * 0 -> Super Performance Mode
+		    * 1 -> High Performance Mode
+		    * 2 -> Power Saving Mode
+		On Eee PC 701 there is only 2 available clock configurations.
+		Available configuration are listed in available_cpufv file.
+		Reading this file will show the raw hexadecimal value which
+		is defined as follow:
+		| 8 bit | 8 bit |
+		    |       `---- Current mode
+		    `------------ Availables modes
+		For example, 0x301 means: mode 1 selected, 3 available modes.
+
+What:		/sys/devices/platform/eeepc-laptop/available_cpufv
+Date:		Jun 2009
+KernelVersion:	2.6.31
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		List available cpufv modes.
-- 
cgit v0.10.2


From 3c4c1b69a2d76ac9a1c716233fde956dba757d76 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:43 +0000
Subject: video/backlight: document sysfs interface

Date and KernelVersion may be wrong because the backlight
interface was introduced before git initial import.

Cc:Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/ABI/stable/sysfs-class-backlight b/Documentation/ABI/stable/sysfs-class-backlight
new file mode 100644
index 0000000..4d637e1
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-backlight
@@ -0,0 +1,36 @@
+What:		/sys/class/backlight/<backlight>/bl_power
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Control BACKLIGHT power, values are FB_BLANK_* from fb.h
+		 - FB_BLANK_UNBLANK (0)   : power on.
+		 - FB_BLANK_POWERDOWN (4) : power off
+Users:		HAL
+
+What:		/sys/class/backlight/<backlight>/brightness
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Control the brightness for this <backlight>. Values
+		are between 0 and max_brightness. This file will also
+		show the brightness level stored in the driver, which
+		may not be the actual brightness (see actual_brightness).
+Users:		HAL
+
+What:		/sys/class/backlight/<backlight>/actual_brightness
+Date:		March 2006
+KernelVersion:	2.6.17
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Show the actual brightness by querying the hardware.
+Users:		HAL
+
+What:		/sys/class/backlight/<backlight>/max_brightness
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Maximum brightness for <backlight>.
+Users:		HAL
-- 
cgit v0.10.2


From 243ca3e401bc62e704785d215931f1a51fd53bd7 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:44 +0000
Subject: video/lcd: document sysfs interface

Date and KernelVersion may be wrong because the lcd
interface was introduced before git initial import.

Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/ABI/testing/sysfs-class-lcd b/Documentation/ABI/testing/sysfs-class-lcd
new file mode 100644
index 0000000..35906bf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-lcd
@@ -0,0 +1,23 @@
+What:		/sys/class/lcd/<lcd>/lcd_power
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Control LCD power, values are FB_BLANK_* from fb.h
+		 - FB_BLANK_UNBLANK (0)   : power on.
+		 - FB_BLANK_POWERDOWN (4) : power off
+
+What:		/sys/class/lcd/<lcd>/contrast
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Current contrast of this LCD device. Value is between 0 and
+		/sys/class/lcd/<lcd>/max_contrast.
+
+What:		/sys/class/lcd/<lcd>/max_contrast
+Date:		April 2005
+KernelVersion:	2.6.12
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Maximum contrast for this LCD device.
-- 
cgit v0.10.2


From 5f634c6527249275df4199a294ee9cec2f3ff3b1 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:45 +0000
Subject: led: document sysfs interface

Also fix Documentation/led-class.txt, the acceptable
range of values for brightness is 0-max_brightness, not 0-255.

Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led
new file mode 100644
index 0000000..9e4541d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -0,0 +1,28 @@
+What:		/sys/class/leds/<led>/brightness
+Date:		March 2006
+KernelVersion:	2.6.17
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Set the brightness of the LED. Most LEDs don't
+		have hardware brightness support so will just be turned on for
+		non-zero brightness settings. The value is between 0 and
+		/sys/class/leds/<led>/max_brightness.
+
+What:		/sys/class/leds/<led>/max_brightness
+Date:		March 2006
+KernelVersion:	2.6.17
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Maximum brightness level for this led, default is 255 (LED_FULL).
+
+What:		/sys/class/leds/<led>/trigger
+Date:		March 2006
+KernelVersion:	2.6.17
+Contact:	Richard Purdie <rpurdie@rpsys.net>
+Description:
+		Set the trigger for this LED. A trigger is a kernel based source
+		of led events.
+		You can change triggers in a similar manner to the way an IO
+		scheduler is chosen. Trigger specific parameters can appear in
+		/sys/class/leds/<led> once a given trigger is selected.
+
diff --git a/Documentation/leds-class.txt b/Documentation/leds-class.txt
index 6399557..8fd5ca2 100644
--- a/Documentation/leds-class.txt
+++ b/Documentation/leds-class.txt
@@ -1,3 +1,4 @@
+
 LED handling under Linux
 ========================
 
@@ -5,10 +6,10 @@ If you're reading this and thinking about keyboard leds, these are
 handled by the input subsystem and the led class is *not* needed.
 
 In its simplest form, the LED class just allows control of LEDs from
-userspace. LEDs appear in /sys/class/leds/. The brightness file will
-set the brightness of the LED (taking a value 0-255). Most LEDs don't
-have hardware brightness support so will just be turned on for non-zero
-brightness settings.
+userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
+LED is defined in max_brightness file. The brightness file will set the brightness
+of the LED (taking a value 0-max_brightness). Most LEDs don't have hardware
+brightness support so will just be turned on for non-zero brightness settings.
 
 The class also introduces the optional concept of an LED trigger. A trigger
 is a kernel based source of led events. Triggers can either be simple or
-- 
cgit v0.10.2


From 1d4a3800c764d111d67462a14589ed1611b2f55e Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:46 +0000
Subject: asus-laptop: Show HRWS in infos and fix output format

Show HRWS in /sys/platform/devices/asus-laptop/infos.
HRWS is a bitfield used to get information about Hardware
available in the laptop.
Also change sprintf format from 0x%04x to %#x.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index db657bb..2344950 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -516,7 +516,17 @@ static ssize_t show_infos(struct device *dev,
 	 */
 	rv = acpi_evaluate_integer(hotk->handle, "SFUN", NULL, &temp);
 	if (!ACPI_FAILURE(rv))
-		len += sprintf(page + len, "SFUN value         : 0x%04x\n",
+		len += sprintf(page + len, "SFUN value         : %#x\n",
+			       (uint) temp);
+	/*
+	 * The HWRS method return informations about the hardware.
+	 * 0x80 bit is for WLAN, 0x100 for Bluetooth.
+	 * The significance of others is yet to be found.
+	 * If we don't find the method, we assume the device are present.
+	 */
+	rv = acpi_evaluate_integer(hotk->handle, "HRWS", NULL, &temp);
+	if (!ACPI_FAILURE(rv))
+		len += sprintf(page + len, "HRWS value         : %#x\n",
 			       (uint) temp);
 	/*
 	 * Another value for userspace: the ASYM method returns 0x02 for
@@ -527,7 +537,7 @@ static ssize_t show_infos(struct device *dev,
 	 */
 	rv = acpi_evaluate_integer(hotk->handle, "ASYM", NULL, &temp);
 	if (!ACPI_FAILURE(rv))
-		len += sprintf(page + len, "ASYM value         : 0x%04x\n",
+		len += sprintf(page + len, "ASYM value         : %#x\n",
 			       (uint) temp);
 	if (asus_info) {
 		snprintf(buf, 16, "%d", asus_info->length);
-- 
cgit v0.10.2


From abfa57e15acaa6e1ec567c250e5212bc55d79e43 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:47 +0000
Subject: asus-laptop: Add *_led_get() functions

Add support for getting led brightness directly from
the hardware. Currently we don't need it, but it is needed
to support keyboard backlight/led.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 2344950..88cc9a1 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -246,12 +246,15 @@ static struct workqueue_struct *led_workqueue;
 #define ASUS_LED(object, ledname)					\
 	static void object##_led_set(struct led_classdev *led_cdev,	\
 				     enum led_brightness value);	\
+	static enum led_brightness object##_led_get(			\
+		struct led_classdev *led_cdev);				\
 	static void object##_led_update(struct work_struct *ignored);	\
 	static int object##_led_wk;					\
 	static DECLARE_WORK(object##_led_work, object##_led_update);	\
 	static struct led_classdev object##_led = {			\
 		.name           = "asus::" ledname,			\
 		.brightness_set = object##_led_set,			\
+		.brightness_get = object##_led_get,			\
 	}
 
 ASUS_LED(mled, "mail");
@@ -399,6 +402,11 @@ static void write_status(acpi_handle handle, int out, int mask)
 	{								\
 		int value = object##_led_wk;				\
 		write_status(object##_set_handle, value, (mask));	\
+	}								\
+	static enum led_brightness object##_led_get(			\
+		struct led_classdev *led_cdev)				\
+	{								\
+		return led_cdev->brightness;				\
 	}
 
 ASUS_LED_HANDLER(mled, MLED_ON);
-- 
cgit v0.10.2


From f641375b65f64e83be8be68ae1ebce21ee4fd578 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:48 +0000
Subject: asus-laptop: Map X50R hotkeys

Map some new hotkeys found on X50R.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 88cc9a1..410e545 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -285,6 +285,9 @@ static struct key_entry asus_keymap[] = {
 	{KE_KEY, 0x51, KEY_WWW},
 	{KE_KEY, 0x5C, KEY_SCREENLOCK},  /* Screenlock */
 	{KE_KEY, 0x5D, KEY_WLAN},
+	{KE_KEY, 0x5E, KEY_WLAN},
+	{KE_KEY, 0x5F, KEY_WLAN},
+	{KE_KEY, 0x60, KEY_SWITCHVIDEOMODE},
 	{KE_KEY, 0x61, KEY_SWITCHVIDEOMODE},
 	{KE_KEY, 0x6B, BTN_TOUCH}, /* Lock Mouse */
 	{KE_KEY, 0x82, KEY_CAMERA},
-- 
cgit v0.10.2


From 977c328d81e31fde70c5ba381d9cf7357451dd74 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:49 +0000
Subject: asus-laptop: set maximum led brightness

Set the right maximum brightness which is one, because
they can only be on or off.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 410e545..652902e 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -243,7 +243,7 @@ static struct backlight_ops asusbl_ops = {
  * potentially bad time, such as a timer interrupt. */
 static struct workqueue_struct *led_workqueue;
 
-#define ASUS_LED(object, ledname)					\
+#define ASUS_LED(object, ledname, max)					\
 	static void object##_led_set(struct led_classdev *led_cdev,	\
 				     enum led_brightness value);	\
 	static enum led_brightness object##_led_get(			\
@@ -255,13 +255,14 @@ static struct workqueue_struct *led_workqueue;
 		.name           = "asus::" ledname,			\
 		.brightness_set = object##_led_set,			\
 		.brightness_get = object##_led_get,			\
+		.max_brightness = max					\
 	}
 
-ASUS_LED(mled, "mail");
-ASUS_LED(tled, "touchpad");
-ASUS_LED(rled, "record");
-ASUS_LED(pled, "phone");
-ASUS_LED(gled, "gaming");
+ASUS_LED(mled, "mail", 1);
+ASUS_LED(tled, "touchpad", 1);
+ASUS_LED(rled, "record", 1);
+ASUS_LED(pled, "phone", 1);
+ASUS_LED(gled, "gaming", 1);
 
 struct key_entry {
 	char type;
-- 
cgit v0.10.2


From b7d3fbc2ed624cc216adda0f2574570e6d6d6aed Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:50 +0000
Subject: asus-laptop: Add support for Keyboard backlight

Add support for keyboard backlight found in Asus U50VG.

The SMC driver for the Apples does it via LED. To be
consistent with that we create /sys/class/leds/asus::kbd_backlight/
to control the keyboard backlight.

SLKB and GLKB are used to get/set the backlight. On
the U50VG is supports 4 brightness level, but this may
change with other models.

SLKB take a 8 bit integer where the higher bit is used
to toggle the backlight, and the over 7 bits control the
brightness level.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 652902e..0fb4e59 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -86,6 +86,7 @@
 #define GLED_ON     0x40	//Gaming LED
 #define LCD_ON      0x80	//LCD backlight
 #define GPS_ON      0x100	//GPS
+#define KEY_ON      0x200       //Keyboard backlight
 
 #define ASUS_LOG    ASUS_HOTK_FILE ": "
 #define ASUS_ERR    KERN_ERR    ASUS_LOG
@@ -172,6 +173,10 @@ ASUS_HANDLE(gps_on, ASUS_HOTK_PREFIX "SDON");	/* R2H */
 ASUS_HANDLE(gps_off, ASUS_HOTK_PREFIX "SDOF");	/* R2H */
 ASUS_HANDLE(gps_status, ASUS_HOTK_PREFIX "GPST");
 
+/* Keyboard light */
+ASUS_HANDLE(kled_set, ASUS_HOTK_PREFIX "SLKB");
+ASUS_HANDLE(kled_get, ASUS_HOTK_PREFIX "GLKB");
+
 /*
  * This is the main structure, we can use it to store anything interesting
  * about the hotk device
@@ -263,6 +268,7 @@ ASUS_LED(tled, "touchpad", 1);
 ASUS_LED(rled, "record", 1);
 ASUS_LED(pled, "phone", 1);
 ASUS_LED(gled, "gaming", 1);
+ASUS_LED(kled, "kbd_backlight", 3);
 
 struct key_entry {
 	char type;
@@ -419,6 +425,60 @@ ASUS_LED_HANDLER(rled, RLED_ON);
 ASUS_LED_HANDLER(tled, TLED_ON);
 ASUS_LED_HANDLER(gled, GLED_ON);
 
+/*
+ * Keyboard backlight
+ */
+static int get_kled_lvl(void)
+{
+	unsigned long long kblv;
+	struct acpi_object_list params;
+	union acpi_object in_obj;
+	acpi_status rv;
+
+	params.count = 1;
+	params.pointer = &in_obj;
+	in_obj.type = ACPI_TYPE_INTEGER;
+	in_obj.integer.value = 2;
+
+	rv = acpi_evaluate_integer(kled_get_handle, NULL, &params, &kblv);
+	if (ACPI_FAILURE(rv)) {
+		pr_warning("Error reading kled level\n");
+		return 0;
+	}
+	return kblv;
+}
+
+static int set_kled_lvl(int kblv)
+{
+	if (kblv > 0)
+		kblv = (1 << 7) | (kblv & 0x7F);
+	else
+		kblv = 0;
+
+	if (write_acpi_int(kled_set_handle, NULL, kblv, NULL)) {
+		pr_warning("Keyboard LED display write failed\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void kled_led_set(struct led_classdev *led_cdev,
+			 enum led_brightness value)
+{
+	kled_led_wk = value;
+	queue_work(led_workqueue, &kled_led_work);
+}
+
+static void kled_led_update(struct work_struct *ignored)
+{
+	set_kled_lvl(kled_led_wk);
+}
+
+static enum led_brightness kled_led_get(struct led_classdev *led_cdev)
+{
+	return get_kled_lvl();
+}
+
 static int get_lcd_state(void)
 {
 	return read_status(LCD_ON);
@@ -1059,6 +1119,9 @@ static int asus_hotk_get_info(void)
 
 	ASUS_HANDLE_INIT(ledd_set);
 
+	ASUS_HANDLE_INIT(kled_set);
+	ASUS_HANDLE_INIT(kled_get);
+
 	/*
 	 * The HWRS method return informations about the hardware.
 	 * 0x80 bit is for WLAN, 0x100 for Bluetooth.
@@ -1190,6 +1253,10 @@ static int asus_hotk_add(struct acpi_device *device)
 	/* LCD Backlight is on by default */
 	write_status(NULL, 1, LCD_ON);
 
+	/* Keyboard Backlight is on by default */
+	if (kled_set_handle)
+		set_kled_lvl(1);
+
 	/* LED display is off by default */
 	hotk->ledd_status = 0xFFF;
 
@@ -1244,6 +1311,7 @@ static void asus_led_exit(void)
 	ASUS_LED_UNREGISTER(pled);
 	ASUS_LED_UNREGISTER(rled);
 	ASUS_LED_UNREGISTER(gled);
+	ASUS_LED_UNREGISTER(kled);
 }
 
 static void asus_input_exit(void)
@@ -1323,13 +1391,20 @@ static int asus_led_init(struct device *dev)
 	if (rv)
 		goto out4;
 
+	if (kled_set_handle && kled_get_handle)
+		rv = ASUS_LED_REGISTER(kled, dev);
+	if (rv)
+		goto out5;
+
 	led_workqueue = create_singlethread_workqueue("led_workqueue");
 	if (!led_workqueue)
-		goto out5;
+		goto out6;
 
 	return 0;
-out5:
+out6:
 	rv = -ENOMEM;
+	ASUS_LED_UNREGISTER(kled);
+out5:
 	ASUS_LED_UNREGISTER(gled);
 out4:
 	ASUS_LED_UNREGISTER(pled);
-- 
cgit v0.10.2


From dc79526078d2c0f01445e54e1d9fdf7c15ffd63d Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:51 +0000
Subject: asus-laptop: handle keyboard backlight keys

Add support for the Fn+F3/Fn+F4 keys and map them
as KEY_KBDILLUMUP and KEY_KBDILLUMDOWN.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 0fb4e59..dd926b9 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -301,6 +301,8 @@ static struct key_entry asus_keymap[] = {
 	{KE_KEY, 0x8A, KEY_PROG1},
 	{KE_KEY, 0x95, KEY_MEDIA},
 	{KE_KEY, 0x99, KEY_PHONE},
+	{KE_KEY, 0xc4, KEY_KBDILLUMUP},
+	{KE_KEY, 0xc5, KEY_KBDILLUMDOWN},
 	{KE_END, 0},
 };
 
-- 
cgit v0.10.2


From 4644d0e5bd1412bbaed77e46c0c3376c6d060a74 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:52 +0000
Subject: asus-laptop: Add suport for another "Media" key

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index dd926b9..0325f7c 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -288,6 +288,7 @@ static struct key_entry asus_keymap[] = {
 	{KE_KEY, 0x41, KEY_NEXTSONG},
 	{KE_KEY, 0x43, KEY_STOPCD},
 	{KE_KEY, 0x45, KEY_PLAYPAUSE},
+	{KE_KEY, 0x4c, KEY_MEDIA},
 	{KE_KEY, 0x50, KEY_EMAIL},
 	{KE_KEY, 0x51, KEY_WWW},
 	{KE_KEY, 0x5C, KEY_SCREENLOCK},  /* Screenlock */
-- 
cgit v0.10.2


From 0aa20f7d720ed1feeb74df8c63a6427d9a2d3ebd Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:53 +0000
Subject: asus-laptop: Add "calculator" hotkey

Found on UX50V.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 0325f7c..5b912cc 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -291,6 +291,7 @@ static struct key_entry asus_keymap[] = {
 	{KE_KEY, 0x4c, KEY_MEDIA},
 	{KE_KEY, 0x50, KEY_EMAIL},
 	{KE_KEY, 0x51, KEY_WWW},
+	{KE_KEY, 0x55, KEY_CALC},
 	{KE_KEY, 0x5C, KEY_SCREENLOCK},  /* Screenlock */
 	{KE_KEY, 0x5D, KEY_WLAN},
 	{KE_KEY, 0x5E, KEY_WLAN},
-- 
cgit v0.10.2


From 6ce2c9d9a531e8753005a25a686dafab9a5d04bb Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:54 +0000
Subject: asus-laptop: document the module

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/laptops/asus-laptop.txt b/Documentation/laptops/asus-laptop.txt
new file mode 100644
index 0000000..c1c5be8
--- /dev/null
+++ b/Documentation/laptops/asus-laptop.txt
@@ -0,0 +1,258 @@
+Asus Laptop Extras
+
+Version 0.1
+August 6, 2009
+
+Corentin Chary <corentincj@iksaif.net>
+http://acpi4asus.sf.net/
+
+ This driver provides support for extra features of ACPI-compatible ASUS laptops.
+ It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
+ VICTOR XP7210 for example). It makes all the extra buttons generate standard
+ ACPI events that go through /proc/acpi/events and input events (like keyboards).
+ On some models adds support for changing the display brightness and output,
+ switching the LCD backlight on and off, and most importantly, allows you to
+ blink those fancy LEDs intended for reporting mail and wireless status.
+
+This driver supercedes the old asus_acpi driver.
+
+Requirements
+------------
+
+  Kernel 2.6.X sources, configured for your computer, with ACPI support.
+  You also need CONFIG_INPUT and CONFIG_ACPI.
+
+Status
+------
+
+ The features currently supported are the following (see below for
+ detailed description):
+
+ - Fn key combinations
+ - Bluetooth enable and disable
+ - Wlan enable and disable
+ - GPS enable and disable
+ - Video output switching
+ - Ambient Light Sensor on and off
+ - LED control
+ - LED Display control
+ - LCD brightness control
+ - LCD on and off
+
+ A compatibility table by model and feature is maintained on the web
+ site, http://acpi4asus.sf.net/.
+
+Usage
+-----
+
+  Try "modprobe asus_acpi". Check your dmesg (simply type dmesg). You should
+  see some lines like this :
+
+      Asus Laptop Extras version 0.42
+        L2D model detected.
+
+  If it is not the output you have on your laptop, send it (and the laptop's
+  DSDT) to me.
+
+  That's all, now, all the events generated by the hotkeys of your laptop
+  should be reported in your /proc/acpi/event entry. You can check with
+  "acpi_listen".
+
+  Hotkeys are also reported as input keys (like keyboards) you can check
+  which key are supported using "xev" under X11.
+
+  You can get informations on the version of your DSDT table by reading the
+  /sys/devices/platform/asus-laptop/infos entry. If you have a question or a
+  bug report to do, please include the output of this entry.
+
+LEDs
+----
+
+  You can modify LEDs be echoing values to /sys/class/leds/asus::*/brightness :
+    echo 1 >  /sys/class/leds/asus::mail/brightness
+  will switch the mail LED on.
+  You can also know if they are on/off by reading their content and use
+  kernel triggers like ide-disk or heartbeat.
+
+Backlight
+---------
+
+  You can control lcd backlight power and brightness with
+  /sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
+
+Wireless devices
+---------------
+
+  You can turn the internal Bluetooth adapter on/off with the bluetooth entry
+  (only on models with Bluetooth). This usually controls the associated LED.
+  Same for Wlan adapter.
+
+Display switching
+-----------------
+
+  Note: the display switching code is currently considered EXPERIMENTAL.
+
+  Switching works for the following models:
+    L3800C
+    A2500H
+    L5800C
+    M5200N
+    W1000N (albeit with some glitches)
+    M6700R
+    A6JC
+    F3J
+
+  Switching doesn't work for the following:
+    M3700N
+    L2X00D (locks the laptop under certain conditions)
+
+  To switch the displays, echo values from 0 to 15 to
+  /sys/devices/platform/asus-laptop/display. The significance of those values
+  is as follows:
+
+  +-------+-----+-----+-----+-----+-----+
+  | Bin   | Val | DVI | TV  | CRT | LCD |
+  +-------+-----+-----+-----+-----+-----+
+  + 0000  +   0 +     +     +     +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 0001  +   1 +     +     +     +  X  +
+  +-------+-----+-----+-----+-----+-----+
+  + 0010  +   2 +     +     +  X  +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 0011  +   3 +     +     +  X  +  X  +
+  +-------+-----+-----+-----+-----+-----+
+  + 0100  +   4 +     +  X  +     +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 0101  +   5 +     +  X  +     + X   +
+  +-------+-----+-----+-----+-----+-----+
+  + 0110  +   6 +     +  X  +  X  +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 0111  +   7 +     +  X  +  X  +  X  +
+  +-------+-----+-----+-----+-----+-----+
+  + 1000  +   8 +  X  +     +     +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 1001  +   9 +  X  +     +     +  X  +
+  +-------+-----+-----+-----+-----+-----+
+  + 1010  +  10 +  X  +     +  X  +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 1011  +  11 +  X  +     +  X  +  X  +
+  +-------+-----+-----+-----+-----+-----+
+  + 1100  +  12 +  X  +  X  +     +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 1101  +  13 +  X  +  X  +     +  X  +
+  +-------+-----+-----+-----+-----+-----+
+  + 1110  +  14 +  X  +  X  +  X  +     +
+  +-------+-----+-----+-----+-----+-----+
+  + 1111  +  15 +  X  +  X  +  X  +  X  +
+  +-------+-----+-----+-----+-----+-----+
+
+  In most cases, the appropriate displays must be plugged in for the above
+  combinations to work. TV-Out may need to be initialized at boot time.
+
+  Debugging:
+  1) Check whether the Fn+F8 key:
+     a) does not lock the laptop (try disabling CONFIG_X86_UP_APIC or boot with
+        noapic / nolapic if it does)
+     b) generates events (0x6n, where n is the value corresponding to the
+        configuration above)
+     c) actually works
+     Record the disp value at every configuration.
+  2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
+     Record its value, note any change. If nothing changes, try a broader range,
+     up to 65535.
+  3) Send ANY output (both positive and negative reports are needed, unless your
+     machine is already listed above) to the acpi4asus-user mailing list.
+
+  Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
+  events are generated and no actual switching occurs. In such a case, a line
+  like:
+
+    echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
+
+  will usually do the trick ($arg is the 0000006n-like event passed to acpid).
+
+  Note: there is currently no reliable way to read display status on xxN
+  (Centrino) models.
+
+LED display
+-----------
+
+  Some models like the W1N have a LED display that can be used to display
+  several informations.
+
+  LED display works for the following models:
+    W1000N
+    W1J
+
+  To control the LED display, use the following :
+
+    echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
+
+  where T control the 3 letters display, and DDD the 3 digits display,
+  according to the tables below.
+
+         DDD (digits)
+         000 to 999 = display digits
+         AAA        = ---
+         BBB to FFF = turn-off
+
+         T  (type)
+         0 = off
+         1 = dvd
+         2 = vcd
+         3 = mp3
+         4 = cd
+         5 = tv
+         6 = cpu
+         7 = vol
+
+  For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
+  would display "DVD001".
+
+Driver options:
+---------------
+
+ Options can be passed to the asus-laptop driver using the standard
+ module argument syntax (<param>=<value> when passing the option to the
+ module or asus-laptop.<param>=<value> on the kernel boot line when
+ asus-laptop is statically linked into the kernel).
+
+	     wapf: WAPF defines the behavior of the Fn+Fx wlan key
+		   The significance of values is yet to be found, but
+		   most of the time:
+		   - 0x0 should do nothing
+		   - 0x1 should allow to control the device with Fn+Fx key.
+		   - 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
+		   - 0x5 like 0x1 or 0x4
+
+ The default value is 0x1.
+
+Unsupported models
+------------------
+
+ These models will never be supported by this module, as they use a completely
+ different mechanism to handle LEDs and extra stuff (meaning we have no clue
+ how it works):
+
+ - ASUS A1300 (A1B), A1370D
+ - ASUS L7300G
+ - ASUS L8400
+
+Patches, Errors, Questions:
+--------------------------
+
+ I appreciate any success or failure
+ reports, especially if they add to or correct the compatibility table.
+ Please include the following information in your report:
+
+ - Asus model name
+ - a copy of your ACPI tables, using the "acpidump" utility
+ - a copy of /sys/devices/platform/asus-laptop/infos
+ - which driver features work and which don't
+ - the observed behavior of non-working features
+
+ Any other comments or patches are also more than welcome.
+
+ acpi4asus-user@lists.sourceforge.net
+ http://sourceforge.net/projects/acpi4asus
+
-- 
cgit v0.10.2


From b09f5fecf8b97c9de7add3e2eb0cfeb91ef28dbb Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Fri, 28 Aug 2009 12:56:55 +0000
Subject: asus-laptop: document sysfs interface

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/ABI/testing/sysfs-platform-asus-laptop b/Documentation/ABI/testing/sysfs-platform-asus-laptop
new file mode 100644
index 0000000..a1cb660
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-asus-laptop
@@ -0,0 +1,52 @@
+What:		/sys/devices/platform/asus-laptop/display
+Date:		January 2007
+KernelVersion:	2.6.20
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		This file allows display switching. The value
+		is composed by 4 bits and defined as follow:
+		4321
+		|||`- LCD
+		||`-- CRT
+		|`--- TV
+		`---- DVI
+		Ex: - 0 (0000b) means no display
+		    - 3 (0011b) CRT+LCD.
+
+What:		/sys/devices/platform/asus-laptop/gps
+Date:		January 2007
+KernelVersion:	2.6.20
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		Control the gps device. 1 means on, 0 means off.
+Users:		Lapsus
+
+What:		/sys/devices/platform/asus-laptop/ledd
+Date:		January 2007
+KernelVersion:	2.6.20
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		Some models like the W1N have a LED display that can be
+		used to display several informations.
+		To control the LED display, use the following :
+		    echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
+		where T control the 3 letters display, and DDD the 3 digits display.
+		The DDD table can be found in Documentation/laptops/asus-laptop.txt
+
+What:		/sys/devices/platform/asus-laptop/bluetooth
+Date:		January 2007
+KernelVersion:	2.6.20
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		Control the bluetooth device. 1 means on, 0 means off.
+		This may control the led, the device or both.
+Users:		Lapsus
+
+What:		/sys/devices/platform/asus-laptop/wlan
+Date:		January 2007
+KernelVersion:	2.6.20
+Contact:	"Corentin Chary" <corentincj@iksaif.net>
+Description:
+		Control the bluetooth device. 1 means on, 0 means off.
+		This may control the led, the device or both.
+Users:		Lapsus
-- 
cgit v0.10.2


From aeb41b852fe90764b75ef7a9f185ca94696af6ff Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 28 Aug 2009 19:03:11 -0400
Subject: eeepc-laptop: whitespace for checkpatch.pl

checkpatch doesn't like tab+space for a return statement.

WARNING: suspect code indent for conditional statements (8, 17)
+	if (!device)
+		 return -EINVAL;

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index c9febf4..819c685 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -1183,7 +1183,7 @@ static int eeepc_hotk_add(struct acpi_device *device)
 	int result;
 
 	if (!device)
-		 return -EINVAL;
+		return -EINVAL;
 	pr_notice(EEEPC_HOTK_NAME "\n");
 	ehotk = kzalloc(sizeof(struct eeepc_hotk), GFP_KERNEL);
 	if (!ehotk)
@@ -1265,7 +1265,7 @@ fail_platform_driver:
 static int eeepc_hotk_remove(struct acpi_device *device, int type)
 {
 	if (!device || !acpi_driver_data(device))
-		 return -EINVAL;
+		return -EINVAL;
 
 	eeepc_backlight_exit();
 	eeepc_rfkill_exit();
-- 
cgit v0.10.2


From b2deadd53c3630786e73746fb0ad8450f4e015bf Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 24 Jul 2009 10:56:43 +0800
Subject: ACPICA: Move predefined repair code to new file, no functional change

New file is nsrepair.c. This is in preparation for additional
errror correcting code.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index 0e7d561..e7973bc 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -28,7 +28,7 @@ acpi-$(ACPI_FUTURE_USAGE) += hwtimer.o
 acpi-y += nsaccess.o  nsload.o    nssearch.o  nsxfeval.o \
 	 nsalloc.o   nseval.o    nsnames.o   nsutils.o   nsxfname.o \
 	 nsdump.o    nsinit.o    nsobject.o  nswalk.o    nsxfobj.o  \
-	 nsparse.o   nspredef.o
+	 nsparse.o   nspredef.o  nsrepair.o
 
 acpi-$(ACPI_FUTURE_USAGE) += nsdumpdv.o
 
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index a78e02f..908cfdd 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -73,6 +73,14 @@
 #define ACPI_NS_WALK_UNLOCK         0x01
 #define ACPI_NS_WALK_TEMP_NODES     0x02
 
+/* Object is not a package element */
+
+#define ACPI_NOT_PACKAGE_ELEMENT    ACPI_UINT32_MAX
+
+/* Always emit warning message, not dependent on node flags */
+
+#define ACPI_WARN_ALWAYS            0
+
 /*
  * nsinit - Namespace initialization
  */
@@ -262,6 +270,15 @@ acpi_ns_get_attached_data(struct acpi_namespace_node *node,
 			  acpi_object_handler handler, void **data);
 
 /*
+ * nsrepair - return object repair for predefined methods/objects
+ */
+acpi_status
+acpi_ns_repair_object(struct acpi_predefined_data *data,
+		      u32 expected_btypes,
+		      u32 package_index,
+		      union acpi_operand_object **return_object_ptr);
+
+/*
  * nssearch - Namespace searching and entry
  */
 acpi_status
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 1dc1a47..e3f08dc 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -91,12 +91,6 @@ static acpi_status
 acpi_ns_check_reference(struct acpi_predefined_data *data,
 			union acpi_operand_object *return_object);
 
-static acpi_status
-acpi_ns_repair_object(struct acpi_predefined_data *data,
-		      u32 expected_btypes,
-		      u32 package_index,
-		      union acpi_operand_object **return_object_ptr);
-
 static void acpi_ns_get_expected_types(char *buffer, u32 expected_btypes);
 
 /*
@@ -111,14 +105,6 @@ static const char *acpi_rtype_names[] = {
 	"/Reference",
 };
 
-/* Object is not a package element */
-
-#define ACPI_NOT_PACKAGE_ELEMENT    ACPI_UINT32_MAX
-
-/* Always emit warning message, not dependent on node flags */
-
-#define ACPI_WARN_ALWAYS            0
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ns_check_predefined_names
@@ -580,8 +566,8 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 	case ACPI_PTYPE2_COUNT:
 
 		/*
-		 * These types all return a single package that consists of a variable
-		 * number of sub-packages
+		 * These types all return a single package that consists of a
+		 * variable number of sub-packages.
 		 */
 		for (i = 0; i < count; i++) {
 			sub_package = *elements;
@@ -979,108 +965,6 @@ acpi_ns_check_reference(struct acpi_predefined_data *data,
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ns_repair_object
- *
- * PARAMETERS:  Data            - Pointer to validation data structure
- *              expected_btypes - Object types expected
- *              package_index   - Index of object within parent package (if
- *                                applicable - ACPI_NOT_PACKAGE_ELEMENT
- *                                otherwise)
- *              return_object_ptr - Pointer to the object returned from the
- *                                evaluation of a method or object
- *
- * RETURN:      Status. AE_OK if repair was successful.
- *
- * DESCRIPTION: Attempt to repair/convert a return object of a type that was
- *              not expected.
- *
- ******************************************************************************/
-
-static acpi_status
-acpi_ns_repair_object(struct acpi_predefined_data *data,
-		      u32 expected_btypes,
-		      u32 package_index,
-		      union acpi_operand_object **return_object_ptr)
-{
-	union acpi_operand_object *return_object = *return_object_ptr;
-	union acpi_operand_object *new_object;
-	acpi_size length;
-
-	switch (return_object->common.type) {
-	case ACPI_TYPE_BUFFER:
-
-		/* Does the method/object legally return a string? */
-
-		if (!(expected_btypes & ACPI_RTYPE_STRING)) {
-			return (AE_AML_OPERAND_TYPE);
-		}
-
-		/*
-		 * Have a Buffer, expected a String, convert. Use a to_string
-		 * conversion, no transform performed on the buffer data. The best
-		 * example of this is the _BIF method, where the string data from
-		 * the battery is often (incorrectly) returned as buffer object(s).
-		 */
-		length = 0;
-		while ((length < return_object->buffer.length) &&
-		       (return_object->buffer.pointer[length])) {
-			length++;
-		}
-
-		/* Allocate a new string object */
-
-		new_object = acpi_ut_create_string_object(length);
-		if (!new_object) {
-			return (AE_NO_MEMORY);
-		}
-
-		/*
-		 * Copy the raw buffer data with no transform. String is already NULL
-		 * terminated at Length+1.
-		 */
-		ACPI_MEMCPY(new_object->string.pointer,
-			    return_object->buffer.pointer, length);
-
-		/*
-		 * If the original object is a package element, we need to:
-		 * 1. Set the reference count of the new object to match the
-		 *    reference count of the old object.
-		 * 2. Decrement the reference count of the original object.
-		 */
-		if (package_index != ACPI_NOT_PACKAGE_ELEMENT) {
-			new_object->common.reference_count =
-			    return_object->common.reference_count;
-
-			if (return_object->common.reference_count > 1) {
-				return_object->common.reference_count--;
-			}
-
-			ACPI_WARN_PREDEFINED((AE_INFO, data->pathname,
-					      data->node_flags,
-					      "Converted Buffer to expected String at index %u",
-					      package_index));
-		} else {
-			ACPI_WARN_PREDEFINED((AE_INFO, data->pathname,
-					      data->node_flags,
-					      "Converted Buffer to expected String"));
-		}
-
-		/* Delete old object, install the new return object */
-
-		acpi_ut_remove_reference(return_object);
-		*return_object_ptr = new_object;
-		data->flags |= ACPI_OBJECT_REPAIRED;
-		return (AE_OK);
-
-	default:
-		break;
-	}
-
-	return (AE_AML_OPERAND_TYPE);
-}
-
-/*******************************************************************************
- *
  * FUNCTION:    acpi_ns_get_expected_types
  *
  * PARAMETERS:  Buffer          - Pointer to where the string is returned
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
new file mode 100644
index 0000000..b64751e
--- /dev/null
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -0,0 +1,151 @@
+/******************************************************************************
+ *
+ * Module Name: nsrepair - Repair for objects returned by predefined methods
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2009, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acnamesp.h"
+#include "acpredef.h"
+
+#define _COMPONENT          ACPI_NAMESPACE
+ACPI_MODULE_NAME("nsrepair")
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_repair_object
+ *
+ * PARAMETERS:  Data                - Pointer to validation data structure
+ *              expected_btypes     - Object types expected
+ *              package_index       - Index of object within parent package (if
+ *                                    applicable - ACPI_NOT_PACKAGE_ELEMENT
+ *                                    otherwise)
+ *              return_object_ptr   - Pointer to the object returned from the
+ *                                    evaluation of a method or object
+ *
+ * RETURN:      Status. AE_OK if repair was successful.
+ *
+ * DESCRIPTION: Attempt to repair/convert a return object of a type that was
+ *              not expected.
+ *
+ ******************************************************************************/
+acpi_status
+acpi_ns_repair_object(struct acpi_predefined_data *data,
+		      u32 expected_btypes,
+		      u32 package_index,
+		      union acpi_operand_object **return_object_ptr)
+{
+	union acpi_operand_object *return_object = *return_object_ptr;
+	union acpi_operand_object *new_object;
+	acpi_size length;
+
+	switch (return_object->common.type) {
+	case ACPI_TYPE_BUFFER:
+
+		/* Does the method/object legally return a string? */
+
+		if (!(expected_btypes & ACPI_RTYPE_STRING)) {
+			return (AE_AML_OPERAND_TYPE);
+		}
+
+		/*
+		 * Have a Buffer, expected a String, convert. Use a to_string
+		 * conversion, no transform performed on the buffer data. The best
+		 * example of this is the _BIF method, where the string data from
+		 * the battery is often (incorrectly) returned as buffer object(s).
+		 */
+		length = 0;
+		while ((length < return_object->buffer.length) &&
+		       (return_object->buffer.pointer[length])) {
+			length++;
+		}
+
+		/* Allocate a new string object */
+
+		new_object = acpi_ut_create_string_object(length);
+		if (!new_object) {
+			return (AE_NO_MEMORY);
+		}
+
+		/*
+		 * Copy the raw buffer data with no transform. String is already NULL
+		 * terminated at Length+1.
+		 */
+		ACPI_MEMCPY(new_object->string.pointer,
+			    return_object->buffer.pointer, length);
+
+		/*
+		 * If the original object is a package element, we need to:
+		 * 1. Set the reference count of the new object to match the
+		 *    reference count of the old object.
+		 * 2. Decrement the reference count of the original object.
+		 */
+		if (package_index != ACPI_NOT_PACKAGE_ELEMENT) {
+			new_object->common.reference_count =
+			    return_object->common.reference_count;
+
+			if (return_object->common.reference_count > 1) {
+				return_object->common.reference_count--;
+			}
+
+			ACPI_WARN_PREDEFINED((AE_INFO, data->pathname,
+					      data->node_flags,
+					      "Converted Buffer to expected String at index %u",
+					      package_index));
+		} else {
+			ACPI_WARN_PREDEFINED((AE_INFO, data->pathname,
+					      data->node_flags,
+					      "Converted Buffer to expected String"));
+		}
+
+		/* Delete old object, install the new return object */
+
+		acpi_ut_remove_reference(return_object);
+		*return_object_ptr = new_object;
+		data->flags |= ACPI_OBJECT_REPAIRED;
+		return (AE_OK);
+
+	default:
+		break;
+	}
+
+	return (AE_AML_OPERAND_TYPE);
+}
-- 
cgit v0.10.2


From e5f69d6ef7a6b0dbad8d4c00d83009960be02155 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 24 Jul 2009 11:03:09 +0800
Subject: ACPICA: Add repair for predefined methods that return nested packages

Fixes a problem where a predefined method is defined to return
a variable-length Package of sub-packages. If the length is one,
the BIOS code occasionally creates a simple single package with
no sub-packages. This code attempts to fix the problem by wrapping
a new package object around the existing package. ACPICA BZ 790.

http://acpica.org/bugzilla/show_bug.cgi?id=790

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 908cfdd..f75a7a0 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -278,6 +278,10 @@ acpi_ns_repair_object(struct acpi_predefined_data *data,
 		      u32 package_index,
 		      union acpi_operand_object **return_object_ptr);
 
+acpi_status
+acpi_ns_repair_package_list(struct acpi_predefined_data *data,
+			    union acpi_operand_object **obj_desc_ptr);
+
 /*
  * nssearch - Namespace searching and entry
  */
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index e3f08dc..0091504 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -566,9 +566,35 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 	case ACPI_PTYPE2_COUNT:
 
 		/*
-		 * These types all return a single package that consists of a
-		 * variable number of sub-packages.
+		 * These types all return a single Package that consists of a
+		 * variable number of sub-Packages.
+		 *
+		 * First, ensure that the first element is a sub-Package. If not,
+		 * the BIOS may have incorrectly returned the object as a single
+		 * package instead of a Package of Packages (a common error if
+		 * there is only one entry). We may be able to repair this by
+		 * wrapping the returned Package with a new outer Package.
 		 */
+		if ((*elements)->common.type != ACPI_TYPE_PACKAGE) {
+
+			/* Create the new outer package and populate it */
+
+			status =
+			    acpi_ns_repair_package_list(data,
+							return_object_ptr);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+
+			/* Update locals to point to the new package (of 1 element) */
+
+			return_object = *return_object_ptr;
+			elements = return_object->package.elements;
+			count = 1;
+		}
+
+		/* Validate each sub-Package in the parent Package */
+
 		for (i = 0; i < count; i++) {
 			sub_package = *elements;
 			sub_elements = sub_package->package.elements;
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index b64751e..db2b2a9 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -149,3 +149,55 @@ acpi_ns_repair_object(struct acpi_predefined_data *data,
 
 	return (AE_AML_OPERAND_TYPE);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_repair_package_list
+ *
+ * PARAMETERS:  Data                - Pointer to validation data structure
+ *              obj_desc_ptr        - Pointer to the object to repair. The new
+ *                                    package object is returned here,
+ *                                    overwriting the old object.
+ *
+ * RETURN:      Status, new object in *obj_desc_ptr
+ *
+ * DESCRIPTION: Repair a common problem with objects that are defined to return
+ *              a variable-length Package of Packages. If the variable-length
+ *              is one, some BIOS code mistakenly simply declares a single
+ *              Package instead of a Package with one sub-Package. This
+ *              function attempts to repair this error by wrapping a Package
+ *              object around the original Package, creating the correct
+ *              Package with one sub-Package.
+ *
+ *              Names that can be repaired in this manner include:
+ *              _ALR, _CSD, _HPX, _MLS, _PRT, _PSS, _TRT, TSS
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ns_repair_package_list(struct acpi_predefined_data *data,
+			    union acpi_operand_object **obj_desc_ptr)
+{
+	union acpi_operand_object *pkg_obj_desc;
+
+	/*
+	 * Create the new outer package and populate it. The new package will
+	 * have a single element, the lone subpackage.
+	 */
+	pkg_obj_desc = acpi_ut_create_package_object(1);
+	if (!pkg_obj_desc) {
+		return (AE_NO_MEMORY);
+	}
+
+	pkg_obj_desc->package.elements[0] = *obj_desc_ptr;
+
+	/* Return the new object in the object pointer */
+
+	*obj_desc_ptr = pkg_obj_desc;
+	data->flags |= ACPI_OBJECT_REPAIRED;
+
+	ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+			      "Incorrectly formed Package, attempting repair"));
+
+	return (AE_OK);
+}
-- 
cgit v0.10.2


From 53e9387bdd8bfef6cffff2d2eb6bd28eca812682 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 24 Jul 2009 11:22:11 +0800
Subject: ACPICA: ACPI 4.0 : Add new return package type, restructure module.

Added one new package type, a package that contains a revision number and
a variable number of sub-packages. Restructured the module to
put the sub-package list traversal in a separate function.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 63f656a..144b1f4 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -91,6 +91,8 @@
  * ACPI_PTYPE2_MIN: Each subpackage has a variable but minimum length
  *      (Used for _HPX)
  *
+ * ACPI_PTYPE2_REV_FIXED: Revision at start, each subpackage is Fixed-length
+ *
  *****************************************************************************/
 
 enum acpi_return_package_types {
@@ -101,7 +103,8 @@ enum acpi_return_package_types {
 	ACPI_PTYPE2_COUNT = 5,
 	ACPI_PTYPE2_PKG_COUNT = 6,
 	ACPI_PTYPE2_FIXED = 7,
-	ACPI_PTYPE2_MIN = 8
+	ACPI_PTYPE2_MIN = 8,
+	ACPI_PTYPE2_REV_FIXED = 9
 };
 
 /*
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 0091504..0b2cdb3 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -76,6 +76,11 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 		      union acpi_operand_object **return_object_ptr);
 
 static acpi_status
+acpi_ns_check_package_list(struct acpi_predefined_data *data,
+			   const union acpi_predefined_info *package,
+			   union acpi_operand_object **elements, u32 count);
+
+static acpi_status
 acpi_ns_check_package_elements(struct acpi_predefined_data *data,
 			       union acpi_operand_object **elements,
 			       u8 type1,
@@ -393,14 +398,11 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 {
 	union acpi_operand_object *return_object = *return_object_ptr;
 	const union acpi_predefined_info *package;
-	union acpi_operand_object *sub_package;
 	union acpi_operand_object **elements;
-	union acpi_operand_object **sub_elements;
-	acpi_status status;
+	acpi_status status = AE_OK;
 	u32 expected_count;
 	u32 count;
 	u32 i;
-	u32 j;
 
 	ACPI_FUNCTION_NAME(ns_check_package);
 
@@ -465,9 +467,6 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 							object_type2,
 							package->ret_info.
 							count2, 0);
-		if (ACPI_FAILURE(status)) {
-			return (status);
-		}
 		break;
 
 	case ACPI_PTYPE1_VAR:
@@ -534,6 +533,25 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 		}
 		break;
 
+	case ACPI_PTYPE2_REV_FIXED:
+
+		/* First element is the (Integer) revision */
+
+		status = acpi_ns_check_object_type(data, elements,
+						   ACPI_RTYPE_INTEGER, 0);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
+
+		elements++;
+		count--;
+
+		/* Examine the sub-packages */
+
+		status =
+		    acpi_ns_check_package_list(data, package, elements, count);
+		break;
+
 	case ACPI_PTYPE2_PKG_COUNT:
 
 		/* First element is the (Integer) count of sub-packages to follow */
@@ -556,9 +574,11 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 		count = expected_count;
 		elements++;
 
-		/* Now we can walk the sub-packages */
+		/* Examine the sub-packages */
 
-		/*lint -fallthrough */
+		status =
+		    acpi_ns_check_package_list(data, package, elements, count);
+		break;
 
 	case ACPI_PTYPE2:
 	case ACPI_PTYPE2_FIXED:
@@ -593,175 +613,213 @@ acpi_ns_check_package(struct acpi_predefined_data *data,
 			count = 1;
 		}
 
-		/* Validate each sub-Package in the parent Package */
+		/* Examine the sub-packages */
 
-		for (i = 0; i < count; i++) {
-			sub_package = *elements;
-			sub_elements = sub_package->package.elements;
+		status =
+		    acpi_ns_check_package_list(data, package, elements, count);
+		break;
 
-			/* Each sub-object must be of type Package */
+	default:
 
-			status = acpi_ns_check_object_type(data, &sub_package,
-							   ACPI_RTYPE_PACKAGE,
-							   i);
-			if (ACPI_FAILURE(status)) {
-				return (status);
-			}
+		/* Should not get here if predefined info table is correct */
 
-			/* Examine the different types of sub-packages */
+		ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+				      "Invalid internal return type in table entry: %X",
+				      package->ret_info.type));
 
-			switch (package->ret_info.type) {
-			case ACPI_PTYPE2:
-			case ACPI_PTYPE2_PKG_COUNT:
+		return (AE_AML_INTERNAL);
+	}
 
-				/* Each subpackage has a fixed number of elements */
+	return (status);
 
-				expected_count =
-				    package->ret_info.count1 +
-				    package->ret_info.count2;
-				if (sub_package->package.count !=
-				    expected_count) {
-					count = sub_package->package.count;
-					goto package_too_small;
-				}
+package_too_small:
 
-				status =
-				    acpi_ns_check_package_elements(data,
-								   sub_elements,
-								   package->
-								   ret_info.
-								   object_type1,
-								   package->
-								   ret_info.
-								   count1,
-								   package->
-								   ret_info.
-								   object_type2,
-								   package->
-								   ret_info.
-								   count2, 0);
-				if (ACPI_FAILURE(status)) {
-					return (status);
-				}
-				break;
+	/* Error exit for the case with an incorrect package count */
 
-			case ACPI_PTYPE2_FIXED:
+	ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
+			      "Return Package is too small - found %u elements, expected %u",
+			      count, expected_count));
 
-				/* Each sub-package has a fixed length */
+	return (AE_AML_OPERAND_VALUE);
+}
 
-				expected_count = package->ret_info2.count;
-				if (sub_package->package.count < expected_count) {
-					count = sub_package->package.count;
-					goto package_too_small;
-				}
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_check_package_list
+ *
+ * PARAMETERS:  Data            - Pointer to validation data structure
+ *              Package         - Pointer to package-specific info for method
+ *              Elements        - Element list of parent package. All elements
+ *                                of this list should be of type Package.
+ *              Count           - Count of subpackages
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Examine a list of subpackages
+ *
+ ******************************************************************************/
 
-				/* Check the type of each sub-package element */
+static acpi_status
+acpi_ns_check_package_list(struct acpi_predefined_data *data,
+			   const union acpi_predefined_info *package,
+			   union acpi_operand_object **elements, u32 count)
+{
+	union acpi_operand_object *sub_package;
+	union acpi_operand_object **sub_elements;
+	acpi_status status;
+	u32 expected_count;
+	u32 i;
+	u32 j;
 
-				for (j = 0; j < expected_count; j++) {
-					status =
-					    acpi_ns_check_object_type(data,
-						&sub_elements[j],
-						package->ret_info2.object_type[j], j);
-					if (ACPI_FAILURE(status)) {
-						return (status);
-					}
-				}
-				break;
+	/* Validate each sub-Package in the parent Package */
 
-			case ACPI_PTYPE2_MIN:
+	for (i = 0; i < count; i++) {
+		sub_package = *elements;
+		sub_elements = sub_package->package.elements;
 
-				/* Each sub-package has a variable but minimum length */
+		/* Each sub-object must be of type Package */
 
-				expected_count = package->ret_info.count1;
-				if (sub_package->package.count < expected_count) {
-					count = sub_package->package.count;
-					goto package_too_small;
-				}
+		status = acpi_ns_check_object_type(data, &sub_package,
+						   ACPI_RTYPE_PACKAGE, i);
+		if (ACPI_FAILURE(status)) {
+			return (status);
+		}
 
-				/* Check the type of each sub-package element */
+		/* Examine the different types of expected sub-packages */
 
-				status =
-				    acpi_ns_check_package_elements(data,
-								   sub_elements,
-								   package->
-								   ret_info.
-								   object_type1,
-								   sub_package->
-								   package.
-								   count, 0, 0,
-								   0);
-				if (ACPI_FAILURE(status)) {
-					return (status);
-				}
-				break;
+		switch (package->ret_info.type) {
+		case ACPI_PTYPE2:
+		case ACPI_PTYPE2_PKG_COUNT:
+		case ACPI_PTYPE2_REV_FIXED:
+
+			/* Each subpackage has a fixed number of elements */
+
+			expected_count =
+			    package->ret_info.count1 + package->ret_info.count2;
+			if (sub_package->package.count < expected_count) {
+				goto package_too_small;
+			}
+
+			status =
+			    acpi_ns_check_package_elements(data, sub_elements,
+							   package->ret_info.
+							   object_type1,
+							   package->ret_info.
+							   count1,
+							   package->ret_info.
+							   object_type2,
+							   package->ret_info.
+							   count2, 0);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			break;
+
+		case ACPI_PTYPE2_FIXED:
+
+			/* Each sub-package has a fixed length */
 
-			case ACPI_PTYPE2_COUNT:
+			expected_count = package->ret_info2.count;
+			if (sub_package->package.count < expected_count) {
+				goto package_too_small;
+			}
 
-				/* First element is the (Integer) count of elements to follow */
+			/* Check the type of each sub-package element */
 
+			for (j = 0; j < expected_count; j++) {
 				status =
 				    acpi_ns_check_object_type(data,
-							      sub_elements,
-							      ACPI_RTYPE_INTEGER,
-							      0);
+							      &sub_elements[j],
+							      package->
+							      ret_info2.
+							      object_type[j],
+							      j);
 				if (ACPI_FAILURE(status)) {
 					return (status);
 				}
+			}
+			break;
 
-				/* Make sure package is large enough for the Count */
+		case ACPI_PTYPE2_MIN:
 
-				expected_count =
-				    (u32) (*sub_elements)->integer.value;
-				if (sub_package->package.count < expected_count) {
-					count = sub_package->package.count;
-					goto package_too_small;
-				}
+			/* Each sub-package has a variable but minimum length */
 
-				/* Check the type of each sub-package element */
+			expected_count = package->ret_info.count1;
+			if (sub_package->package.count < expected_count) {
+				goto package_too_small;
+			}
 
-				status =
-				    acpi_ns_check_package_elements(data,
-								   (sub_elements
-								    + 1),
-								   package->
-								   ret_info.
-								   object_type1,
-								   (expected_count
-								    - 1), 0, 0,
-								   1);
-				if (ACPI_FAILURE(status)) {
-					return (status);
-				}
-				break;
+			/* Check the type of each sub-package element */
 
-			default:
-				break;
+			status =
+			    acpi_ns_check_package_elements(data, sub_elements,
+							   package->ret_info.
+							   object_type1,
+							   sub_package->package.
+							   count, 0, 0, 0);
+			if (ACPI_FAILURE(status)) {
+				return (status);
 			}
+			break;
 
-			elements++;
-		}
-		break;
+		case ACPI_PTYPE2_COUNT:
 
-	default:
+			/*
+			 * First element is the (Integer) count of elements, including
+			 * the count field.
+			 */
+			status = acpi_ns_check_object_type(data, sub_elements,
+							   ACPI_RTYPE_INTEGER,
+							   0);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
 
-		/* Should not get here if predefined info table is correct */
+			/*
+			 * Make sure package is large enough for the Count and is
+			 * is as large as the minimum size
+			 */
+			expected_count = (u32)(*sub_elements)->integer.value;
+			if (sub_package->package.count < expected_count) {
+				goto package_too_small;
+			}
+			if (sub_package->package.count <
+			    package->ret_info.count1) {
+				expected_count = package->ret_info.count1;
+				goto package_too_small;
+			}
 
-		ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
-				      "Invalid internal return type in table entry: %X",
-				      package->ret_info.type));
+			/* Check the type of each sub-package element */
 
-		return (AE_AML_INTERNAL);
+			status =
+			    acpi_ns_check_package_elements(data,
+							   (sub_elements + 1),
+							   package->ret_info.
+							   object_type1,
+							   (expected_count - 1),
+							   0, 0, 1);
+			if (ACPI_FAILURE(status)) {
+				return (status);
+			}
+			break;
+
+		default:	/* Should not get here, type was validated by caller */
+
+			return (AE_AML_INTERNAL);
+		}
+
+		elements++;
 	}
 
 	return (AE_OK);
 
-      package_too_small:
+package_too_small:
 
-	/* Error exit for the case with an incorrect package count */
+	/* The sub-package count was smaller than required */
 
 	ACPI_WARN_PREDEFINED((AE_INFO, data->pathname, data->node_flags,
-			      "Return Package is too small - found %u, expected %u",
-			      count, expected_count));
+			      "Return Sub-Package[%u] is too small - found %u elements, expected %u",
+			      i, sub_package->package.count, expected_count));
 
 	return (AE_AML_OPERAND_VALUE);
 }
-- 
cgit v0.10.2


From 2f977b36e5f175e5126f280e7a94f0c53d1b1a16 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 24 Jul 2009 11:25:16 +0800
Subject: ACPICA: Fix fault if acpi_terminate is called twice

Fixes a problem with the mechanism that prevents problems if the
acpi_terminate interface is inadvertently called more than once
before the ACPICA code is re-initialized. ACPICA BZ 795.

http://acpica.org/bugzilla/show_bug.cgi?id=795

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/utinit.c b/drivers/acpi/acpica/utinit.c
index a54ca84..9d0919e 100644
--- a/drivers/acpi/acpica/utinit.c
+++ b/drivers/acpi/acpica/utinit.c
@@ -99,33 +99,19 @@ static void acpi_ut_terminate(void)
  *
  * FUNCTION:    acpi_ut_subsystem_shutdown
  *
- * PARAMETERS:  none
+ * PARAMETERS:  None
  *
- * RETURN:      none
+ * RETURN:      None
  *
- * DESCRIPTION: Shutdown the various subsystems.  Don't delete the mutex
- *              objects here -- because the AML debugger may be still running.
+ * DESCRIPTION: Shutdown the various components. Do not delete the mutex
+ *              objects here, because the AML debugger may be still running.
  *
  ******************************************************************************/
 
 void acpi_ut_subsystem_shutdown(void)
 {
-
 	ACPI_FUNCTION_TRACE(ut_subsystem_shutdown);
 
-	/* Just exit if subsystem is already shutdown */
-
-	if (acpi_gbl_shutdown) {
-		ACPI_ERROR((AE_INFO, "ACPI Subsystem is already terminated"));
-		return_VOID;
-	}
-
-	/* Subsystem appears active, go ahead and shut it down */
-
-	acpi_gbl_shutdown = TRUE;
-	acpi_gbl_startup_flags = 0;
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Shutting down ACPI Subsystem\n"));
-
 #ifndef ACPI_ASL_COMPILER
 
 	/* Close the acpi_event Handling */
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index 078a227..483edbb 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -318,7 +318,7 @@ ACPI_EXPORT_SYMBOL(acpi_initialize_objects)
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Shutdown the ACPI subsystem.  Release all resources.
+ * DESCRIPTION: Shutdown the ACPICA subsystem and release all resources.
  *
  ******************************************************************************/
 acpi_status acpi_terminate(void)
@@ -327,6 +327,19 @@ acpi_status acpi_terminate(void)
 
 	ACPI_FUNCTION_TRACE(acpi_terminate);
 
+	/* Just exit if subsystem is already shutdown */
+
+	if (acpi_gbl_shutdown) {
+		ACPI_ERROR((AE_INFO, "ACPI Subsystem is already terminated"));
+		return_ACPI_STATUS(AE_OK);
+	}
+
+	/* Subsystem appears active, go ahead and shut it down */
+
+	acpi_gbl_shutdown = TRUE;
+	acpi_gbl_startup_flags = 0;
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Shutting down ACPI Subsystem\n"));
+
 	/* Terminate the AML Debugger if present */
 
 	ACPI_DEBUGGER_EXEC(acpi_gbl_db_terminate_threads = TRUE);
@@ -353,6 +366,7 @@ acpi_status acpi_terminate(void)
 }
 
 ACPI_EXPORT_SYMBOL(acpi_terminate)
+
 #ifndef ACPI_ASL_COMPILER
 #ifdef ACPI_FUTURE_USAGE
 /*******************************************************************************
-- 
cgit v0.10.2


From b24aad44438d5bc21cbbfb94a99d9bf710d8295b Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 24 Jul 2009 13:30:17 +0800
Subject: ACPICA: Split large ACPI table header

Split out the non-acpi-defined ACPI tables into the existing
(but empty) actbl2.h file. Preparation for new ACPI 4.0 tables.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 0649a56..55fcfc6 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -44,6 +44,19 @@
 #ifndef __ACTBL_H__
 #define __ACTBL_H__
 
+/*******************************************************************************
+ *
+ * Fundamental ACPI tables
+ *
+ * This file contains definitions for the ACPI tables that are directly consumed
+ * by ACPICA. All other tables are consumed by the OS-dependent ACPI-related
+ * device drivers and other OS support code.
+ *
+ * The RSDP and FACS do not use the common ACPI table header. All other ACPI
+ * tables use the header.
+ *
+ ******************************************************************************/
+
 /*
  * Values for description table header signatures. Useful because they make
  * it more difficult to inadvertently type in the wrong signature.
@@ -65,11 +78,6 @@
 #pragma pack(1)
 
 /*
- * These are the ACPI tables that are directly consumed by the subsystem.
- *
- * The RSDP and FACS do not use the common ACPI table header. All other ACPI
- * tables use the header.
- *
  * Note about bitfields: The u8 type is used for bitfields in ACPI tables.
  * This is the only type that is even remotely portable. Anything else is not
  * portable, so do not use any other bitfield types.
@@ -77,9 +85,8 @@
 
 /*******************************************************************************
  *
- * ACPI Table Header. This common header is used by all tables except the
- * RSDP and FACS. The define is used for direct inclusion of header into
- * other ACPI tables
+ * Master ACPI Table Header. This common header is used by all ACPI tables
+ * except the RSDP and FACS.
  *
  ******************************************************************************/
 
@@ -95,13 +102,16 @@ struct acpi_table_header {
 	u32 asl_compiler_revision;	/* ASL compiler version */
 };
 
-/*
+/*******************************************************************************
+ *
  * GAS - Generic Address Structure (ACPI 2.0+)
  *
  * Note: Since this structure is used in the ACPI tables, it is byte aligned.
- * If misalignment is not supported, access to the Address field must be
- * performed with care.
- */
+ * If misaliged access is not supported by the hardware, accesses to the
+ * 64-bit Address field must be performed with care.
+ *
+ ******************************************************************************/
+
 struct acpi_generic_address {
 	u8 space_id;		/* Address space where struct or register exists */
 	u8 bit_width;		/* Size in bits of given register */
@@ -325,5 +335,6 @@ struct acpi_table_desc {
  */
 
 #include <acpi/actbl1.h>
+#include <acpi/actbl2.h>
 
 #endif				/* __ACTBL_H__ */
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index ec36693..582af1f 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -46,41 +46,29 @@
 
 /*******************************************************************************
  *
- * Additional ACPI Tables
+ * Additional ACPI Tables (1)
  *
  * These tables are not consumed directly by the ACPICA subsystem, but are
  * included here to support device drivers and the AML disassembler.
  *
+ * The tables in this file are fully defined within the ACPI specification.
+ *
  ******************************************************************************/
 
 /*
  * Values for description table header signatures. Useful because they make
  * it more difficult to inadvertently type in the wrong signature.
  */
-#define ACPI_SIG_ASF            "ASF!"	/* Alert Standard Format table */
 #define ACPI_SIG_BERT           "BERT"	/* Boot Error Record Table */
-#define ACPI_SIG_BOOT           "BOOT"	/* Simple Boot Flag Table */
 #define ACPI_SIG_CPEP           "CPEP"	/* Corrected Platform Error Polling table */
-#define ACPI_SIG_DBGP           "DBGP"	/* Debug Port table */
-#define ACPI_SIG_DMAR           "DMAR"	/* DMA Remapping table */
 #define ACPI_SIG_ECDT           "ECDT"	/* Embedded Controller Boot Resources Table */
 #define ACPI_SIG_EINJ           "EINJ"	/* Error Injection table */
 #define ACPI_SIG_ERST           "ERST"	/* Error Record Serialization Table */
 #define ACPI_SIG_HEST           "HEST"	/* Hardware Error Source Table */
-#define ACPI_SIG_HPET           "HPET"	/* High Precision Event Timer table */
-#define ACPI_SIG_IBFT           "IBFT"	/* i_sCSI Boot Firmware Table */
 #define ACPI_SIG_MADT           "APIC"	/* Multiple APIC Description Table */
-#define ACPI_SIG_MCFG           "MCFG"	/* PCI Memory Mapped Configuration table */
 #define ACPI_SIG_SBST           "SBST"	/* Smart Battery Specification Table */
-#define ACPI_SIG_SLIC           "SLIC"	/* Software Licensing Description Table */
 #define ACPI_SIG_SLIT           "SLIT"	/* System Locality Distance Information Table */
-#define ACPI_SIG_SPCR           "SPCR"	/* Serial Port Console Redirection table */
-#define ACPI_SIG_SPMI           "SPMI"	/* Server Platform Management Interface table */
 #define ACPI_SIG_SRAT           "SRAT"	/* System Resource Affinity Table */
-#define ACPI_SIG_TCPA           "TCPA"	/* Trusted Computing Platform Alliance table */
-#define ACPI_SIG_UEFI           "UEFI"	/* Uefi Boot Optimization Table */
-#define ACPI_SIG_WDAT           "WDAT"	/* Watchdog Action Table */
-#define ACPI_SIG_WDRT           "WDRT"	/* Watchdog Resource Table */
 
 /*
  * All tables must be byte-packed to match the ACPI specification, since
@@ -115,115 +103,6 @@ struct acpi_whea_header {
 
 /*******************************************************************************
  *
- * ASF - Alert Standard Format table (Signature "ASF!")
- *
- * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003
- *
- ******************************************************************************/
-
-struct acpi_table_asf {
-	struct acpi_table_header header;	/* Common ACPI table header */
-};
-
-/* ASF subtable header */
-
-struct acpi_asf_header {
-	u8 type;
-	u8 reserved;
-	u16 length;
-};
-
-/* Values for Type field above */
-
-enum acpi_asf_type {
-	ACPI_ASF_TYPE_INFO = 0,
-	ACPI_ASF_TYPE_ALERT = 1,
-	ACPI_ASF_TYPE_CONTROL = 2,
-	ACPI_ASF_TYPE_BOOT = 3,
-	ACPI_ASF_TYPE_ADDRESS = 4,
-	ACPI_ASF_TYPE_RESERVED = 5
-};
-
-/*
- * ASF subtables
- */
-
-/* 0: ASF Information */
-
-struct acpi_asf_info {
-	struct acpi_asf_header header;
-	u8 min_reset_value;
-	u8 min_poll_interval;
-	u16 system_id;
-	u32 mfg_id;
-	u8 flags;
-	u8 reserved2[3];
-};
-
-/* 1: ASF Alerts */
-
-struct acpi_asf_alert {
-	struct acpi_asf_header header;
-	u8 assert_mask;
-	u8 deassert_mask;
-	u8 alerts;
-	u8 data_length;
-};
-
-struct acpi_asf_alert_data {
-	u8 address;
-	u8 command;
-	u8 mask;
-	u8 value;
-	u8 sensor_type;
-	u8 type;
-	u8 offset;
-	u8 source_type;
-	u8 severity;
-	u8 sensor_number;
-	u8 entity;
-	u8 instance;
-};
-
-/* 2: ASF Remote Control */
-
-struct acpi_asf_remote {
-	struct acpi_asf_header header;
-	u8 controls;
-	u8 data_length;
-	u16 reserved2;
-};
-
-struct acpi_asf_control_data {
-	u8 function;
-	u8 address;
-	u8 command;
-	u8 value;
-};
-
-/* 3: ASF RMCP Boot Options */
-
-struct acpi_asf_rmcp {
-	struct acpi_asf_header header;
-	u8 capabilities[7];
-	u8 completion_code;
-	u32 enterprise_id;
-	u8 command;
-	u16 parameter;
-	u16 boot_options;
-	u16 oem_parameters;
-};
-
-/* 4: ASF Address */
-
-struct acpi_asf_address {
-	struct acpi_asf_header header;
-	u8 eprom_address;
-	u8 devices;
-};
-
-/*******************************************************************************
- *
  * BERT - Boot Error Record Table
  *
  ******************************************************************************/
@@ -253,18 +132,6 @@ struct acpi_bert_region {
 
 /*******************************************************************************
  *
- * BOOT - Simple Boot Flag Table
- *
- ******************************************************************************/
-
-struct acpi_table_boot {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 cmos_index;		/* Index in CMOS RAM for the boot register */
-	u8 reserved[3];
-};
-
-/*******************************************************************************
- *
  * CPEP - Corrected Platform Error Polling table
  *
  ******************************************************************************/
@@ -286,123 +153,6 @@ struct acpi_cpep_polling {
 
 /*******************************************************************************
  *
- * DBGP - Debug Port table
- *
- ******************************************************************************/
-
-struct acpi_table_dbgp {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 type;		/* 0=full 16550, 1=subset of 16550 */
-	u8 reserved[3];
-	struct acpi_generic_address debug_port;
-};
-
-/*******************************************************************************
- *
- * DMAR - DMA Remapping table
- *	  From "Intel Virtualization Technology for Directed I/O", Sept. 2007
- *
- ******************************************************************************/
-
-struct acpi_table_dmar {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 width;		/* Host Address Width */
-	u8 flags;
-	u8 reserved[10];
-};
-
-/* Flags */
-
-#define ACPI_DMAR_INTR_REMAP	    (1)
-
-/* DMAR subtable header */
-
-struct acpi_dmar_header {
-	u16 type;
-	u16 length;
-};
-
-/* Values for subtable type in struct acpi_dmar_header */
-
-enum acpi_dmar_type {
-	ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,
-	ACPI_DMAR_TYPE_RESERVED_MEMORY = 1,
-	ACPI_DMAR_TYPE_ATSR = 2,
-	ACPI_DMAR_TYPE_RESERVED = 3	/* 3 and greater are reserved */
-};
-
-struct acpi_dmar_device_scope {
-	u8 entry_type;
-	u8 length;
-	u16 reserved;
-	u8 enumeration_id;
-	u8 bus;
-};
-
-/* Values for entry_type in struct acpi_dmar_device_scope */
-
-enum acpi_dmar_scope_type {
-	ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0,
-	ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1,
-	ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2,
-	ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3,
-	ACPI_DMAR_SCOPE_TYPE_HPET = 4,
-	ACPI_DMAR_SCOPE_TYPE_RESERVED = 5	/* 5 and greater are reserved */
-};
-
-struct acpi_dmar_pci_path {
-	u8 dev;
-	u8 fn;
-};
-
-/*
- * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header
- */
-
-/* 0: Hardware Unit Definition */
-
-struct acpi_dmar_hardware_unit {
-	struct acpi_dmar_header header;
-	u8 flags;
-	u8 reserved;
-	u16 segment;
-	u64 address;		/* Register Base Address */
-};
-
-/* Flags */
-
-#define ACPI_DMAR_INCLUDE_ALL       (1)
-
-/* 1: Reserved Memory Defininition */
-
-struct acpi_dmar_reserved_memory {
-	struct acpi_dmar_header header;
-	u16 reserved;
-	u16 segment;
-	u64 base_address;		/* 4_k aligned base address */
-	u64 end_address;	/* 4_k aligned limit address */
-};
-
-/* Flags */
-
-#define ACPI_DMAR_ALLOW_ALL         (1)
-
-
-/* 2: Root Port ATS Capability Reporting Structure */
-
-struct acpi_dmar_atsr {
-       struct acpi_dmar_header header;
-       u8 flags;
-       u8 reserved;
-       u16 segment;
-};
-
-/* Flags */
-
-#define ACPI_DMAR_ALL_PORTS	    (1)
-
-/*******************************************************************************
- *
  * ECDT - Embedded Controller Boot Resources Table
  *
  ******************************************************************************/
@@ -764,119 +514,6 @@ struct acpi_hest_generic {
 
 /*******************************************************************************
  *
- * HPET - High Precision Event Timer table
- *
- ******************************************************************************/
-
-struct acpi_table_hpet {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u32 id;			/* Hardware ID of event timer block */
-	struct acpi_generic_address address;	/* Address of event timer block */
-	u8 sequence;		/* HPET sequence number */
-	u16 minimum_tick;	/* Main counter min tick, periodic mode */
-	u8 flags;
-};
-
-/*! Flags */
-
-#define ACPI_HPET_PAGE_PROTECT      (1)	/* 00: No page protection */
-#define ACPI_HPET_PAGE_PROTECT_4    (1<<1)	/* 01: 4KB page protected */
-#define ACPI_HPET_PAGE_PROTECT_64   (1<<2)	/* 02: 64KB page protected */
-
-/*! [End] no source code translation !*/
-
-/*******************************************************************************
- *
- * IBFT - Boot Firmware Table
- *
- ******************************************************************************/
-
-struct acpi_table_ibft {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 reserved[12];
-};
-
-/* IBFT common subtable header */
-
-struct acpi_ibft_header {
-	u8 type;
-	u8 version;
-	u16 length;
-	u8 index;
-	u8 flags;
-};
-
-/* Values for Type field above */
-
-enum acpi_ibft_type {
-	ACPI_IBFT_TYPE_NOT_USED = 0,
-	ACPI_IBFT_TYPE_CONTROL = 1,
-	ACPI_IBFT_TYPE_INITIATOR = 2,
-	ACPI_IBFT_TYPE_NIC = 3,
-	ACPI_IBFT_TYPE_TARGET = 4,
-	ACPI_IBFT_TYPE_EXTENSIONS = 5,
-	ACPI_IBFT_TYPE_RESERVED = 6	/* 6 and greater are reserved */
-};
-
-/* IBFT subtables */
-
-struct acpi_ibft_control {
-	struct acpi_ibft_header header;
-	u16 extensions;
-	u16 initiator_offset;
-	u16 nic0_offset;
-	u16 target0_offset;
-	u16 nic1_offset;
-	u16 target1_offset;
-};
-
-struct acpi_ibft_initiator {
-	struct acpi_ibft_header header;
-	u8 sns_server[16];
-	u8 slp_server[16];
-	u8 primary_server[16];
-	u8 secondary_server[16];
-	u16 name_length;
-	u16 name_offset;
-};
-
-struct acpi_ibft_nic {
-	struct acpi_ibft_header header;
-	u8 ip_address[16];
-	u8 subnet_mask_prefix;
-	u8 origin;
-	u8 gateway[16];
-	u8 primary_dns[16];
-	u8 secondary_dns[16];
-	u8 dhcp[16];
-	u16 vlan;
-	u8 mac_address[6];
-	u16 pci_address;
-	u16 name_length;
-	u16 name_offset;
-};
-
-struct acpi_ibft_target {
-	struct acpi_ibft_header header;
-	u8 target_ip_address[16];
-	u16 target_ip_socket;
-	u8 target_boot_lun[8];
-	u8 chap_type;
-	u8 nic_association;
-	u16 target_name_length;
-	u16 target_name_offset;
-	u16 chap_name_length;
-	u16 chap_name_offset;
-	u16 chap_secret_length;
-	u16 chap_secret_offset;
-	u16 reverse_chap_name_length;
-	u16 reverse_chap_name_offset;
-	u16 reverse_chap_secret_length;
-	u16 reverse_chap_secret_offset;
-};
-
-/*******************************************************************************
- *
  * MADT - Multiple APIC Description Table
  *
  ******************************************************************************/
@@ -1058,27 +695,6 @@ struct acpi_madt_local_x2apic_nmi {
 
 /*******************************************************************************
  *
- * MCFG - PCI Memory Mapped Configuration table and sub-table
- *
- ******************************************************************************/
-
-struct acpi_table_mcfg {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 reserved[8];
-};
-
-/* Subtable */
-
-struct acpi_mcfg_allocation {
-	u64 address;		/* Base address, processor-relative */
-	u16 pci_segment;	/* PCI segment group number */
-	u8 start_bus_number;	/* Starting PCI Bus number */
-	u8 end_bus_number;	/* Final PCI Bus number */
-	u32 reserved;
-};
-
-/*******************************************************************************
- *
  * SBST - Smart Battery Specification Table
  *
  ******************************************************************************/
@@ -1104,59 +720,6 @@ struct acpi_table_slit {
 
 /*******************************************************************************
  *
- * SPCR - Serial Port Console Redirection table
- *
- ******************************************************************************/
-
-struct acpi_table_spcr {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 interface_type;	/* 0=full 16550, 1=subset of 16550 */
-	u8 reserved[3];
-	struct acpi_generic_address serial_port;
-	u8 interrupt_type;
-	u8 pc_interrupt;
-	u32 interrupt;
-	u8 baud_rate;
-	u8 parity;
-	u8 stop_bits;
-	u8 flow_control;
-	u8 terminal_type;
-	u8 reserved1;
-	u16 pci_device_id;
-	u16 pci_vendor_id;
-	u8 pci_bus;
-	u8 pci_device;
-	u8 pci_function;
-	u32 pci_flags;
-	u8 pci_segment;
-	u32 reserved2;
-};
-
-/*******************************************************************************
- *
- * SPMI - Server Platform Management Interface table
- *
- ******************************************************************************/
-
-struct acpi_table_spmi {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 reserved;
-	u8 interface_type;
-	u16 spec_revision;	/* Version of IPMI */
-	u8 interrupt_type;
-	u8 gpe_number;		/* GPE assigned */
-	u8 reserved1;
-	u8 pci_device_flag;
-	u32 interrupt;
-	struct acpi_generic_address ipmi_register;
-	u8 pci_segment;
-	u8 pci_bus;
-	u8 pci_device;
-	u8 pci_function;
-};
-
-/*******************************************************************************
- *
  * SRAT - System Resource Affinity Table
  *
  ******************************************************************************/
@@ -1227,116 +790,6 @@ struct acpi_srat_x2apic_cpu_affinity {
 
 #define ACPI_SRAT_CPU_ENABLED       (1)	/* 00: Use affinity structure */
 
-/*******************************************************************************
- *
- * TCPA - Trusted Computing Platform Alliance table
- *
- ******************************************************************************/
-
-struct acpi_table_tcpa {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u16 reserved;
-	u32 max_log_length;	/* Maximum length for the event log area */
-	u64 log_address;	/* Address of the event log area */
-};
-
-/*******************************************************************************
- *
- * UEFI - UEFI Boot optimization Table
- *
- ******************************************************************************/
-
-struct acpi_table_uefi {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 identifier[16];	/* UUID identifier */
-	u16 data_offset;	/* Offset of remaining data in table */
-	u8 data;
-};
-
-/*******************************************************************************
- *
- * WDAT - Watchdog Action Table
- *
- ******************************************************************************/
-
-struct acpi_table_wdat {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u32 header_length;	/* Watchdog Header Length */
-	u16 pci_segment;	/* PCI Segment number */
-	u8 pci_bus;		/* PCI Bus number */
-	u8 pci_device;		/* PCI Device number */
-	u8 pci_function;	/* PCI Function number */
-	u8 reserved[3];
-	u32 timer_period;	/* Period of one timer count (msec) */
-	u32 max_count;		/* Maximum counter value supported */
-	u32 min_count;		/* Minimum counter value */
-	u8 flags;
-	u8 reserved2[3];
-	u32 entries;		/* Number of watchdog entries that follow */
-};
-
-/* WDAT Instruction Entries (actions) */
-
-struct acpi_wdat_entry {
-	struct acpi_whea_header whea_header;	/* Common header for WHEA tables */
-};
-
-/* Values for Action field above */
-
-enum acpi_wdat_actions {
-	ACPI_WDAT_RESET = 1,
-	ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4,
-	ACPI_WDAT_GET_COUNTDOWN = 5,
-	ACPI_WDAT_SET_COUNTDOWN = 6,
-	ACPI_WDAT_GET_RUNNING_STATE = 8,
-	ACPI_WDAT_SET_RUNNING_STATE = 9,
-	ACPI_WDAT_GET_STOPPED_STATE = 10,
-	ACPI_WDAT_SET_STOPPED_STATE = 11,
-	ACPI_WDAT_GET_REBOOT = 16,
-	ACPI_WDAT_SET_REBOOT = 17,
-	ACPI_WDAT_GET_SHUTDOWN = 18,
-	ACPI_WDAT_SET_SHUTDOWN = 19,
-	ACPI_WDAT_GET_STATUS = 32,
-	ACPI_WDAT_SET_STATUS = 33,
-	ACPI_WDAT_ACTION_RESERVED = 34	/* 34 and greater are reserved */
-};
-
-/* Values for Instruction field above */
-
-enum acpi_wdat_instructions {
-	ACPI_WDAT_READ_VALUE = 0,
-	ACPI_WDAT_READ_COUNTDOWN = 1,
-	ACPI_WDAT_WRITE_VALUE = 2,
-	ACPI_WDAT_WRITE_COUNTDOWN = 3,
-	ACPI_WDAT_INSTRUCTION_RESERVED = 4,	/* 4 and greater are reserved */
-	ACPI_WDAT_PRESERVE_REGISTER = 0x80	/* Except for this value */
-};
-
-/*******************************************************************************
- *
- * WDRT - Watchdog Resource Table
- *
- ******************************************************************************/
-
-struct acpi_table_wdrt {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u32 header_length;	/* Watchdog Header Length */
-	u8 pci_segment;		/* PCI Segment number */
-	u8 pci_bus;		/* PCI Bus number */
-	u8 pci_device;		/* PCI Device number */
-	u8 pci_function;	/* PCI Function number */
-	u32 timer_period;	/* Period of one timer count (msec) */
-	u32 max_count;		/* Maximum counter value supported */
-	u32 min_count;		/* Minimum counter value */
-	u8 flags;
-	u8 reserved[3];
-	u32 entries;		/* Number of watchdog entries that follow */
-};
-
-/* Flags */
-
-#define ACPI_WDRT_TIMER_ENABLED     (1)	/* 00: Timer enabled */
-
 /* Reset to default packing */
 
 #pragma pack()
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
new file mode 100644
index 0000000..b271aba
--- /dev/null
+++ b/include/acpi/actbl2.h
@@ -0,0 +1,585 @@
+#ifndef __ACTBL2_H__
+#define __ACTBL2_H__
+
+/*******************************************************************************
+ *
+ * Additional ACPI Tables (2)
+ *
+ * These tables are not consumed directly by the ACPICA subsystem, but are
+ * included here to support device drivers and the AML disassembler.
+ *
+ * The tables in this file are defined by third-party specifications, and are
+ * not defined directly by the ACPI specification itself.
+ *
+ ******************************************************************************/
+
+/*
+ * Values for description table header signatures. Useful because they make
+ * it more difficult to inadvertently type in the wrong signature.
+ */
+#define ACPI_SIG_ASF            "ASF!"	/* Alert Standard Format table */
+#define ACPI_SIG_BOOT           "BOOT"	/* Simple Boot Flag Table */
+#define ACPI_SIG_DBGP           "DBGP"	/* Debug Port table */
+#define ACPI_SIG_DMAR           "DMAR"	/* DMA Remapping table */
+#define ACPI_SIG_HPET           "HPET"	/* High Precision Event Timer table */
+#define ACPI_SIG_IBFT           "IBFT"	/* i_sCSI Boot Firmware Table */
+#define ACPI_SIG_MCFG           "MCFG"	/* PCI Memory Mapped Configuration table */
+#define ACPI_SIG_SLIC           "SLIC"	/* Software Licensing Description Table */
+#define ACPI_SIG_SPCR           "SPCR"	/* Serial Port Console Redirection table */
+#define ACPI_SIG_SPMI           "SPMI"	/* Server Platform Management Interface table */
+#define ACPI_SIG_TCPA           "TCPA"	/* Trusted Computing Platform Alliance table */
+#define ACPI_SIG_UEFI           "UEFI"	/* Uefi Boot Optimization Table */
+#define ACPI_SIG_WDAT           "WDAT"	/* Watchdog Action Table */
+#define ACPI_SIG_WDRT           "WDRT"	/* Watchdog Resource Table */
+
+/*
+ * All tables must be byte-packed to match the ACPI specification, since
+ * the tables are provided by the system BIOS.
+ */
+#pragma pack(1)
+
+/*
+ * Note about bitfields: The u8 type is used for bitfields in ACPI tables.
+ * This is the only type that is even remotely portable. Anything else is not
+ * portable, so do not use any other bitfield types.
+ */
+
+/*******************************************************************************
+ *
+ * ASF - Alert Standard Format table (Signature "ASF!")
+ *
+ * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003
+ *
+ ******************************************************************************/
+
+struct acpi_table_asf {
+	struct acpi_table_header header;	/* Common ACPI table header */
+};
+
+/* ASF subtable header */
+
+struct acpi_asf_header {
+	u8 type;
+	u8 reserved;
+	u16 length;
+};
+
+/* Values for Type field above */
+
+enum acpi_asf_type {
+	ACPI_ASF_TYPE_INFO = 0,
+	ACPI_ASF_TYPE_ALERT = 1,
+	ACPI_ASF_TYPE_CONTROL = 2,
+	ACPI_ASF_TYPE_BOOT = 3,
+	ACPI_ASF_TYPE_ADDRESS = 4,
+	ACPI_ASF_TYPE_RESERVED = 5
+};
+
+/*
+ * ASF subtables
+ */
+
+/* 0: ASF Information */
+
+struct acpi_asf_info {
+	struct acpi_asf_header header;
+	u8 min_reset_value;
+	u8 min_poll_interval;
+	u16 system_id;
+	u32 mfg_id;
+	u8 flags;
+	u8 reserved2[3];
+};
+
+/* 1: ASF Alerts */
+
+struct acpi_asf_alert {
+	struct acpi_asf_header header;
+	u8 assert_mask;
+	u8 deassert_mask;
+	u8 alerts;
+	u8 data_length;
+};
+
+struct acpi_asf_alert_data {
+	u8 address;
+	u8 command;
+	u8 mask;
+	u8 value;
+	u8 sensor_type;
+	u8 type;
+	u8 offset;
+	u8 source_type;
+	u8 severity;
+	u8 sensor_number;
+	u8 entity;
+	u8 instance;
+};
+
+/* 2: ASF Remote Control */
+
+struct acpi_asf_remote {
+	struct acpi_asf_header header;
+	u8 controls;
+	u8 data_length;
+	u16 reserved2;
+};
+
+struct acpi_asf_control_data {
+	u8 function;
+	u8 address;
+	u8 command;
+	u8 value;
+};
+
+/* 3: ASF RMCP Boot Options */
+
+struct acpi_asf_rmcp {
+	struct acpi_asf_header header;
+	u8 capabilities[7];
+	u8 completion_code;
+	u32 enterprise_id;
+	u8 command;
+	u16 parameter;
+	u16 boot_options;
+	u16 oem_parameters;
+};
+
+/* 4: ASF Address */
+
+struct acpi_asf_address {
+	struct acpi_asf_header header;
+	u8 eprom_address;
+	u8 devices;
+};
+
+/*******************************************************************************
+ *
+ * BOOT - Simple Boot Flag Table
+ *
+ ******************************************************************************/
+
+struct acpi_table_boot {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 cmos_index;		/* Index in CMOS RAM for the boot register */
+	u8 reserved[3];
+};
+
+/*******************************************************************************
+ *
+ * DBGP - Debug Port table
+ *
+ ******************************************************************************/
+
+struct acpi_table_dbgp {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 type;		/* 0=full 16550, 1=subset of 16550 */
+	u8 reserved[3];
+	struct acpi_generic_address debug_port;
+};
+
+/*******************************************************************************
+ *
+ * DMAR - DMA Remapping table
+ *        From "Intel Virtualization Technology for Directed I/O", Sept. 2007
+ *
+ ******************************************************************************/
+
+struct acpi_table_dmar {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 width;		/* Host Address Width */
+	u8 flags;
+	u8 reserved[10];
+};
+
+/* Flags */
+
+#define ACPI_DMAR_INTR_REMAP        (1)
+
+/* DMAR subtable header */
+
+struct acpi_dmar_header {
+	u16 type;
+	u16 length;
+};
+
+/* Values for subtable type in struct acpi_dmar_header */
+
+enum acpi_dmar_type {
+	ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,
+	ACPI_DMAR_TYPE_RESERVED_MEMORY = 1,
+	ACPI_DMAR_TYPE_ATSR = 2,
+	ACPI_DMAR_TYPE_RESERVED = 3	/* 3 and greater are reserved */
+};
+
+struct acpi_dmar_device_scope {
+	u8 entry_type;
+	u8 length;
+	u16 reserved;
+	u8 enumeration_id;
+	u8 bus;
+};
+
+/* Values for entry_type in struct acpi_dmar_device_scope */
+
+enum acpi_dmar_scope_type {
+	ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0,
+	ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1,
+	ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2,
+	ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3,
+	ACPI_DMAR_SCOPE_TYPE_HPET = 4,
+	ACPI_DMAR_SCOPE_TYPE_RESERVED = 5	/* 5 and greater are reserved */
+};
+
+struct acpi_dmar_pci_path {
+	u8 dev;
+	u8 fn;
+};
+
+/*
+ * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header
+ */
+
+/* 0: Hardware Unit Definition */
+
+struct acpi_dmar_hardware_unit {
+	struct acpi_dmar_header header;
+	u8 flags;
+	u8 reserved;
+	u16 segment;
+	u64 address;		/* Register Base Address */
+};
+
+/* Flags */
+
+#define ACPI_DMAR_INCLUDE_ALL       (1)
+
+/* 1: Reserved Memory Defininition */
+
+struct acpi_dmar_reserved_memory {
+	struct acpi_dmar_header header;
+	u16 reserved;
+	u16 segment;
+	u64 base_address;	/* 4_k aligned base address */
+	u64 end_address;	/* 4_k aligned limit address */
+};
+
+/* Flags */
+
+#define ACPI_DMAR_ALLOW_ALL         (1)
+
+/* 2: Root Port ATS Capability Reporting Structure */
+
+struct acpi_dmar_atsr {
+	struct acpi_dmar_header header;
+	u8 flags;
+	u8 reserved;
+	u16 segment;
+};
+
+/* Flags */
+
+#define ACPI_DMAR_ALL_PORTS         (1)
+
+/*******************************************************************************
+ *
+ * HPET - High Precision Event Timer table
+ *
+ ******************************************************************************/
+
+struct acpi_table_hpet {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 id;			/* Hardware ID of event timer block */
+	struct acpi_generic_address address;	/* Address of event timer block */
+	u8 sequence;		/* HPET sequence number */
+	u16 minimum_tick;	/* Main counter min tick, periodic mode */
+	u8 flags;
+};
+
+/*! Flags */
+
+#define ACPI_HPET_PAGE_PROTECT      (1)	/* 00: No page protection */
+#define ACPI_HPET_PAGE_PROTECT_4    (1<<1)	/* 01: 4KB page protected */
+#define ACPI_HPET_PAGE_PROTECT_64   (1<<2)	/* 02: 64KB page protected */
+
+/*! [End] no source code translation !*/
+
+/*******************************************************************************
+ *
+ * IBFT - Boot Firmware Table
+ *
+ ******************************************************************************/
+
+struct acpi_table_ibft {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 reserved[12];
+};
+
+/* IBFT common subtable header */
+
+struct acpi_ibft_header {
+	u8 type;
+	u8 version;
+	u16 length;
+	u8 index;
+	u8 flags;
+};
+
+/* Values for Type field above */
+
+enum acpi_ibft_type {
+	ACPI_IBFT_TYPE_NOT_USED = 0,
+	ACPI_IBFT_TYPE_CONTROL = 1,
+	ACPI_IBFT_TYPE_INITIATOR = 2,
+	ACPI_IBFT_TYPE_NIC = 3,
+	ACPI_IBFT_TYPE_TARGET = 4,
+	ACPI_IBFT_TYPE_EXTENSIONS = 5,
+	ACPI_IBFT_TYPE_RESERVED = 6	/* 6 and greater are reserved */
+};
+
+/* IBFT subtables */
+
+struct acpi_ibft_control {
+	struct acpi_ibft_header header;
+	u16 extensions;
+	u16 initiator_offset;
+	u16 nic0_offset;
+	u16 target0_offset;
+	u16 nic1_offset;
+	u16 target1_offset;
+};
+
+struct acpi_ibft_initiator {
+	struct acpi_ibft_header header;
+	u8 sns_server[16];
+	u8 slp_server[16];
+	u8 primary_server[16];
+	u8 secondary_server[16];
+	u16 name_length;
+	u16 name_offset;
+};
+
+struct acpi_ibft_nic {
+	struct acpi_ibft_header header;
+	u8 ip_address[16];
+	u8 subnet_mask_prefix;
+	u8 origin;
+	u8 gateway[16];
+	u8 primary_dns[16];
+	u8 secondary_dns[16];
+	u8 dhcp[16];
+	u16 vlan;
+	u8 mac_address[6];
+	u16 pci_address;
+	u16 name_length;
+	u16 name_offset;
+};
+
+struct acpi_ibft_target {
+	struct acpi_ibft_header header;
+	u8 target_ip_address[16];
+	u16 target_ip_socket;
+	u8 target_boot_lun[8];
+	u8 chap_type;
+	u8 nic_association;
+	u16 target_name_length;
+	u16 target_name_offset;
+	u16 chap_name_length;
+	u16 chap_name_offset;
+	u16 chap_secret_length;
+	u16 chap_secret_offset;
+	u16 reverse_chap_name_length;
+	u16 reverse_chap_name_offset;
+	u16 reverse_chap_secret_length;
+	u16 reverse_chap_secret_offset;
+};
+
+/*******************************************************************************
+ *
+ * MCFG - PCI Memory Mapped Configuration table and sub-table
+ *
+ ******************************************************************************/
+
+struct acpi_table_mcfg {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 reserved[8];
+};
+
+/* Subtable */
+
+struct acpi_mcfg_allocation {
+	u64 address;		/* Base address, processor-relative */
+	u16 pci_segment;	/* PCI segment group number */
+	u8 start_bus_number;	/* Starting PCI Bus number */
+	u8 end_bus_number;	/* Final PCI Bus number */
+	u32 reserved;
+};
+
+/*******************************************************************************
+ *
+ * SPCR - Serial Port Console Redirection table
+ *
+ ******************************************************************************/
+
+struct acpi_table_spcr {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 interface_type;	/* 0=full 16550, 1=subset of 16550 */
+	u8 reserved[3];
+	struct acpi_generic_address serial_port;
+	u8 interrupt_type;
+	u8 pc_interrupt;
+	u32 interrupt;
+	u8 baud_rate;
+	u8 parity;
+	u8 stop_bits;
+	u8 flow_control;
+	u8 terminal_type;
+	u8 reserved1;
+	u16 pci_device_id;
+	u16 pci_vendor_id;
+	u8 pci_bus;
+	u8 pci_device;
+	u8 pci_function;
+	u32 pci_flags;
+	u8 pci_segment;
+	u32 reserved2;
+};
+
+/*******************************************************************************
+ *
+ * SPMI - Server Platform Management Interface table
+ *
+ ******************************************************************************/
+
+struct acpi_table_spmi {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 reserved;
+	u8 interface_type;
+	u16 spec_revision;	/* Version of IPMI */
+	u8 interrupt_type;
+	u8 gpe_number;		/* GPE assigned */
+	u8 reserved1;
+	u8 pci_device_flag;
+	u32 interrupt;
+	struct acpi_generic_address ipmi_register;
+	u8 pci_segment;
+	u8 pci_bus;
+	u8 pci_device;
+	u8 pci_function;
+};
+
+/*******************************************************************************
+ *
+ * TCPA - Trusted Computing Platform Alliance table
+ *
+ ******************************************************************************/
+
+struct acpi_table_tcpa {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u16 reserved;
+	u32 max_log_length;	/* Maximum length for the event log area */
+	u64 log_address;	/* Address of the event log area */
+};
+
+/*******************************************************************************
+ *
+ * UEFI - UEFI Boot optimization Table
+ *
+ ******************************************************************************/
+
+struct acpi_table_uefi {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u8 identifier[16];	/* UUID identifier */
+	u16 data_offset;	/* Offset of remaining data in table */
+	u8 data;
+};
+
+/*******************************************************************************
+ *
+ * WDAT - Watchdog Action Table
+ *
+ ******************************************************************************/
+
+struct acpi_table_wdat {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 header_length;	/* Watchdog Header Length */
+	u16 pci_segment;	/* PCI Segment number */
+	u8 pci_bus;		/* PCI Bus number */
+	u8 pci_device;		/* PCI Device number */
+	u8 pci_function;	/* PCI Function number */
+	u8 reserved[3];
+	u32 timer_period;	/* Period of one timer count (msec) */
+	u32 max_count;		/* Maximum counter value supported */
+	u32 min_count;		/* Minimum counter value */
+	u8 flags;
+	u8 reserved2[3];
+	u32 entries;		/* Number of watchdog entries that follow */
+};
+
+/* WDAT Instruction Entries (actions) */
+
+struct acpi_wdat_entry {
+	struct acpi_whea_header whea_header;	/* Common header for WHEA tables */
+};
+
+/* Values for Action field above */
+
+enum acpi_wdat_actions {
+	ACPI_WDAT_RESET = 1,
+	ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4,
+	ACPI_WDAT_GET_COUNTDOWN = 5,
+	ACPI_WDAT_SET_COUNTDOWN = 6,
+	ACPI_WDAT_GET_RUNNING_STATE = 8,
+	ACPI_WDAT_SET_RUNNING_STATE = 9,
+	ACPI_WDAT_GET_STOPPED_STATE = 10,
+	ACPI_WDAT_SET_STOPPED_STATE = 11,
+	ACPI_WDAT_GET_REBOOT = 16,
+	ACPI_WDAT_SET_REBOOT = 17,
+	ACPI_WDAT_GET_SHUTDOWN = 18,
+	ACPI_WDAT_SET_SHUTDOWN = 19,
+	ACPI_WDAT_GET_STATUS = 32,
+	ACPI_WDAT_SET_STATUS = 33,
+	ACPI_WDAT_ACTION_RESERVED = 34	/* 34 and greater are reserved */
+};
+
+/* Values for Instruction field above */
+
+enum acpi_wdat_instructions {
+	ACPI_WDAT_READ_VALUE = 0,
+	ACPI_WDAT_READ_COUNTDOWN = 1,
+	ACPI_WDAT_WRITE_VALUE = 2,
+	ACPI_WDAT_WRITE_COUNTDOWN = 3,
+	ACPI_WDAT_INSTRUCTION_RESERVED = 4,	/* 4 and greater are reserved */
+	ACPI_WDAT_PRESERVE_REGISTER = 0x80	/* Except for this value */
+};
+
+/*******************************************************************************
+ *
+ * WDRT - Watchdog Resource Table
+ *
+ ******************************************************************************/
+
+struct acpi_table_wdrt {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 header_length;	/* Watchdog Header Length */
+	u8 pci_segment;		/* PCI Segment number */
+	u8 pci_bus;		/* PCI Bus number */
+	u8 pci_device;		/* PCI Device number */
+	u8 pci_function;	/* PCI Function number */
+	u32 timer_period;	/* Period of one timer count (msec) */
+	u32 max_count;		/* Maximum counter value supported */
+	u32 min_count;		/* Minimum counter value */
+	u8 flags;
+	u8 reserved[3];
+	u32 entries;		/* Number of watchdog entries that follow */
+};
+
+/* Flags */
+
+#define ACPI_WDRT_TIMER_ENABLED     (1)	/* 00: Timer enabled */
+
+/* Reset to default packing */
+
+#pragma pack()
+
+#endif				/* __ACTBL2_H__ */
-- 
cgit v0.10.2


From 6e2d5ebd0d36199920676fdceaff4f4bfe66297b Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 27 Jul 2009 10:53:00 +0800
Subject: ACPICA: ACPI 4: Update headers for new and changed ACPI tables.

Add IVRS,MSCT,UEFI,WAET,WDAT.
Updated several existing tables for ACPI 4.0-related changes.
Added document references for all tables not defined in ACPI spec.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 55fcfc6..1b65879 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -58,8 +58,9 @@
  ******************************************************************************/
 
 /*
- * Values for description table header signatures. Useful because they make
- * it more difficult to inadvertently type in the wrong signature.
+ * Values for description table header signatures for tables defined in this
+ * file. Useful because they make it more difficult to inadvertently type in
+ * the wrong signature.
  */
 #define ACPI_SIG_DSDT           "DSDT"	/* Differentiated System Description Table */
 #define ACPI_SIG_FADT           "FACP"	/* Fixed ACPI Description Table */
@@ -123,6 +124,7 @@ struct acpi_generic_address {
 /*******************************************************************************
  *
  * RSDP - Root System Description Pointer (Signature is "RSD PTR ")
+ *        Version 2
  *
  ******************************************************************************/
 
@@ -143,6 +145,7 @@ struct acpi_table_rsdp {
 /*******************************************************************************
  *
  * RSDT/XSDT - Root System Description Tables
+ *             Version 1 (both)
  *
  ******************************************************************************/
 
@@ -176,23 +179,24 @@ struct acpi_table_facs {
 	u8 reserved1[24];	/* Reserved, must be zero */
 };
 
-/* global_lock flags */
+/* Masks for global_lock flag field above */
 
 #define ACPI_GLOCK_PENDING          (1)	/* 00: Pending global lock ownership */
 #define ACPI_GLOCK_OWNED            (1<<1)	/* 01: Global lock is owned */
 
-/* Flags  */
+/* Masks for Flags field above  */
 
 #define ACPI_FACS_S4_BIOS_PRESENT   (1)	/* 00: S4BIOS support is present */
 #define ACPI_FACS_64BIT_WAKE        (1<<1)	/* 01: 64-bit wake vector supported (ACPI 4.0) */
 
-/* ospm_flags */
+/* Masks for ospm_flags field above */
 
 #define ACPI_FACS_64BIT_ENVIRONMENT (1)	/* 00: 64-bit wake environment is required (ACPI 4.0) */
 
 /*******************************************************************************
  *
  * FADT - Fixed ACPI Description Table (Signature "FACP")
+ *        Version 4
  *
  ******************************************************************************/
 
@@ -253,7 +257,7 @@ struct acpi_table_fadt {
 	struct acpi_generic_address xgpe1_block;	/* 64-bit Extended General Purpose Event 1 Reg Blk address */
 };
 
-/* FADT Boot Architecture Flags (boot_flags) */
+/* Masks for FADT Boot Architecture Flags (boot_flags) */
 
 #define ACPI_FADT_LEGACY_DEVICES    (1)  	/* 00: [V2] System has LPC or ISA bus devices */
 #define ACPI_FADT_8042              (1<<1)	/* 01: [V3] System has an 8042 controller on port 60/64 */
@@ -263,7 +267,7 @@ struct acpi_table_fadt {
 
 #define FADT2_REVISION_ID               3
 
-/* FADT flags */
+/* Masks for FADT flags */
 
 #define ACPI_FADT_WBINVD            (1)	/* 00: [V1] The wbinvd instruction works properly */
 #define ACPI_FADT_WBINVD_FLUSH      (1<<1)	/* 01: [V1] wbinvd flushes but does not invalidate caches */
@@ -286,7 +290,7 @@ struct acpi_table_fadt {
 #define ACPI_FADT_APIC_CLUSTER      (1<<18)	/* 18: [V4] All local APICs must use cluster model (ACPI 3.0) */
 #define ACPI_FADT_APIC_PHYSICAL     (1<<19)	/* 19: [V4] All local x_aPICs must use physical dest mode (ACPI 3.0) */
 
-/* FADT Prefered Power Management Profiles */
+/* Values for preferred_profile (Prefered Power Management Profiles) */
 
 enum acpi_prefered_pm_profiles {
 	PM_UNSPECIFIED = 0,
@@ -304,14 +308,16 @@ enum acpi_prefered_pm_profiles {
 
 #define ACPI_FADT_OFFSET(f)             (u8) ACPI_OFFSET (struct acpi_table_fadt, f)
 
+/*
+ * Internal table-related structures
+ */
 union acpi_name_union {
 	u32 integer;
 	char ascii[4];
 };
 
-/*
- * Internal ACPI Table Descriptor. One per ACPI table
- */
+/* Internal ACPI Table Descriptor. One per ACPI table. */
+
 struct acpi_table_desc {
 	acpi_physical_address address;
 	struct acpi_table_header *pointer;
@@ -321,7 +327,7 @@ struct acpi_table_desc {
 	u8 flags;
 };
 
-/* Flags for above */
+/* Masks for Flags field above */
 
 #define ACPI_TABLE_ORIGIN_UNKNOWN       (0)
 #define ACPI_TABLE_ORIGIN_MAPPED        (1)
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 582af1f..0417f2a 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -56,8 +56,9 @@
  ******************************************************************************/
 
 /*
- * Values for description table header signatures. Useful because they make
- * it more difficult to inadvertently type in the wrong signature.
+ * Values for description table header signatures for tables defined in this
+ * file. Useful because they make it more difficult to inadvertently type in
+ * the wrong signature.
  */
 #define ACPI_SIG_BERT           "BERT"	/* Boot Error Record Table */
 #define ACPI_SIG_CPEP           "CPEP"	/* Corrected Platform Error Polling table */
@@ -66,6 +67,7 @@
 #define ACPI_SIG_ERST           "ERST"	/* Error Record Serialization Table */
 #define ACPI_SIG_HEST           "HEST"	/* Hardware Error Source Table */
 #define ACPI_SIG_MADT           "APIC"	/* Multiple APIC Description Table */
+#define ACPI_SIG_MSCT           "MSCT"	/* Maximum System Characteristics Table */
 #define ACPI_SIG_SBST           "SBST"	/* Smart Battery Specification Table */
 #define ACPI_SIG_SLIT           "SLIT"	/* System Locality Distance Information Table */
 #define ACPI_SIG_SRAT           "SRAT"	/* System Resource Affinity Table */
@@ -82,14 +84,20 @@
  * portable, so do not use any other bitfield types.
  */
 
-/* Common Subtable header (used in MADT, SRAT, etc.) */
+/*******************************************************************************
+ *
+ * Common subtable headers
+ *
+ ******************************************************************************/
+
+/* Generic subtable header (used in MADT, SRAT, etc.) */
 
 struct acpi_subtable_header {
 	u8 type;
 	u8 length;
 };
 
-/* Common Subtable header for WHEA tables (EINJ, ERST, WDAT) */
+/* Subtable header for WHEA tables (EINJ, ERST, WDAT) */
 
 struct acpi_whea_header {
 	u8 action;
@@ -103,7 +111,8 @@ struct acpi_whea_header {
 
 /*******************************************************************************
  *
- * BERT - Boot Error Record Table
+ * BERT - Boot Error Record Table (ACPI 4.0)
+ *        Version 1
  *
  ******************************************************************************/
 
@@ -113,26 +122,43 @@ struct acpi_table_bert {
 	u64 address;		/* Physical addresss of the error region */
 };
 
-/* Boot Error Region */
+/* Boot Error Region (not a subtable, pointed to by Address field above) */
 
 struct acpi_bert_region {
-	u32 block_status;
-	u32 raw_data_offset;
-	u32 raw_data_length;
-	u32 data_length;
-	u32 error_severity;
+	u32 block_status;	/* Type of error information */
+	u32 raw_data_offset;	/* Offset to raw error data */
+	u32 raw_data_length;	/* Length of raw error data */
+	u32 data_length;	/* Length of generic error data */
+	u32 error_severity;	/* Severity code */
 };
 
-/* block_status Flags */
+/* Values for block_status flags above */
 
 #define ACPI_BERT_UNCORRECTABLE             (1)
-#define ACPI_BERT_CORRECTABLE               (2)
-#define ACPI_BERT_MULTIPLE_UNCORRECTABLE    (4)
-#define ACPI_BERT_MULTIPLE_CORRECTABLE      (8)
+#define ACPI_BERT_CORRECTABLE               (1<<1)
+#define ACPI_BERT_MULTIPLE_UNCORRECTABLE    (1<<2)
+#define ACPI_BERT_MULTIPLE_CORRECTABLE      (1<<3)
+#define ACPI_BERT_ERROR_ENTRY_COUNT         (0xFF<<4)	/* 8 bits, error count */
+
+/* Values for error_severity above */
+
+enum acpi_bert_error_severity {
+	ACPI_BERT_ERROR_CORRECTABLE = 0,
+	ACPI_BERT_ERROR_FATAL = 1,
+	ACPI_BERT_ERROR_CORRECTED = 2,
+	ACPI_BERT_ERROR_NONE = 3,
+	ACPI_BERT_ERROR_RESERVED = 4	/* 4 and greater are reserved */
+};
+
+/*
+ * Note: The generic error data that follows the error_severity field above
+ * uses the struct acpi_hest_generic_data defined under the HEST table below
+ */
 
 /*******************************************************************************
  *
- * CPEP - Corrected Platform Error Polling table
+ * CPEP - Corrected Platform Error Polling table (ACPI 4.0)
+ *        Version 1
  *
  ******************************************************************************/
 
@@ -144,8 +170,7 @@ struct acpi_table_cpep {
 /* Subtable */
 
 struct acpi_cpep_polling {
-	u8 type;
-	u8 length;
+	struct acpi_subtable_header header;
 	u8 id;			/* Processor ID */
 	u8 eid;			/* Processor EID */
 	u32 interval;		/* Polling interval (msec) */
@@ -154,6 +179,7 @@ struct acpi_cpep_polling {
 /*******************************************************************************
  *
  * ECDT - Embedded Controller Boot Resources Table
+ *        Version 1
  *
  ******************************************************************************/
 
@@ -168,14 +194,16 @@ struct acpi_table_ecdt {
 
 /*******************************************************************************
  *
- * EINJ - Error Injection Table
+ * EINJ - Error Injection Table (ACPI 4.0)
+ *        Version 1
  *
  ******************************************************************************/
 
 struct acpi_table_einj {
 	struct acpi_table_header header;	/* Common ACPI table header */
 	u32 header_length;
-	u32 reserved;
+	u8 flags;
+	u8 reserved[3];
 	u32 entries;
 };
 
@@ -185,6 +213,10 @@ struct acpi_einj_entry {
 	struct acpi_whea_header whea_header;	/* Common header for WHEA tables */
 };
 
+/* Masks for Flags field above */
+
+#define ACPI_EINJ_PRESERVE          (1)
+
 /* Values for Action field above */
 
 enum acpi_einj_actions {
@@ -220,9 +252,34 @@ struct acpi_einj_trigger {
 	u32 entry_count;
 };
 
+/* Command status return values */
+
+enum acpi_einj_command_status {
+	ACPI_EINJ_SUCCESS = 0,
+	ACPI_EINJ_FAILURE = 1,
+	ACPI_EINJ_INVALID_ACCESS = 2,
+	ACPI_EINJ_STATUS_RESERVED = 3	/* 3 and greater are reserved */
+};
+
+/* Error types returned from ACPI_EINJ_GET_ERROR_TYPE (bitfield) */
+
+#define ACPI_EINJ_PROCESSOR_CORRECTABLE     (1)
+#define ACPI_EINJ_PROCESSOR_UNCORRECTABLE   (1<<1)
+#define ACPI_EINJ_PROCESSOR_FATAL           (1<<2)
+#define ACPI_EINJ_MEMORY_CORRECTABLE        (1<<3)
+#define ACPI_EINJ_MEMORY_UNCORRECTABLE      (1<<4)
+#define ACPI_EINJ_MEMORY_FATAL              (1<<5)
+#define ACPI_EINJ_PCIX_CORRECTABLE          (1<<6)
+#define ACPI_EINJ_PCIX_UNCORRECTABLE        (1<<7)
+#define ACPI_EINJ_PCIX_FATAL                (1<<8)
+#define ACPI_EINJ_PLATFORM_CORRECTABLE      (1<<9)
+#define ACPI_EINJ_PLATFORM_UNCORRECTABLE    (1<<10)
+#define ACPI_EINJ_PLATFORM_FATAL            (1<<11)
+
 /*******************************************************************************
  *
- * ERST - Error Record Serialization Table
+ * ERST - Error Record Serialization Table (ACPI 4.0)
+ *        Version 1
  *
  ******************************************************************************/
 
@@ -239,19 +296,23 @@ struct acpi_erst_entry {
 	struct acpi_whea_header whea_header;	/* Common header for WHEA tables */
 };
 
+/* Masks for Flags field above */
+
+#define ACPI_ERST_PRESERVE          (1)
+
 /* Values for Action field above */
 
 enum acpi_erst_actions {
-	ACPI_ERST_BEGIN_WRITE_OPERATION = 0,
-	ACPI_ERST_BEGIN_READ_OPERATION = 1,
-	ACPI_ERST_BETGIN_CLEAR_OPERATION = 2,
-	ACPI_ERST_END_OPERATION = 3,
+	ACPI_ERST_BEGIN_WRITE = 0,
+	ACPI_ERST_BEGIN_READ = 1,
+	ACPI_ERST_BEGIN_CLEAR = 2,
+	ACPI_ERST_END = 3,
 	ACPI_ERST_SET_RECORD_OFFSET = 4,
 	ACPI_ERST_EXECUTE_OPERATION = 5,
 	ACPI_ERST_CHECK_BUSY_STATUS = 6,
 	ACPI_ERST_GET_COMMAND_STATUS = 7,
-	ACPI_ERST_GET_RECORD_IDENTIFIER = 8,
-	ACPI_ERST_SET_RECORD_IDENTIFIER = 9,
+	ACPI_ERST_GET_RECORD_ID = 8,
+	ACPI_ERST_SET_RECORD_ID = 9,
 	ACPI_ERST_GET_RECORD_COUNT = 10,
 	ACPI_ERST_BEGIN_DUMMY_WRIITE = 11,
 	ACPI_ERST_NOT_USED = 12,
@@ -286,9 +347,29 @@ enum acpi_erst_instructions {
 	ACPI_ERST_INSTRUCTION_RESERVED = 19	/* 19 and greater are reserved */
 };
 
+/* Command status return values */
+
+enum acpi_erst_command_status {
+	ACPI_ERST_SUCESS = 0,
+	ACPI_ERST_NO_SPACE = 1,
+	ACPI_ERST_NOT_AVAILABLE = 2,
+	ACPI_ERST_FAILURE = 3,
+	ACPI_ERST_RECORD_EMPTY = 4,
+	ACPI_ERST_NOT_FOUND = 5,
+	ACPI_ERST_STATUS_RESERVED = 6	/* 6 and greater are reserved */
+};
+
+/* Error Record Serialization Information */
+
+struct acpi_erst_info {
+	u16 signature;		/* Should be "ER" */
+	u8 data[48];
+};
+
 /*******************************************************************************
  *
- * HEST - Hardware Error Source Table
+ * HEST - Hardware Error Source Table (ACPI 4.0)
+ *        Version 1
  *
  ******************************************************************************/
 
@@ -301,70 +382,49 @@ struct acpi_table_hest {
 
 struct acpi_hest_header {
 	u16 type;
+	u16 source_id;
 };
 
 /* Values for Type field above for subtables */
 
 enum acpi_hest_types {
-	ACPI_HEST_TYPE_XPF_MACHINE_CHECK = 0,
-	ACPI_HEST_TYPE_XPF_CORRECTED_MACHINE_CHECK = 1,
-	ACPI_HEST_TYPE_XPF_UNUSED = 2,
-	ACPI_HEST_TYPE_XPF_NON_MASKABLE_INTERRUPT = 3,
-	ACPI_HEST_TYPE_IPF_CORRECTED_MACHINE_CHECK = 4,
-	ACPI_HEST_TYPE_IPF_CORRECTED_PLATFORM_ERROR = 5,
+	ACPI_HEST_TYPE_IA32_CHECK = 0,
+	ACPI_HEST_TYPE_IA32_CORRECTED_CHECK = 1,
+	ACPI_HEST_TYPE_IA32_NMI = 2,
+	ACPI_HEST_TYPE_NOT_USED3 = 3,
+	ACPI_HEST_TYPE_NOT_USED4 = 4,
+	ACPI_HEST_TYPE_NOT_USED5 = 5,
 	ACPI_HEST_TYPE_AER_ROOT_PORT = 6,
 	ACPI_HEST_TYPE_AER_ENDPOINT = 7,
 	ACPI_HEST_TYPE_AER_BRIDGE = 8,
-	ACPI_HEST_TYPE_GENERIC_HARDWARE_ERROR_SOURCE = 9,
+	ACPI_HEST_TYPE_GENERIC_ERROR = 9,
 	ACPI_HEST_TYPE_RESERVED = 10	/* 10 and greater are reserved */
 };
 
 /*
- * HEST Sub-subtables
+ * HEST substructures contained in subtables
  */
 
-/* XPF Machine Check Error Bank */
-
-struct acpi_hest_xpf_error_bank {
+/*
+ * IA32 Error Bank(s) - Follows the struct acpi_hest_ia_machine_check and
+ * struct acpi_hest_ia_corrected structures.
+ */
+struct acpi_hest_ia_error_bank {
 	u8 bank_number;
 	u8 clear_status_on_init;
 	u8 status_format;
-	u8 config_write_enable;
+	u8 reserved;
 	u32 control_register;
-	u64 control_init_data;
+	u64 control_data;
 	u32 status_register;
 	u32 address_register;
 	u32 misc_register;
 };
 
-/* Generic Error Status */
-
-struct acpi_hest_generic_status {
-	u32 block_status;
-	u32 raw_data_offset;
-	u32 raw_data_length;
-	u32 data_length;
-	u32 error_severity;
-};
-
-/* Generic Error Data */
-
-struct acpi_hest_generic_data {
-	u8 section_type[16];
-	u32 error_severity;
-	u16 revision;
-	u8 validation_bits;
-	u8 flags;
-	u32 error_data_length;
-	u8 fru_id[16];
-	u8 fru_text[20];
-};
-
-/* Common HEST structure for PCI/AER types below (6,7,8) */
+/* Common HEST sub-structure for PCI/AER structures below (6,7,8) */
 
 struct acpi_hest_aer_common {
-	u16 source_id;
-	u16 config_write_enable;
+	u16 reserved1;
 	u8 flags;
 	u8 enabled;
 	u32 records_to_pre_allocate;
@@ -373,13 +433,18 @@ struct acpi_hest_aer_common {
 	u16 device;
 	u16 function;
 	u16 device_control;
-	u16 reserved;
+	u16 reserved2;
 	u32 uncorrectable_error_mask;
 	u32 uncorrectable_error_severity;
 	u32 correctable_error_mask;
 	u32 advanced_error_capabilities;
 };
 
+/* Masks for HEST Flags fields */
+
+#define ACPI_HEST_FIRMWARE_FIRST        (1)
+#define ACPI_HEST_GLOBAL                (1<<1)
+
 /* Hardware Error Notification */
 
 struct acpi_hest_notify {
@@ -405,71 +470,59 @@ enum acpi_hest_notify_types {
 	ACPI_HEST_NOTIFY_RESERVED = 5	/* 5 and greater are reserved */
 };
 
+/* Values for config_write_enable bitfield above */
+
+#define ACPI_HEST_TYPE                  (1)
+#define ACPI_HEST_POLL_INTERVAL         (1<<1)
+#define ACPI_HEST_POLL_THRESHOLD_VALUE  (1<<2)
+#define ACPI_HEST_POLL_THRESHOLD_WINDOW (1<<3)
+#define ACPI_HEST_ERR_THRESHOLD_VALUE   (1<<4)
+#define ACPI_HEST_ERR_THRESHOLD_WINDOW  (1<<5)
+
 /*
  * HEST subtables
- *
- * From WHEA Design Document, 16 May 2007.
- * Note: There is no subtable type 2 in this version of the document,
- * and there are two different subtable type 3s.
  */
 
- /* 0: XPF Machine Check Exception */
+/* 0: IA32 Machine Check Exception */
 
-struct acpi_hest_xpf_machine_check {
+struct acpi_hest_ia_machine_check {
 	struct acpi_hest_header header;
-	u16 source_id;
-	u16 config_write_enable;
+	u16 reserved1;
 	u8 flags;
-	u8 reserved1;
+	u8 enabled;
 	u32 records_to_pre_allocate;
 	u32 max_sections_per_record;
 	u64 global_capability_data;
 	u64 global_control_data;
 	u8 num_hardware_banks;
-	u8 reserved2[7];
+	u8 reserved3[7];
 };
 
-/* 1: XPF Corrected Machine Check */
+/* 1: IA32 Corrected Machine Check */
 
-struct acpi_table_hest_xpf_corrected {
+struct acpi_table_hest_ia_corrected {
 	struct acpi_hest_header header;
-	u16 source_id;
-	u16 config_write_enable;
+	u16 reserved1;
 	u8 flags;
 	u8 enabled;
 	u32 records_to_pre_allocate;
 	u32 max_sections_per_record;
 	struct acpi_hest_notify notify;
 	u8 num_hardware_banks;
-	u8 reserved[3];
+	u8 reserved2[3];
 };
 
-/* 3: XPF Non-Maskable Interrupt */
+/* 2: IA32 Non-Maskable Interrupt */
 
-struct acpi_hest_xpf_nmi {
+struct acpi_hest_ia_nmi {
 	struct acpi_hest_header header;
-	u16 source_id;
 	u32 reserved;
 	u32 records_to_pre_allocate;
 	u32 max_sections_per_record;
 	u32 max_raw_data_length;
 };
 
-/* 4: IPF Corrected Machine Check */
-
-struct acpi_hest_ipf_corrected {
-	struct acpi_hest_header header;
-	u8 enabled;
-	u8 reserved;
-};
-
-/* 5: IPF Corrected Platform Error */
-
-struct acpi_hest_ipf_corrected_platform {
-	struct acpi_hest_header header;
-	u8 enabled;
-	u8 reserved;
-};
+/* 3,4,5: Not used */
 
 /* 6: PCI Express Root Port AER */
 
@@ -491,30 +544,61 @@ struct acpi_hest_aer {
 struct acpi_hest_aer_bridge {
 	struct acpi_hest_header header;
 	struct acpi_hest_aer_common aer;
-	u32 secondary_uncorrectable_error_mask;
-	u32 secondary_uncorrectable_error_severity;
-	u32 secondary_advanced_capabilities;
+	u32 second_uncorrectable_error_mask;
+	u32 second_uncorrectable_error_severity;
+	u32 second_advanced_capabilities;
 };
 
 /* 9: Generic Hardware Error Source */
 
 struct acpi_hest_generic {
 	struct acpi_hest_header header;
-	u16 source_id;
 	u16 related_source_id;
-	u8 config_write_enable;
+	u8 reserved;
 	u8 enabled;
 	u32 records_to_pre_allocate;
 	u32 max_sections_per_record;
 	u32 max_raw_data_length;
 	struct acpi_generic_address error_status_address;
 	struct acpi_hest_notify notify;
-	u32 error_status_block_length;
+	u32 error_block_length;
+};
+
+/* Generic Error Status block */
+
+struct acpi_hest_generic_status {
+	u32 block_status;
+	u32 raw_data_offset;
+	u32 raw_data_length;
+	u32 data_length;
+	u32 error_severity;
+};
+
+/* Values for block_status flags above */
+
+#define ACPI_HEST_UNCORRECTABLE             (1)
+#define ACPI_HEST_CORRECTABLE               (1<<1)
+#define ACPI_HEST_MULTIPLE_UNCORRECTABLE    (1<<2)
+#define ACPI_HEST_MULTIPLE_CORRECTABLE      (1<<3)
+#define ACPI_HEST_ERROR_ENTRY_COUNT         (0xFF<<4)	/* 8 bits, error count */
+
+/* Generic Error Data entry */
+
+struct acpi_hest_generic_data {
+	u8 section_type[16];
+	u32 error_severity;
+	u16 revision;
+	u8 validation_bits;
+	u8 flags;
+	u32 error_data_length;
+	u8 fru_id[16];
+	u8 fru_text[20];
 };
 
 /*******************************************************************************
  *
  * MADT - Multiple APIC Description Table
+ *        Version 3
  *
  ******************************************************************************/
 
@@ -524,16 +608,16 @@ struct acpi_table_madt {
 	u32 flags;
 };
 
-/* Flags */
+/* Masks for Flags field above */
 
-#define ACPI_MADT_PCAT_COMPAT       (1)	/* 00:    System also has dual 8259s */
+#define ACPI_MADT_PCAT_COMPAT       (1)	/* 00: System also has dual 8259s */
 
 /* Values for PCATCompat flag */
 
 #define ACPI_MADT_DUAL_PIC          0
 #define ACPI_MADT_MULTIPLE_APIC     1
 
-/* Values for subtable type in struct acpi_subtable_header */
+/* Values for MADT subtable type in struct acpi_subtable_header */
 
 enum acpi_madt_type {
 	ACPI_MADT_TYPE_LOCAL_APIC = 0,
@@ -644,7 +728,7 @@ struct acpi_madt_interrupt_source {
 	u32 flags;		/* Interrupt Source Flags */
 };
 
-/* Flags field above */
+/* Masks for Flags field above */
 
 #define ACPI_MADT_CPEI_OVERRIDE     (1)
 
@@ -695,7 +779,34 @@ struct acpi_madt_local_x2apic_nmi {
 
 /*******************************************************************************
  *
+ * MSCT - Maximum System Characteristics Table (ACPI 4.0)
+ *        Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_msct {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 proximity_offset;	/* Location of proximity info struct(s) */
+	u32 max_proximity_domains;	/* Max number of proximity domains */
+	u32 max_clock_domains;	/* Max number of clock domains */
+	u64 max_address;	/* Max physical address in system */
+};
+
+/* Subtable - Maximum Proximity Domain Information. Version 1 */
+
+struct acpi_msct_proximity {
+	u8 revision;
+	u8 length;
+	u32 range_start;	/* Start of domain range */
+	u32 range_end;		/* End of domain range */
+	u32 processor_capacity;
+	u64 memory_capacity;	/* In bytes */
+};
+
+/*******************************************************************************
+ *
  * SBST - Smart Battery Specification Table
+ *        Version 1
  *
  ******************************************************************************/
 
@@ -709,6 +820,7 @@ struct acpi_table_sbst {
 /*******************************************************************************
  *
  * SLIT - System Locality Distance Information Table
+ *        Version 1
  *
  ******************************************************************************/
 
@@ -721,6 +833,7 @@ struct acpi_table_slit {
 /*******************************************************************************
  *
  * SRAT - System Resource Affinity Table
+ *        Version 3
  *
  ******************************************************************************/
 
@@ -755,6 +868,10 @@ struct acpi_srat_cpu_affinity {
 	u32 reserved;		/* Reserved, must be zero */
 };
 
+/* Flags */
+
+#define ACPI_SRAT_CPU_USE_AFFINITY  (1)	/* 00: Use affinity structure */
+
 /* 1: Memory Affinity */
 
 struct acpi_srat_mem_affinity {
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index b271aba..6f3dce9 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -14,8 +14,9 @@
  ******************************************************************************/
 
 /*
- * Values for description table header signatures. Useful because they make
- * it more difficult to inadvertently type in the wrong signature.
+ * Values for description table header signatures for tables defined in this
+ * file. Useful because they make it more difficult to inadvertently type in
+ * the wrong signature.
  */
 #define ACPI_SIG_ASF            "ASF!"	/* Alert Standard Format table */
 #define ACPI_SIG_BOOT           "BOOT"	/* Simple Boot Flag Table */
@@ -23,12 +24,14 @@
 #define ACPI_SIG_DMAR           "DMAR"	/* DMA Remapping table */
 #define ACPI_SIG_HPET           "HPET"	/* High Precision Event Timer table */
 #define ACPI_SIG_IBFT           "IBFT"	/* i_sCSI Boot Firmware Table */
+#define ACPI_SIG_IVRS           "IVRS"	/* I/O Virtualization Reporting Structure */
 #define ACPI_SIG_MCFG           "MCFG"	/* PCI Memory Mapped Configuration table */
 #define ACPI_SIG_SLIC           "SLIC"	/* Software Licensing Description Table */
 #define ACPI_SIG_SPCR           "SPCR"	/* Serial Port Console Redirection table */
 #define ACPI_SIG_SPMI           "SPMI"	/* Server Platform Management Interface table */
 #define ACPI_SIG_TCPA           "TCPA"	/* Trusted Computing Platform Alliance table */
 #define ACPI_SIG_UEFI           "UEFI"	/* Uefi Boot Optimization Table */
+#define ACPI_SIG_WAET           "WAET"	/* Windows ACPI Emulated devices Table */
 #define ACPI_SIG_WDAT           "WDAT"	/* Watchdog Action Table */
 #define ACPI_SIG_WDRT           "WDRT"	/* Watchdog Resource Table */
 
@@ -47,6 +50,7 @@
 /*******************************************************************************
  *
  * ASF - Alert Standard Format table (Signature "ASF!")
+ *       Revision 0x10
  *
  * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003
  *
@@ -91,6 +95,10 @@ struct acpi_asf_info {
 	u8 reserved2[3];
 };
 
+/* Masks for Flags field above */
+
+#define ACPI_ASF_SMBUS_PROTOCOLS    (1)
+
 /* 1: ASF Alerts */
 
 struct acpi_asf_alert {
@@ -156,6 +164,9 @@ struct acpi_asf_address {
 /*******************************************************************************
  *
  * BOOT - Simple Boot Flag Table
+ *        Version 1
+ *
+ * Conforms to the "Simple Boot Flag Specification", Version 2.1
  *
  ******************************************************************************/
 
@@ -168,6 +179,9 @@ struct acpi_table_boot {
 /*******************************************************************************
  *
  * DBGP - Debug Port table
+ *        Version 1
+ *
+ * Conforms to the "Debug Port Specification", Version 1.00, 2/9/2000
  *
  ******************************************************************************/
 
@@ -181,7 +195,10 @@ struct acpi_table_dbgp {
 /*******************************************************************************
  *
  * DMAR - DMA Remapping table
- *        From "Intel Virtualization Technology for Directed I/O", Sept. 2007
+ *        Version 1
+ *
+ * Conforms to "Intel Virtualization Technology for Directed I/O",
+ * Version 1.2, Sept. 2008
  *
  ******************************************************************************/
 
@@ -192,7 +209,7 @@ struct acpi_table_dmar {
 	u8 reserved[10];
 };
 
-/* Flags */
+/* Masks for Flags field above */
 
 #define ACPI_DMAR_INTR_REMAP        (1)
 
@@ -209,9 +226,12 @@ enum acpi_dmar_type {
 	ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,
 	ACPI_DMAR_TYPE_RESERVED_MEMORY = 1,
 	ACPI_DMAR_TYPE_ATSR = 2,
-	ACPI_DMAR_TYPE_RESERVED = 3	/* 3 and greater are reserved */
+	ACPI_DMAR_HARDWARE_AFFINITY = 3,
+	ACPI_DMAR_TYPE_RESERVED = 4	/* 4 and greater are reserved */
 };
 
+/* DMAR Device Scope structure */
+
 struct acpi_dmar_device_scope {
 	u8 entry_type;
 	u8 length;
@@ -250,7 +270,7 @@ struct acpi_dmar_hardware_unit {
 	u64 address;		/* Register Base Address */
 };
 
-/* Flags */
+/* Masks for Flags field above */
 
 #define ACPI_DMAR_INCLUDE_ALL       (1)
 
@@ -264,7 +284,7 @@ struct acpi_dmar_reserved_memory {
 	u64 end_address;	/* 4_k aligned limit address */
 };
 
-/* Flags */
+/* Masks for Flags field above */
 
 #define ACPI_DMAR_ALLOW_ALL         (1)
 
@@ -277,13 +297,26 @@ struct acpi_dmar_atsr {
 	u16 segment;
 };
 
-/* Flags */
+/* Masks for Flags field above */
 
 #define ACPI_DMAR_ALL_PORTS         (1)
 
+/* 3: Remapping Hardware Static Affinity Structure */
+
+struct acpi_dmar_rhsa {
+	struct acpi_dmar_header header;
+	u32 reserved;
+	u64 base_address;
+	u32 proximity_domain;
+};
+
 /*******************************************************************************
  *
  * HPET - High Precision Event Timer table
+ *        Version 1
+ *
+ * Conforms to "IA-PC HPET (High Precision Event Timers) Specification",
+ * Version 1.0a, October 2004
  *
  ******************************************************************************/
 
@@ -296,17 +329,28 @@ struct acpi_table_hpet {
 	u8 flags;
 };
 
-/*! Flags */
+/* Masks for Flags field above */
 
-#define ACPI_HPET_PAGE_PROTECT      (1)	/* 00: No page protection */
-#define ACPI_HPET_PAGE_PROTECT_4    (1<<1)	/* 01: 4KB page protected */
-#define ACPI_HPET_PAGE_PROTECT_64   (1<<2)	/* 02: 64KB page protected */
+#define ACPI_HPET_PAGE_PROTECT_MASK (3)
 
-/*! [End] no source code translation !*/
+/* Values for Page Protect flags */
+
+enum acpi_hpet_page_protect {
+	ACPI_HPET_NO_PAGE_PROTECT = 0,
+	ACPI_HPET_PAGE_PROTECT4 = 1,
+	ACPI_HPET_PAGE_PROTECT64 = 2
+};
 
 /*******************************************************************************
  *
  * IBFT - Boot Firmware Table
+ *        Version 1
+ *
+ * Conforms to "iSCSI Boot Firmware Table (iBFT) as Defined in ACPI 3.0b
+ * Specification", Version 1.01, March 1, 2007
+ *
+ * Note: It appears that this table is not intended to appear in the RSDT/XSDT.
+ * Therefore, it is not currently supported by the disassembler.
  *
  ******************************************************************************/
 
@@ -396,7 +440,182 @@ struct acpi_ibft_target {
 
 /*******************************************************************************
  *
+ * IVRS - I/O Virtualization Reporting Structure
+ *        Version 1
+ *
+ * Conforms to "AMD I/O Virtualization Technology (IOMMU) Specification",
+ * Revision 1.26, February 2009.
+ *
+ ******************************************************************************/
+
+struct acpi_table_ivrs {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 info;		/* Common virtualization info */
+	u64 reserved;
+};
+
+/* Values for Info field above */
+
+#define ACPI_IVRS_PHYSICAL_SIZE     0x00007F00	/* 7 bits, physical address size */
+#define ACPI_IVRS_VIRTUAL_SIZE      0x003F8000	/* 7 bits, virtual address size */
+#define ACPI_IVRS_ATS_RESERVED      0x00400000	/* ATS address translation range reserved */
+
+/* IVRS subtable header */
+
+struct acpi_ivrs_header {
+	u8 type;		/* Subtable type */
+	u8 flags;
+	u16 length;		/* Subtable length */
+	u16 device_id;		/* ID of IOMMU */
+};
+
+/* Values for subtable Type above */
+
+enum acpi_ivrs_type {
+	ACPI_IVRS_TYPE_HARDWARE = 0x10,
+	ACPI_IVRS_TYPE_MEMORY1 = 0x20,
+	ACPI_IVRS_TYPE_MEMORY2 = 0x21,
+	ACPI_IVRS_TYPE_MEMORY3 = 0x22
+};
+
+/* Masks for Flags field above for IVHD subtable */
+
+#define ACPI_IVHD_TT_ENABLE         (1)
+#define ACPI_IVHD_PASS_PW           (1<<1)
+#define ACPI_IVHD_RES_PASS_PW       (1<<2)
+#define ACPI_IVHD_ISOC              (1<<3)
+#define ACPI_IVHD_IOTLB             (1<<4)
+
+/* Masks for Flags field above for IVMD subtable */
+
+#define ACPI_IVMD_UNITY             (1)
+#define ACPI_IVMD_READ              (1<<1)
+#define ACPI_IVMD_WRITE             (1<<2)
+#define ACPI_IVMD_EXCLUSION_RANGE   (1<<3)
+
+/*
+ * IVRS subtables, correspond to Type in struct acpi_ivrs_header
+ */
+
+/* 0x10: I/O Virtualization Hardware Definition Block (IVHD) */
+
+struct acpi_ivrs_hardware {
+	struct acpi_ivrs_header header;
+	u16 capability_offset;	/* Offset for IOMMU control fields */
+	u64 base_address;	/* IOMMU control registers */
+	u16 pci_segment_group;
+	u16 info;		/* MSI number and unit ID */
+	u32 reserved;
+};
+
+/* Masks for Info field above */
+
+#define ACPI_IVHD_MSI_NUMBER_MASK   0x001F	/* 5 bits, MSI message number */
+#define ACPI_IVHD_UNIT_ID_MASK      0x1F00	/* 5 bits, unit_iD */
+
+/*
+ * Device Entries for IVHD subtable, appear after struct acpi_ivrs_hardware structure.
+ * Upper two bits of the Type field are the (encoded) length of the structure.
+ * Currently, only 4 and 8 byte entries are defined. 16 and 32 byte entries
+ * are reserved for future use but not defined.
+ */
+struct acpi_ivrs_de_header {
+	u8 type;
+	u16 id;
+	u8 data_setting;
+};
+
+/* Length of device entry is in the top two bits of Type field above */
+
+#define ACPI_IVHD_ENTRY_LENGTH      0xC0
+
+/* Values for device entry Type field above */
+
+enum acpi_ivrs_device_entry_type {
+	/* 4-byte device entries, all use struct acpi_ivrs_device4 */
+
+	ACPI_IVRS_TYPE_PAD4 = 0,
+	ACPI_IVRS_TYPE_ALL = 1,
+	ACPI_IVRS_TYPE_SELECT = 2,
+	ACPI_IVRS_TYPE_START = 3,
+	ACPI_IVRS_TYPE_END = 4,
+
+	/* 8-byte device entries */
+
+	ACPI_IVRS_TYPE_PAD8 = 64,
+	ACPI_IVRS_TYPE_NOT_USED = 65,
+	ACPI_IVRS_TYPE_ALIAS_SELECT = 66,	/* Uses struct acpi_ivrs_device8a */
+	ACPI_IVRS_TYPE_ALIAS_START = 67,	/* Uses struct acpi_ivrs_device8a */
+	ACPI_IVRS_TYPE_EXT_SELECT = 70,	/* Uses struct acpi_ivrs_device8b */
+	ACPI_IVRS_TYPE_EXT_START = 71,	/* Uses struct acpi_ivrs_device8b */
+	ACPI_IVRS_TYPE_SPECIAL = 72	/* Uses struct acpi_ivrs_device8c */
+};
+
+/* Values for Data field above */
+
+#define ACPI_IVHD_INIT_PASS         (1)
+#define ACPI_IVHD_EINT_PASS         (1<<1)
+#define ACPI_IVHD_NMI_PASS          (1<<2)
+#define ACPI_IVHD_SYSTEM_MGMT       (3<<4)
+#define ACPI_IVHD_LINT0_PASS        (1<<6)
+#define ACPI_IVHD_LINT1_PASS        (1<<7)
+
+/* Types 0-4: 4-byte device entry */
+
+struct acpi_ivrs_device4 {
+	struct acpi_ivrs_de_header header;
+};
+
+/* Types 66-67: 8-byte device entry */
+
+struct acpi_ivrs_device8a {
+	struct acpi_ivrs_de_header header;
+	u8 reserved1;
+	u16 used_id;
+	u8 reserved2;
+};
+
+/* Types 70-71: 8-byte device entry */
+
+struct acpi_ivrs_device8b {
+	struct acpi_ivrs_de_header header;
+	u32 extended_data;
+};
+
+/* Values for extended_data above */
+
+#define ACPI_IVHD_ATS_DISABLED      (1<<31)
+
+/* Type 72: 8-byte device entry */
+
+struct acpi_ivrs_device8c {
+	struct acpi_ivrs_de_header header;
+	u8 handle;
+	u16 used_id;
+	u8 variety;
+};
+
+/* Values for Variety field above */
+
+#define ACPI_IVHD_IOAPIC            1
+#define ACPI_IVHD_HPET              2
+
+/* 0x20, 0x21, 0x22: I/O Virtualization Memory Definition Block (IVMD) */
+
+struct acpi_ivrs_memory {
+	struct acpi_ivrs_header header;
+	u16 aux_data;
+	u64 reserved;
+	u64 start_address;
+	u64 memory_length;
+};
+
+/*******************************************************************************
+ *
  * MCFG - PCI Memory Mapped Configuration table and sub-table
+ *        Version 1
+ *
+ * Conforms to "PCI Firmware Specification", Revision 3.0, June 20, 2005
  *
  ******************************************************************************/
 
@@ -418,6 +637,10 @@ struct acpi_mcfg_allocation {
 /*******************************************************************************
  *
  * SPCR - Serial Port Console Redirection table
+ *        Version 1
+ *
+ * Conforms to "Serial Port Console Redirection Table",
+ * Version 1.00, January 11, 2002
  *
  ******************************************************************************/
 
@@ -445,16 +668,25 @@ struct acpi_table_spcr {
 	u32 reserved2;
 };
 
+/* Masks for pci_flags field above */
+
+#define ACPI_SPCR_DO_NOT_DISABLE    (1)
+
 /*******************************************************************************
  *
  * SPMI - Server Platform Management Interface table
+ *        Version 5
+ *
+ * Conforms to "Intelligent Platform Management Interface Specification
+ * Second Generation v2.0", Document Revision 1.0, February 12, 2004 with
+ * June 12, 2009 markup.
  *
  ******************************************************************************/
 
 struct acpi_table_spmi {
 	struct acpi_table_header header;	/* Common ACPI table header */
-	u8 reserved;
 	u8 interface_type;
+	u8 reserved;		/* Must be 1 */
 	u16 spec_revision;	/* Version of IPMI */
 	u8 interrupt_type;
 	u8 gpe_number;		/* GPE assigned */
@@ -466,11 +698,27 @@ struct acpi_table_spmi {
 	u8 pci_bus;
 	u8 pci_device;
 	u8 pci_function;
+	u8 reserved2;
+};
+
+/* Values for interface_type above */
+
+enum acpi_spmi_interface_types {
+	ACPI_SPMI_NOT_USED = 0,
+	ACPI_SPMI_KEYBOARD = 1,
+	ACPI_SPMI_SMI = 2,
+	ACPI_SPMI_BLOCK_TRANSFER = 3,
+	ACPI_SPMI_SMBUS = 4,
+	ACPI_SPMI_RESERVED = 5	/* 5 and above are reserved */
 };
 
 /*******************************************************************************
  *
  * TCPA - Trusted Computing Platform Alliance table
+ *        Version 1
+ *
+ * Conforms to "TCG PC Specific Implementation Specification",
+ * Version 1.1, August 18, 2003
  *
  ******************************************************************************/
 
@@ -484,6 +732,10 @@ struct acpi_table_tcpa {
 /*******************************************************************************
  *
  * UEFI - UEFI Boot optimization Table
+ *        Version 1
+ *
+ * Conforms to "Unified Extensible Firmware Interface Specification",
+ * Version 2.3, May 8, 2009
  *
  ******************************************************************************/
 
@@ -491,12 +743,34 @@ struct acpi_table_uefi {
 	struct acpi_table_header header;	/* Common ACPI table header */
 	u8 identifier[16];	/* UUID identifier */
 	u16 data_offset;	/* Offset of remaining data in table */
-	u8 data;
 };
 
 /*******************************************************************************
  *
+ * WAET - Windows ACPI Emulated devices Table
+ *        Version 1
+ *
+ * Conforms to "Windows ACPI Emulated Devices Table", version 1.0, April 6, 2009
+ *
+ ******************************************************************************/
+
+struct acpi_table_waet {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 flags;
+};
+
+/* Masks for Flags field above */
+
+#define ACPI_WAET_RTC_NO_ACK        (1)	/* RTC requires no int acknowledge */
+#define ACPI_WAET_TIMER_ONE_READ    (1<<1)	/* PM timer requires only one read */
+
+/*******************************************************************************
+ *
  * WDAT - Watchdog Action Table
+ *        Version 1
+ *
+ * Conforms to "Hardware Watchdog Timers Design Specification",
+ * Copyright 2006 Microsoft Corporation.
  *
  ******************************************************************************/
 
@@ -516,10 +790,20 @@ struct acpi_table_wdat {
 	u32 entries;		/* Number of watchdog entries that follow */
 };
 
+/* Masks for Flags field above */
+
+#define ACPI_WDAT_ENABLED           (1)
+#define ACPI_WDAT_STOPPED           0x80
+
 /* WDAT Instruction Entries (actions) */
 
 struct acpi_wdat_entry {
-	struct acpi_whea_header whea_header;	/* Common header for WHEA tables */
+	u8 action;
+	u8 instruction;
+	u16 reserved;
+	struct acpi_generic_address register_region;
+	u32 value;		/* Value used with Read/Write register */
+	u32 mask;		/* Bitmask required for this register instruction */
 };
 
 /* Values for Action field above */
@@ -556,28 +840,27 @@ enum acpi_wdat_instructions {
 /*******************************************************************************
  *
  * WDRT - Watchdog Resource Table
+ *        Version 1
+ *
+ * Conforms to "Watchdog Timer Hardware Requirements for Windows Server 2003",
+ * Version 1.01, August 28, 2006
  *
  ******************************************************************************/
 
 struct acpi_table_wdrt {
 	struct acpi_table_header header;	/* Common ACPI table header */
-	u32 header_length;	/* Watchdog Header Length */
-	u8 pci_segment;		/* PCI Segment number */
+	struct acpi_generic_address control_register;
+	struct acpi_generic_address count_register;
+	u16 pci_device_id;
+	u16 pci_vendor_id;
 	u8 pci_bus;		/* PCI Bus number */
 	u8 pci_device;		/* PCI Device number */
 	u8 pci_function;	/* PCI Function number */
-	u32 timer_period;	/* Period of one timer count (msec) */
-	u32 max_count;		/* Maximum counter value supported */
-	u32 min_count;		/* Minimum counter value */
-	u8 flags;
-	u8 reserved[3];
-	u32 entries;		/* Number of watchdog entries that follow */
+	u8 pci_segment;		/* PCI Segment number */
+	u16 max_count;		/* Maximum counter value supported */
+	u8 units;
 };
 
-/* Flags */
-
-#define ACPI_WDRT_TIMER_ENABLED     (1)	/* 00: Timer enabled */
-
 /* Reset to default packing */
 
 #pragma pack()
-- 
cgit v0.10.2


From d9adc2e031bd22d5d9607a53a8d3b30e0b675f39 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Mon, 27 Jul 2009 11:31:10 +0800
Subject: ACPICA: reformat predefined method table, no functional change

Reformatted the methods that return package objects.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 144b1f4..c81f14b 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -139,123 +139,180 @@ enum acpi_return_package_types {
  * is saved here (rather than in a separate table) in order to minimize the
  * overall size of the stored data.
  */
-static const union acpi_predefined_info predefined_names[] = {
-	{.info = {"_AC0", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC1", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC2", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC3", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC4", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC5", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC6", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC7", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC8", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AC9", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_ADR", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_AL0", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL1", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL2", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL3", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL4", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL5", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL6", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL7", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL8", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_AL9", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_ALC", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_ALI", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_ALP", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_ALR", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2, ACPI_RTYPE_INTEGER, 2, 0, 0, 0}},	/* variable (Pkgs) each 2 (Ints) */
-	{.info = {"_ALT", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_BBN", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_BCL", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0, 0, 0, 0}},	/* variable (Ints) */
-	{.info = {"_BCM", 1, 0}},
-	{.info = {"_BDN", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_BFS", 1, 0}},
-	{.info = {"_BIF", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER,
-					  9,
-					  ACPI_RTYPE_STRING | ACPI_RTYPE_BUFFER, 4, 0}},	/* fixed (9 Int),(4 Str) */
-	{.info = {"_BLT", 3, 0}},
-	{.info = {"_BMC", 1, 0}},
-	{.info = {"_BMD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 5, 0, 0, 0}},	/* fixed (5 Int) */
-	{.info = {"_BQC", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_BST", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4, 0, 0, 0}},	/* fixed (4 Int) */
-	{.info = {"_BTM", 1, ACPI_RTYPE_INTEGER}},
-	{.info = {"_BTP", 1, 0}},
-	{.info = {"_CBA", 0, ACPI_RTYPE_INTEGER}},	/* see PCI firmware spec 3.0 */
-	{.info = {"_CID", 0,
-	 ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING | ACPI_RTYPE_PACKAGE}},
-	{.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING, 0, 0, 0, 0}},	/* variable (Ints/Strs) */
-	{.info = {"_CRS", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_CRT", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_CSD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2_COUNT, ACPI_RTYPE_INTEGER, 0, 0, 0, 0}},	/* variable (1 Int(n), n-1 Int) */
-	{.info = {"_CST", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2_PKG_COUNT,
-					  ACPI_RTYPE_BUFFER, 1,
-					  ACPI_RTYPE_INTEGER, 3, 0}},	/* variable (1 Int(n), n Pkg (1 Buf/3 Int) */
-	{.info = {"_DCK", 1, ACPI_RTYPE_INTEGER}},
-	{.info = {"_DCS", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_DDC", 1, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER}},
-	{.info = {"_DDN", 0, ACPI_RTYPE_STRING}},
-	{.info = {"_DGS", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_DIS", 0, 0}},
-	{.info = {"_DMA", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_DOD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0, 0, 0, 0}},	/* variable (Ints) */
-	{.info = {"_DOS", 1, 0}},
-	{.info = {"_DSM", 4, ACPI_RTYPE_ALL}},	/* Must return a type, but it can be of any type */
-	{.info = {"_DSS", 1, 0}},
-	{.info = {"_DSW", 3, 0}},
-	{.info = {"_EC_", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_EDL", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_EJ0", 1, 0}},
-	{.info = {"_EJ1", 1, 0}},
-	{.info = {"_EJ2", 1, 0}},
-	{.info = {"_EJ3", 1, 0}},
-	{.info = {"_EJ4", 1, 0}},
-	{.info = {"_EJD", 0, ACPI_RTYPE_STRING}},
-	{.info = {"_FDE", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_FDI", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16, 0, 0, 0}},	/* fixed (16 Int) */
-	{.info = {"_FDM", 1, 0}},
-	{.info = {"_FIX", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0, 0, 0, 0}},	/* variable (Ints) */
-	{.info = {"_GLK", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_GPD", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_GPE", 0, ACPI_RTYPE_INTEGER}},	/* _GPE method, not _GPE scope */
-	{.info = {"_GSB", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_GTF", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_GTM", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_GTS", 1, 0}},
-	{.info = {"_HID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING}},
-	{.info = {"_HOT", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_HPP", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4, 0, 0, 0}},	/* fixed (4 Int) */
+static const union acpi_predefined_info predefined_names[] =
+{
+	{{"_AC0", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC1", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC2", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC3", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC4", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC5", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC6", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC7", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC8", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AC9", 0, ACPI_RTYPE_INTEGER}},
+	{{"_ADR", 0, ACPI_RTYPE_INTEGER}},
+	{{"_AL0", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL1", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL2", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL3", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL4", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL5", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL6", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL7", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL8", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_AL9", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_ALC", 0, ACPI_RTYPE_INTEGER}},
+	{{"_ALI", 0, ACPI_RTYPE_INTEGER}},
+	{{"_ALP", 0, ACPI_RTYPE_INTEGER}},
+	{{"_ALR", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each 2 (Ints) */
+			  {{{ACPI_PTYPE2, ACPI_RTYPE_INTEGER, 2,0}, 0,0}},
+
+	{{"_ALT", 0, ACPI_RTYPE_INTEGER}},
+	{{"_BBN", 0, ACPI_RTYPE_INTEGER}},
+	{{"_BCL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
+
+	{{"_BCM", 1, 0}},
+	{{"_BDN", 0, ACPI_RTYPE_INTEGER}},
+	{{"_BFS", 1, 0}},
+	{{"_BIF", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (9 Int),(4 Str) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 9, ACPI_RTYPE_STRING}, 4,0}},
+
+	{{"_BLT", 3, 0}},
+	{{"_BMC", 1, 0}},
+	{{"_BMD", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (5 Int) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 5,0}, 0,0}},
+
+	{{"_BQC", 0, ACPI_RTYPE_INTEGER}},
+	{{"_BST", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (4 Int) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4,0}, 0,0}},
+
+	{{"_BTM", 1, ACPI_RTYPE_INTEGER}},
+	{{"_BTP", 1, 0}},
+	{{"_CBA", 0, ACPI_RTYPE_INTEGER}}, /* See PCI firmware spec 3.0 */
+	{{"_CID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING | ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints/Strs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING, 0,0}, 0,0}},
+
+	{{"_CRS", 0, ACPI_RTYPE_BUFFER}},
+	{{"_CRT", 0, ACPI_RTYPE_INTEGER}},
+	{{"_CSD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (1 Int(n), n-1 Int) */
+			  {{{ACPI_PTYPE2_COUNT, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
+
+	{{"_CST", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (1 Int(n), n Pkg (1 Buf/3 Int) */
+			  {{{ACPI_PTYPE2_PKG_COUNT,ACPI_RTYPE_BUFFER, 1, ACPI_RTYPE_INTEGER}, 3,0}},
+
+	{{"_DCK", 1, ACPI_RTYPE_INTEGER}},
+	{{"_DCS", 0, ACPI_RTYPE_INTEGER}},
+	{{"_DDC", 1, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER}},
+	{{"_DDN", 0, ACPI_RTYPE_STRING}},
+	{{"_DGS", 0, ACPI_RTYPE_INTEGER}},
+	{{"_DIS", 0, 0}},
+	{{"_DMA", 0, ACPI_RTYPE_BUFFER}},
+	{{"_DOD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
+
+	{{"_DOS", 1, 0}},
+	{{"_DSM", 4, ACPI_RTYPE_ALL}},     /* Must return a type, but it can be of any type */
+	{{"_DSS", 1, 0}},
+	{{"_DSW", 3, 0}},
+	{{"_EC_", 0, ACPI_RTYPE_INTEGER}},
+	{{"_EDL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs)*/
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_EJ0", 1, 0}},
+	{{"_EJ1", 1, 0}},
+	{{"_EJ2", 1, 0}},
+	{{"_EJ3", 1, 0}},
+	{{"_EJ4", 1, 0}},
+	{{"_EJD", 0, ACPI_RTYPE_STRING}},
+	{{"_FDE", 0, ACPI_RTYPE_BUFFER}},
+	{{"_FDI", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (16 Int) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16,0}, 0,0}},
+
+	{{"_FDM", 1, 0}},
+	{{"_FIX", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
+
+	{{"_GLK", 0, ACPI_RTYPE_INTEGER}},
+	{{"_GPD", 0, ACPI_RTYPE_INTEGER}},
+	{{"_GPE", 0, ACPI_RTYPE_INTEGER}}, /* _GPE method, not _GPE scope */
+	{{"_GSB", 0, ACPI_RTYPE_INTEGER}},
+	{{"_GTF", 0, ACPI_RTYPE_BUFFER}},
+	{{"_GTM", 0, ACPI_RTYPE_BUFFER}},
+	{{"_GTS", 1, 0}},
+	{{"_HID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING}},
+	{{"_HOT", 0, ACPI_RTYPE_INTEGER}},
+	{{"_HPP", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (4 Int) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4,0}, 0,0}},
 
 	/*
-	 * For _HPX, a single package is returned, containing a variable number of sub-packages.
-	 * Each sub-package contains a PCI record setting. There are several different type of
-	 * record settings, of different lengths, but all elements of all settings are Integers.
+	 * For _HPX, a single package is returned, containing a Variable-length number
+	 * of sub-packages. Each sub-package contains a PCI record setting.
+	 * There are several different type of record settings, of different
+	 * lengths, but all elements of all settings are Integers.
 	 */
-	{.info = {"_HPX", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2_MIN, ACPI_RTYPE_INTEGER, 5, 0, 0, 0}},	/* variable (Pkgs) each (var Ints) */
-	{.info = {"_IFT", 0, ACPI_RTYPE_INTEGER}},	/* see IPMI spec */
-	{.info = {"_INI", 0, 0}},
-	{.info = {"_IRC", 0, 0}},
-	{.info = {"_LCK", 1, 0}},
-	{.info = {"_LID", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_MAT", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_MLS", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2, ACPI_RTYPE_STRING, 2, 0, 0, 0}},	/* variable (Pkgs) each (2 Str) */
-	{.info = {"_MSG", 1, 0}},
-	{.info = {"_OFF", 0, 0}},
-	{.info = {"_ON_", 0, 0}},
-	{.info = {"_OS_", 0, ACPI_RTYPE_STRING}},
-	{.info = {"_OSC", 4, ACPI_RTYPE_BUFFER}},
-	{.info = {"_OST", 3, 0}},
-	{.info = {"_PCL", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_PCT", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_BUFFER, 2, 0, 0, 0}},	/* fixed (2 Buf) */
-	{.info = {"_PDC", 1, 0}},
-	{.info = {"_PIC", 1, 0}},
-	{.info = {"_PLD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_BUFFER, 0, 0, 0, 0}},	/* variable (Bufs) */
-	{.info = {"_PPC", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_PPE", 0, ACPI_RTYPE_INTEGER}},	/* see dig64 spec */
-	{.info = {"_PR0", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_PR1", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_PR2", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_PRS", 0, ACPI_RTYPE_BUFFER}},
+	{{"_HPX", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (var Ints) */
+			  {{{ACPI_PTYPE2_MIN, ACPI_RTYPE_INTEGER, 5,0}, 0,0}},
+
+	{{"_IFT", 0, ACPI_RTYPE_INTEGER}}, /* See IPMI spec */
+	{{"_INI", 0, 0}},
+	{{"_IRC", 0, 0}},
+	{{"_LCK", 1, 0}},
+	{{"_LID", 0, ACPI_RTYPE_INTEGER}},
+	{{"_MAT", 0, ACPI_RTYPE_BUFFER}},
+	{{"_MLS", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (2 Str) */
+			  {{{ACPI_PTYPE2, ACPI_RTYPE_STRING, 2,0}, 0,0}},
+
+	{{"_MSG", 1, 0}},
+	{{"_OFF", 0, 0}},
+	{{"_ON_", 0, 0}},
+	{{"_OS_", 0, ACPI_RTYPE_STRING}},
+	{{"_OSC", 4, ACPI_RTYPE_BUFFER}},
+	{{"_OST", 3, 0}},
+	{{"_PCL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_PCT", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (2 Buf) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_BUFFER, 2,0}, 0,0}},
+
+	{{"_PDC", 1, 0}},
+	{{"_PIC", 1, 0}},
+	{{"_PLD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Bufs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_BUFFER, 0,0}, 0,0}},
+
+	{{"_PPC", 0, ACPI_RTYPE_INTEGER}},
+	{{"_PPE", 0, ACPI_RTYPE_INTEGER}}, /* See dig64 spec */
+	{{"_PR0", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_PR1", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_PR2", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_PRS", 0, ACPI_RTYPE_BUFFER}},
 
 	/*
 	 * For _PRT, many BIOSs reverse the 2nd and 3rd Package elements. This bug is so prevalent that there
@@ -263,115 +320,141 @@ static const union acpi_predefined_info predefined_names[] = {
 	 * and issue a warning. To allow this and eliminate the warning, add the ACPI_RTYPE_REFERENCE
 	 * type to the 2nd element (index 1) in the statement below.
 	 */
-	{.info = {"_PRT", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2_FIXED, 4,
-					  ACPI_RTYPE_INTEGER,
-					  ACPI_RTYPE_INTEGER,
-					  ACPI_RTYPE_INTEGER | ACPI_RTYPE_REFERENCE, ACPI_RTYPE_INTEGER}},	/* variable (Pkgs) each (4): Int,Int,Int/Ref,Int */
-
-	{.info = {"_PRW", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_OPTION, 2,
-					  ACPI_RTYPE_INTEGER |
-					  ACPI_RTYPE_PACKAGE,
-					  ACPI_RTYPE_INTEGER, ACPI_RTYPE_REFERENCE, 0}},	/* variable (Pkgs) each: Pkg/Int,Int,[variable Refs] (Pkg is Ref/Int) */
-
-	{.info = {"_PS0", 0, 0}},
-	{.info = {"_PS1", 0, 0}},
-	{.info = {"_PS2", 0, 0}},
-	{.info = {"_PS3", 0, 0}},
-	{.info = {"_PSC", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_PSD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2_COUNT, ACPI_RTYPE_INTEGER, 0, 0, 0, 0}},	/* variable (Pkgs) each (5 Int) with count */
-	{.info = {"_PSL", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_PSR", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_PSS", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2, ACPI_RTYPE_INTEGER, 6, 0, 0, 0}},	/* variable (Pkgs) each (6 Int) */
-	{.info = {"_PSV", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_PSW", 1, 0}},
-	{.info = {"_PTC", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_BUFFER, 2, 0, 0, 0}},	/* fixed (2 Buf) */
-	{.info = {"_PTS", 1, 0}},
-	{.info = {"_PXM", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_REG", 2, 0}},
-	{.info = {"_REV", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_RMV", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_ROM", 2, ACPI_RTYPE_BUFFER}},
-	{.info = {"_RTV", 0, ACPI_RTYPE_INTEGER}},
+	{{"_PRT", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (4): Int,Int,Int/Ref,Int */
+			  {{{ACPI_PTYPE2_FIXED, 4, ACPI_RTYPE_INTEGER,ACPI_RTYPE_INTEGER},
+			  ACPI_RTYPE_INTEGER | ACPI_RTYPE_REFERENCE,ACPI_RTYPE_INTEGER}},
+
+	{{"_PRW", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each: Pkg/Int,Int,[Variable-length Refs] (Pkg is Ref/Int) */
+			  {{{ACPI_PTYPE1_OPTION, 2, ACPI_RTYPE_INTEGER | ACPI_RTYPE_PACKAGE,
+			  ACPI_RTYPE_INTEGER}, ACPI_RTYPE_REFERENCE,0}},
+
+	{{"_PS0", 0, 0}},
+	{{"_PS1", 0, 0}},
+	{{"_PS2", 0, 0}},
+	{{"_PS3", 0, 0}},
+	{{"_PSC", 0, ACPI_RTYPE_INTEGER}},
+	{{"_PSD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (5 Int) with count */
+			  {{{ACPI_PTYPE2_COUNT, ACPI_RTYPE_INTEGER,0,0}, 0,0}},
+
+	{{"_PSL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_PSR", 0, ACPI_RTYPE_INTEGER}},
+	{{"_PSS", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (6 Int) */
+			  {{{ACPI_PTYPE2, ACPI_RTYPE_INTEGER, 6,0}, 0,0}},
+
+	{{"_PSV", 0, ACPI_RTYPE_INTEGER}},
+	{{"_PSW", 1, 0}},
+	{{"_PTC", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (2 Buf) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_BUFFER, 2,0}, 0,0}},
+
+	{{"_PTS", 1, 0}},
+	{{"_PXM", 0, ACPI_RTYPE_INTEGER}},
+	{{"_REG", 2, 0}},
+	{{"_REV", 0, ACPI_RTYPE_INTEGER}},
+	{{"_RMV", 0, ACPI_RTYPE_INTEGER}},
+	{{"_ROM", 2, ACPI_RTYPE_BUFFER}},
+	{{"_RTV", 0, ACPI_RTYPE_INTEGER}},
 
 	/*
-	 * For _S0_ through _S5_, the ACPI spec defines a return Package containing 1 Integer,
-	 * but most DSDTs have it wrong - 2,3, or 4 integers. Allow this by making the objects "variable length",
-	 * but all elements must be Integers.
+	 * For _S0_ through _S5_, the ACPI spec defines a return Package
+	 * containing 1 Integer, but most DSDTs have it wrong - 2,3, or 4 integers.
+	 * Allow this by making the objects "Variable-length length", but all elements
+	 * must be Integers.
 	 */
-	{.info = {"_S0_", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1, 0, 0, 0}},	/* fixed (1 Int) */
-	{.info = {"_S1_", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1, 0, 0, 0}},	/* fixed (1 Int) */
-	{.info = {"_S2_", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1, 0, 0, 0}},	/* fixed (1 Int) */
-	{.info = {"_S3_", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1, 0, 0, 0}},	/* fixed (1 Int) */
-	{.info = {"_S4_", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1, 0, 0, 0}},	/* fixed (1 Int) */
-	{.info = {"_S5_", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1, 0, 0, 0}},	/* fixed (1 Int) */
-
-	{.info = {"_S1D", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S2D", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S3D", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S4D", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S0W", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S1W", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S2W", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S3W", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_S4W", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_SBS", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_SCP", 0x13, 0}},	/* Acpi 1.0 allowed 1 arg. Acpi 3.0 expanded to 3 args. Allow both. */
-	/* Note: the 3-arg definition may be removed for ACPI 4.0 */
-	{.info = {"_SDD", 1, 0}},
-	{.info = {"_SEG", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_SLI", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_SPD", 1, ACPI_RTYPE_INTEGER}},
-	{.info = {"_SRS", 1, 0}},
-	{.info = {"_SRV", 0, ACPI_RTYPE_INTEGER}},	/* see IPMI spec */
-	{.info = {"_SST", 1, 0}},
-	{.info = {"_STA", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_STM", 3, 0}},
-	{.info = {"_STR", 0, ACPI_RTYPE_BUFFER}},
-	{.info = {"_SUN", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_SWS", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_TC1", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_TC2", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_TMP", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_TPC", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_TPT", 1, 0}},
-	{.info = {"_TRT", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2, ACPI_RTYPE_REFERENCE, 2,
-					  ACPI_RTYPE_INTEGER, 6, 0}},	/* variable (Pkgs) each 2_ref/6_int */
-	{.info = {"_TSD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2_COUNT, ACPI_RTYPE_INTEGER, 5, 0, 0, 0}},	/* variable (Pkgs) each 5_int with count */
-	{.info = {"_TSP", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_TSS", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE2, ACPI_RTYPE_INTEGER, 5, 0, 0, 0}},	/* variable (Pkgs) each 5_int */
-	{.info = {"_TST", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_TTS", 1, 0}},
-	{.info = {"_TZD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0, 0, 0}},	/* variable (Refs) */
-	{.info = {"_TZM", 0, ACPI_RTYPE_REFERENCE}},
-	{.info = {"_TZP", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_UID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING}},
-	{.info = {"_UPC", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4, 0, 0, 0}},	/* fixed (4 Int) */
-	{.info = {"_UPD", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_UPP", 0, ACPI_RTYPE_INTEGER}},
-	{.info = {"_VPO", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S0_", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (1 Int) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1,0}, 0,0}},
+
+	{{"_S1_", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (1 Int) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1,0}, 0,0}},
+
+	{{"_S2_", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (1 Int) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1,0}, 0,0}},
+
+	{{"_S3_", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (1 Int) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1,0}, 0,0}},
+
+	{{"_S4_", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (1 Int) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1,0}, 0,0}},
+
+	{{"_S5_", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (1 Int) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 1,0}, 0,0}},
+
+	{{"_S1D", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S2D", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S3D", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S4D", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S0W", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S1W", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S2W", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S3W", 0, ACPI_RTYPE_INTEGER}},
+	{{"_S4W", 0, ACPI_RTYPE_INTEGER}},
+	{{"_SBS", 0, ACPI_RTYPE_INTEGER}},
+	{{"_SCP", 0x13, 0}},               /* Acpi 1.0 allowed 1 arg. Acpi 3.0 expanded to 3 args. Allow both. */
+			   /* Note: the 3-arg definition may be removed for ACPI 4.0 */
+	{{"_SDD", 1, 0}},
+	{{"_SEG", 0, ACPI_RTYPE_INTEGER}},
+	{{"_SLI", 0, ACPI_RTYPE_BUFFER}},
+	{{"_SPD", 1, ACPI_RTYPE_INTEGER}},
+	{{"_SRS", 1, 0}},
+	{{"_SRV", 0, ACPI_RTYPE_INTEGER}}, /* See IPMI spec */
+	{{"_SST", 1, 0}},
+	{{"_STA", 0, ACPI_RTYPE_INTEGER}},
+	{{"_STM", 3, 0}},
+	{{"_STR", 0, ACPI_RTYPE_BUFFER}},
+	{{"_SUN", 0, ACPI_RTYPE_INTEGER}},
+	{{"_SWS", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TC1", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TC2", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TMP", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TPC", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TPT", 1, 0}},
+	{{"_TRT", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each 2_ref/6_int */
+			  {{{ACPI_PTYPE2, ACPI_RTYPE_REFERENCE, 2, ACPI_RTYPE_INTEGER}, 6, 0}},
+
+	{{"_TSD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each 5_int with count */
+			  {{{ACPI_PTYPE2_COUNT,ACPI_RTYPE_INTEGER, 5,0}, 0,0}},
+
+	{{"_TSP", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TSS", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each 5_int */
+			  {{{ACPI_PTYPE2, ACPI_RTYPE_INTEGER, 5,0}, 0,0}},
+
+	{{"_TST", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TTS", 1, 0}},
+	{{"_TZD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
+	{{"_TZM", 0, ACPI_RTYPE_REFERENCE}},
+	{{"_TZP", 0, ACPI_RTYPE_INTEGER}},
+	{{"_UID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING}},
+	{{"_UPC", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (4 Int) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4,0}, 0,0}},
+
+	{{"_UPD", 0, ACPI_RTYPE_INTEGER}},
+	{{"_UPP", 0, ACPI_RTYPE_INTEGER}},
+	{{"_VPO", 0, ACPI_RTYPE_INTEGER}},
 
 	/* Acpi 1.0 defined _WAK with no return value. Later, it was changed to return a package */
 
-	{.info = {"_WAK", 1, ACPI_RTYPE_NONE | ACPI_RTYPE_INTEGER | ACPI_RTYPE_PACKAGE}},
-	{.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 2, 0, 0, 0}},	/* fixed (2 Int), but is optional */
-	{.ret_info = {0, 0, 0, 0, 0, 0}}	/* Table terminator */
+	{{"_WAK", 1, ACPI_RTYPE_NONE | ACPI_RTYPE_INTEGER | ACPI_RTYPE_PACKAGE}},
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 2,0}, 0,0}}, /* Fixed-length (2 Int), but is optional */
+
+	{{{0,0,0,0}, 0,0}} /* Table terminator */
 };
 
 #if 0
 	/* Not implemented */
 
-{
-"_WDG", 0, ACPI_RTYPE_BUFFER},	/* MS Extension */
+	{{"_WDG", 0, ACPI_RTYPE_BUFFER}},  /* MS Extension */
+	{{"_WED", 1, ACPI_RTYPE_PACKAGE}}, /* MS Extension */
 
-{
-"_WED", 1, ACPI_RTYPE_PACKAGE},	/* MS Extension */
+	/* This is an internally implemented control method, no need to check */
+	{{"_OSI", 1, ACPI_RTYPE_INTEGER}},
 
-    /* This is an internally implemented control method, no need to check */
-{
-"_OSI", 1, ACPI_RTYPE_INTEGER},
+	/* TBD: */
 
-    /* TBD: */
-    _PRT - currently ignore reversed entries.attempt to fix here ?
-    think about code that attempts to fix package elements like _BIF, etc.
+	_PRT - currently ignore reversed entries. attempt to fix here?
+	think about possibly fixing package elements like _BIF, etc.
 #endif
+
 #endif
-- 
cgit v0.10.2


From 999e08f99846a1fd6ee9642ec306a2d318925116 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 13 Aug 2009 14:30:16 +0800
Subject: ACPICA: ACPI 4: Add validation for new predefined names.

Added 31 new names for ACPI 4.0.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index c81f14b..cd80d1d 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -64,8 +64,8 @@
  *      (Used for _PRW)
  *
  *
- * 2) PTYPE2 packages contain a variable number of sub-packages. Each of the
- *    different types describe the contents of each of the sub-packages.
+ * 2) PTYPE2 packages contain a Variable-length number of sub-packages. Each
+ *    of the different types describe the contents of each of the sub-packages.
  *
  * ACPI_PTYPE2: Each subpackage contains 1 or 2 object types:
  *      object type
@@ -92,6 +92,7 @@
  *      (Used for _HPX)
  *
  * ACPI_PTYPE2_REV_FIXED: Revision at start, each subpackage is Fixed-length
+ *      (Used for _ART, _FPS)
  *
  *****************************************************************************/
 
@@ -107,6 +108,7 @@ enum acpi_return_package_types {
 	ACPI_PTYPE2_REV_FIXED = 9
 };
 
+#ifdef ACPI_CREATE_PREDEFINED_TABLE
 /*
  * Predefined method/object information table.
  *
@@ -189,21 +191,32 @@ static const union acpi_predefined_info predefined_names[] =
 			  {{{ACPI_PTYPE2, ACPI_RTYPE_INTEGER, 2,0}, 0,0}},
 
 	{{"_ALT", 0, ACPI_RTYPE_INTEGER}},
+	{{"_ART", 0, ACPI_RTYPE_PACKAGE}},	/* Variable-length (1 Int(rev), n Pkg (2 Ref/11 Int) */
+	{{{ACPI_PTYPE2_REV_FIXED, ACPI_RTYPE_REFERENCE, 2, ACPI_RTYPE_INTEGER},
+	  11, 0}},
+
 	{{"_BBN", 0, ACPI_RTYPE_INTEGER}},
 	{{"_BCL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints) */
 			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
 
 	{{"_BCM", 1, 0}},
+	{{"_BCT", 1, ACPI_RTYPE_INTEGER}},
 	{{"_BDN", 0, ACPI_RTYPE_INTEGER}},
 	{{"_BFS", 1, 0}},
 	{{"_BIF", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (9 Int),(4 Str) */
 			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 9, ACPI_RTYPE_STRING}, 4,0}},
 
+	{{"_BIX", 0, ACPI_RTYPE_PACKAGE}},	/* Fixed-length (16 Int),(4 Str) */
+	{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16, ACPI_RTYPE_STRING}, 4,
+	  0}},
+
 	{{"_BLT", 3, 0}},
+	{{"_BMA", 1, ACPI_RTYPE_INTEGER}},
 	{{"_BMC", 1, 0}},
 	{{"_BMD", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (5 Int) */
 			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 5,0}, 0,0}},
 
+	{{"_BMS", 1, ACPI_RTYPE_INTEGER}},
 	{{"_BQC", 0, ACPI_RTYPE_INTEGER}},
 	{{"_BST", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (4 Int) */
 			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4,0}, 0,0}},
@@ -211,6 +224,7 @@ static const union acpi_predefined_info predefined_names[] =
 	{{"_BTM", 1, ACPI_RTYPE_INTEGER}},
 	{{"_BTP", 1, 0}},
 	{{"_CBA", 0, ACPI_RTYPE_INTEGER}}, /* See PCI firmware spec 3.0 */
+	{{"_CDM", 0, ACPI_RTYPE_INTEGER}},
 	{{"_CID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING | ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints/Strs) */
 			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING, 0,0}, 0,0}},
 
@@ -236,6 +250,7 @@ static const union acpi_predefined_info predefined_names[] =
 	{{"_DSM", 4, ACPI_RTYPE_ALL}},     /* Must return a type, but it can be of any type */
 	{{"_DSS", 1, 0}},
 	{{"_DSW", 3, 0}},
+	{{"_DTI", 1, 0}},
 	{{"_EC_", 0, ACPI_RTYPE_INTEGER}},
 	{{"_EDL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs)*/
 			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
@@ -251,9 +266,21 @@ static const union acpi_predefined_info predefined_names[] =
 			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16,0}, 0,0}},
 
 	{{"_FDM", 1, 0}},
+	{{"_FIF", 0, ACPI_RTYPE_PACKAGE}},	/* Fixed-length (4 Int) */
+			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4, 0}, 0, 0}},
+
 	{{"_FIX", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints) */
 			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
 
+	{{"_FPS", 0, ACPI_RTYPE_PACKAGE}},	/* Variable-length (1 Int(rev), n Pkg (5 Int) */
+	{{{ACPI_PTYPE2_REV_FIXED, ACPI_RTYPE_INTEGER, 5, 0}, 0, 0}},
+
+	{{"_FSL", 1, 0}},
+	{{"_FST", 0, ACPI_RTYPE_PACKAGE}},	/* Fixed-length (3 Int) */
+	{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 3, 0}, 0, 0}},
+
+	{{"_GAI", 0, ACPI_RTYPE_INTEGER}},
+	{{"_GHL", 0, ACPI_RTYPE_INTEGER}},
 	{{"_GLK", 0, ACPI_RTYPE_INTEGER}},
 	{{"_GPD", 0, ACPI_RTYPE_INTEGER}},
 	{{"_GPE", 0, ACPI_RTYPE_INTEGER}}, /* _GPE method, not _GPE scope */
@@ -281,15 +308,21 @@ static const union acpi_predefined_info predefined_names[] =
 	{{"_LCK", 1, 0}},
 	{{"_LID", 0, ACPI_RTYPE_INTEGER}},
 	{{"_MAT", 0, ACPI_RTYPE_BUFFER}},
+	{{"_MBM", 0, ACPI_RTYPE_PACKAGE}},	/* Fixed-length (8 Int) */
+	{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 8, 0}, 0, 0}},
+
 	{{"_MLS", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (2 Str) */
 			  {{{ACPI_PTYPE2, ACPI_RTYPE_STRING, 2,0}, 0,0}},
 
 	{{"_MSG", 1, 0}},
+	{{"_MSM", 4, ACPI_RTYPE_INTEGER}},
+	{{"_NTT", 0, ACPI_RTYPE_INTEGER}},
 	{{"_OFF", 0, 0}},
 	{{"_ON_", 0, 0}},
 	{{"_OS_", 0, ACPI_RTYPE_STRING}},
 	{{"_OSC", 4, ACPI_RTYPE_BUFFER}},
 	{{"_OST", 3, 0}},
+	{{"_PAI", 1, ACPI_RTYPE_INTEGER}},
 	{{"_PCL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
 			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
 
@@ -297,10 +330,22 @@ static const union acpi_predefined_info predefined_names[] =
 			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_BUFFER, 2,0}, 0,0}},
 
 	{{"_PDC", 1, 0}},
+	{{"_PDL", 0, ACPI_RTYPE_INTEGER}},
 	{{"_PIC", 1, 0}},
+	{{"_PIF", 0, ACPI_RTYPE_PACKAGE}},	/* Fixed-length (3 Int),(3 Str) */
+	{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 3, ACPI_RTYPE_STRING}, 3, 0}},
+
 	{{"_PLD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Bufs) */
 			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_BUFFER, 0,0}, 0,0}},
 
+	{{"_PMC", 0, ACPI_RTYPE_PACKAGE}},	/* Fixed-length (11 Int),(3 Str) */
+	{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 11, ACPI_RTYPE_STRING}, 3,
+	  0}},
+
+	{{"_PMD", 0, ACPI_RTYPE_PACKAGE}},	/* Variable-length (Refs) */
+	{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0}, 0, 0}},
+
+	{{"_PMM", 0, ACPI_RTYPE_INTEGER}},
 	{{"_PPC", 0, ACPI_RTYPE_INTEGER}},
 	{{"_PPE", 0, ACPI_RTYPE_INTEGER}}, /* See dig64 spec */
 	{{"_PR0", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
@@ -312,17 +357,26 @@ static const union acpi_predefined_info predefined_names[] =
 	{{"_PR2", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
 			  {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
 
+	{{"_PR3", 0, ACPI_RTYPE_PACKAGE}},	/* Variable-length (Refs) */
+	{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0}, 0, 0}},
+
+	{{"_PRL", 0, ACPI_RTYPE_PACKAGE}},	/* Variable-length (Refs) */
+	{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0, 0}, 0, 0}},
+
 	{{"_PRS", 0, ACPI_RTYPE_BUFFER}},
 
 	/*
-	 * For _PRT, many BIOSs reverse the 2nd and 3rd Package elements. This bug is so prevalent that there
-	 * is code in the ACPICA Resource Manager to detect this and switch them back. For now, do not allow
-	 * and issue a warning. To allow this and eliminate the warning, add the ACPI_RTYPE_REFERENCE
-	 * type to the 2nd element (index 1) in the statement below.
+	 * For _PRT, many BIOSs reverse the 3rd and 4th Package elements (Source
+	 * and source_index). This bug is so prevalent that there is code in the
+	 * ACPICA Resource Manager to detect this and switch them back. For now,
+	 * do not allow and issue a warning. To allow this and eliminate the
+	 * warning, add the ACPI_RTYPE_REFERENCE type to the 4th element (index 3)
+	 * in the statement below.
 	 */
 	{{"_PRT", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (4): Int,Int,Int/Ref,Int */
 			  {{{ACPI_PTYPE2_FIXED, 4, ACPI_RTYPE_INTEGER,ACPI_RTYPE_INTEGER},
-			  ACPI_RTYPE_INTEGER | ACPI_RTYPE_REFERENCE,ACPI_RTYPE_INTEGER}},
+			  ACPI_RTYPE_INTEGER | ACPI_RTYPE_REFERENCE,
+			  ACPI_RTYPE_INTEGER}},
 
 	{{"_PRW", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each: Pkg/Int,Int,[Variable-length Refs] (Pkg is Ref/Int) */
 			  {{{ACPI_PTYPE1_OPTION, 2, ACPI_RTYPE_INTEGER | ACPI_RTYPE_PACKAGE,
@@ -348,7 +402,11 @@ static const union acpi_predefined_info predefined_names[] =
 	{{"_PTC", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (2 Buf) */
 			  {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_BUFFER, 2,0}, 0,0}},
 
+	{{"_PTP", 2, ACPI_RTYPE_INTEGER}},
 	{{"_PTS", 1, 0}},
+	{{"_PUR", 0, ACPI_RTYPE_PACKAGE}},	/* Fixed-length (2 Int) */
+	{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 2, 0}, 0, 0}},
+
 	{{"_PXM", 0, ACPI_RTYPE_INTEGER}},
 	{{"_REG", 2, 0}},
 	{{"_REV", 0, ACPI_RTYPE_INTEGER}},
@@ -394,6 +452,7 @@ static const union acpi_predefined_info predefined_names[] =
 			   /* Note: the 3-arg definition may be removed for ACPI 4.0 */
 	{{"_SDD", 1, 0}},
 	{{"_SEG", 0, ACPI_RTYPE_INTEGER}},
+	{{"_SHL", 1, ACPI_RTYPE_INTEGER}},
 	{{"_SLI", 0, ACPI_RTYPE_BUFFER}},
 	{{"_SPD", 1, ACPI_RTYPE_INTEGER}},
 	{{"_SRS", 1, 0}},
@@ -401,11 +460,15 @@ static const union acpi_predefined_info predefined_names[] =
 	{{"_SST", 1, 0}},
 	{{"_STA", 0, ACPI_RTYPE_INTEGER}},
 	{{"_STM", 3, 0}},
+	{{"_STP", 2, ACPI_RTYPE_INTEGER}},
 	{{"_STR", 0, ACPI_RTYPE_BUFFER}},
+	{{"_STV", 2, ACPI_RTYPE_INTEGER}},
 	{{"_SUN", 0, ACPI_RTYPE_INTEGER}},
 	{{"_SWS", 0, ACPI_RTYPE_INTEGER}},
 	{{"_TC1", 0, ACPI_RTYPE_INTEGER}},
 	{{"_TC2", 0, ACPI_RTYPE_INTEGER}},
+	{{"_TIP", 1, ACPI_RTYPE_INTEGER}},
+	{{"_TIV", 1, ACPI_RTYPE_INTEGER}},
 	{{"_TMP", 0, ACPI_RTYPE_INTEGER}},
 	{{"_TPC", 0, ACPI_RTYPE_INTEGER}},
 	{{"_TPT", 1, 0}},
@@ -458,3 +521,4 @@ static const union acpi_predefined_info predefined_names[] =
 #endif
 
 #endif
+#endif
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 0b2cdb3..8314e6a 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -42,6 +42,8 @@
  * POSSIBILITY OF SUCH DAMAGES.
  */
 
+#define ACPI_CREATE_PREDEFINED_TABLE
+
 #include <acpi/acpi.h>
 #include "accommon.h"
 #include "acnamesp.h"
-- 
cgit v0.10.2


From 7f0c826a437157d2b19662977e9cf3b472cf24a6 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Thu, 13 Aug 2009 14:03:15 +0800
Subject: ACPICA: Add support for module-level executable AML code

Add limited support for executable AML code that exists outside
of any control method. This type of code has been illegal since
ACPI 2.0.  The code must exist in an If/Else/While block. All AML
tables are supported, including tables that are dynamically loaded.
ACPICA BZ 762.

http://acpica.org/bugzilla/show_bug.cgi?id=762

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 6389f7c..29ba66d 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -295,6 +295,7 @@ extern char const *acpi_gbl_exception_names_ctrl[];
 ACPI_EXTERN struct acpi_namespace_node acpi_gbl_root_node_struct;
 ACPI_EXTERN struct acpi_namespace_node *acpi_gbl_root_node;
 ACPI_EXTERN struct acpi_namespace_node *acpi_gbl_fadt_gpe_device;
+ACPI_EXTERN union acpi_operand_object *acpi_gbl_module_code_list;
 
 extern const u8 acpi_gbl_ns_properties[ACPI_NUM_NS_TYPES];
 extern const struct acpi_predefined_names
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index f75a7a0..09a2764 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -196,6 +196,8 @@ acpi_ns_dump_objects(acpi_object_type type,
  */
 acpi_status acpi_ns_evaluate(struct acpi_evaluate_info *info);
 
+void acpi_ns_exec_module_code_list(void);
+
 /*
  * nspredef - Support for predefined/reserved names
  */
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h
index eb6f038..b39d682 100644
--- a/drivers/acpi/acpica/acobject.h
+++ b/drivers/acpi/acpica/acobject.h
@@ -98,6 +98,7 @@
 #define AOPOBJ_SETUP_COMPLETE       0x10
 #define AOPOBJ_SINGLE_DATUM         0x20
 #define AOPOBJ_INVALID              0x40	/* Used if host OS won't allow an op_region address */
+#define AOPOBJ_MODULE_LEVEL         0x80
 
 /******************************************************************************
  *
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index 23ee0fb..22881e8 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -62,6 +62,8 @@
 #define ACPI_PARSE_DEFERRED_OP          0x0100
 #define ACPI_PARSE_DISASSEMBLE          0x0200
 
+#define ACPI_PARSE_MODULE_LEVEL         0x0400
+
 /******************************************************************************
  *
  * Parser interfaces
diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
index 53e27bc..54a225e 100644
--- a/drivers/acpi/acpica/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -123,9 +123,12 @@ acpi_ds_create_buffer_field(union acpi_parse_object *op,
 		flags = ACPI_NS_NO_UPSEARCH | ACPI_NS_DONT_OPEN_SCOPE |
 		    ACPI_NS_ERROR_IF_FOUND;
 
-		/* Mark node temporary if we are executing a method */
-
-		if (walk_state->method_node) {
+		/*
+		 * Mark node temporary if we are executing a normal control
+		 * method. (Don't mark if this is a module-level code method)
+		 */
+		if (walk_state->method_node &&
+		    !(walk_state->parse_flags & ACPI_PARSE_MODULE_LEVEL)) {
 			flags |= ACPI_NS_TEMPORARY;
 		}
 
@@ -456,9 +459,12 @@ acpi_ds_init_field_objects(union acpi_parse_object *op,
 	flags = ACPI_NS_NO_UPSEARCH | ACPI_NS_DONT_OPEN_SCOPE |
 	    ACPI_NS_ERROR_IF_FOUND;
 
-	/* Mark node(s) temporary if we are executing a method */
-
-	if (walk_state->method_node) {
+	/*
+	 * Mark node(s) temporary if we are executing a normal control
+	 * method. (Don't mark if this is a module-level code method)
+	 */
+	if (walk_state->method_node &&
+	    !(walk_state->parse_flags & ACPI_PARSE_MODULE_LEVEL)) {
 		flags |= ACPI_NS_TEMPORARY;
 	}
 
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index 14b8b8e..567a489 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -578,10 +578,15 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc,
 		}
 
 		/*
-		 * Delete any namespace objects created anywhere within
-		 * the namespace by the execution of this method
+		 * Delete any namespace objects created anywhere within the
+		 * namespace by the execution of this method. Unless this method
+		 * is a module-level executable code method, in which case we
+		 * want make the objects permanent.
 		 */
-		acpi_ns_delete_namespace_by_owner(method_desc->method.owner_id);
+		if (!(method_desc->method.flags & AOPOBJ_MODULE_LEVEL)) {
+			acpi_ns_delete_namespace_by_owner(method_desc->method.
+							  owner_id);
+		}
 	}
 
 	/* Decrement the thread count on the method */
@@ -622,7 +627,9 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc,
 
 		/* No more threads, we can free the owner_id */
 
-		acpi_ut_release_owner_id(&method_desc->method.owner_id);
+		if (!(method_desc->method.flags & AOPOBJ_MODULE_LEVEL)) {
+			acpi_ut_release_owner_id(&method_desc->method.owner_id);
+		}
 	}
 
 	return_VOID;
diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
index 3023cea..6de3a99 100644
--- a/drivers/acpi/acpica/dswload.c
+++ b/drivers/acpi/acpica/dswload.c
@@ -581,21 +581,6 @@ acpi_ds_load2_begin_op(struct acpi_walk_state *walk_state,
 		if ((!(walk_state->op_info->flags & AML_NSOPCODE) &&
 		     (walk_state->opcode != AML_INT_NAMEPATH_OP)) ||
 		    (!(walk_state->op_info->flags & AML_NAMED))) {
-#ifdef ACPI_ENABLE_MODULE_LEVEL_CODE
-			if ((walk_state->op_info->class == AML_CLASS_EXECUTE) ||
-			    (walk_state->op_info->class == AML_CLASS_CONTROL)) {
-				ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
-						  "Begin/EXEC: %s (fl %8.8X)\n",
-						  walk_state->op_info->name,
-						  walk_state->op_info->flags));
-
-				/* Executing a type1 or type2 opcode outside of a method */
-
-				status =
-				    acpi_ds_exec_begin_op(walk_state, out_op);
-				return_ACPI_STATUS(status);
-			}
-#endif
 			return_ACPI_STATUS(AE_OK);
 		}
 
@@ -768,7 +753,13 @@ acpi_ds_load2_begin_op(struct acpi_walk_state *walk_state,
 
 			/* Execution mode, node cannot already exist, node is temporary */
 
-			flags |= (ACPI_NS_ERROR_IF_FOUND | ACPI_NS_TEMPORARY);
+			flags |= ACPI_NS_ERROR_IF_FOUND;
+
+			if (!
+			    (walk_state->
+			     parse_flags & ACPI_PARSE_MODULE_LEVEL)) {
+				flags |= ACPI_NS_TEMPORARY;
+			}
 		}
 
 		/* Add new entry or lookup existing entry */
@@ -851,24 +842,6 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state)
 	/* Check if opcode had an associated namespace object */
 
 	if (!(walk_state->op_info->flags & AML_NSOBJECT)) {
-#ifndef ACPI_NO_METHOD_EXECUTION
-#ifdef ACPI_ENABLE_MODULE_LEVEL_CODE
-		/* No namespace object. Executable opcode? */
-
-		if ((walk_state->op_info->class == AML_CLASS_EXECUTE) ||
-		    (walk_state->op_info->class == AML_CLASS_CONTROL)) {
-			ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
-					  "End/EXEC:   %s (fl %8.8X)\n",
-					  walk_state->op_info->name,
-					  walk_state->op_info->flags));
-
-			/* Executing a type1 or type2 opcode outside of a method */
-
-			status = acpi_ds_exec_end_op(walk_state);
-			return_ACPI_STATUS(status);
-		}
-#endif
-#endif
 		return_ACPI_STATUS(AE_OK);
 	}
 
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 277fd60..24afef8 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -110,8 +110,15 @@ acpi_ex_add_table(u32 table_index,
 	if (ACPI_FAILURE(status)) {
 		acpi_ut_remove_reference(obj_desc);
 		*ddb_handle = NULL;
+		return_ACPI_STATUS(status);
 	}
 
+	/* Execute any module-level code that was found in the table */
+
+	acpi_ex_exit_interpreter();
+	acpi_ns_exec_module_code_list();
+	acpi_ex_enter_interpreter();
+
 	return_ACPI_STATUS(status);
 }
 
diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c
index 8e7dec1..846d113 100644
--- a/drivers/acpi/acpica/nseval.c
+++ b/drivers/acpi/acpica/nseval.c
@@ -50,6 +50,11 @@
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nseval")
 
+/* Local prototypes */
+static void
+acpi_ns_exec_module_code(union acpi_operand_object *method_obj,
+			 struct acpi_evaluate_info *info);
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ns_evaluate
@@ -76,6 +81,7 @@ ACPI_MODULE_NAME("nseval")
  * MUTEX:       Locks interpreter
  *
  ******************************************************************************/
+
 acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info)
 {
 	acpi_status status;
@@ -276,3 +282,134 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info)
 	 */
 	return_ACPI_STATUS(status);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_exec_module_code_list
+ *
+ * PARAMETERS:  None
+ *
+ * RETURN:      None. Exceptions during method execution are ignored, since
+ *              we cannot abort a table load.
+ *
+ * DESCRIPTION: Execute all elements of the global module-level code list.
+ *              Each element is executed as a single control method.
+ *
+ ******************************************************************************/
+
+void acpi_ns_exec_module_code_list(void)
+{
+	union acpi_operand_object *prev;
+	union acpi_operand_object *next;
+	struct acpi_evaluate_info *info;
+	u32 method_count = 0;
+
+	ACPI_FUNCTION_TRACE(ns_exec_module_code_list);
+
+	/* Exit now if the list is empty */
+
+	next = acpi_gbl_module_code_list;
+	if (!next) {
+		return_VOID;
+	}
+
+	/* Allocate the evaluation information block */
+
+	info = ACPI_ALLOCATE(sizeof(struct acpi_evaluate_info));
+	if (!info) {
+		return_VOID;
+	}
+
+	/* Walk the list, executing each "method" */
+
+	while (next) {
+		prev = next;
+		next = next->method.mutex;
+
+		/* Clear the link field and execute the method */
+
+		prev->method.mutex = NULL;
+		acpi_ns_exec_module_code(prev, info);
+		method_count++;
+
+		/* Delete the (temporary) method object */
+
+		acpi_ut_remove_reference(prev);
+	}
+
+	ACPI_INFO((AE_INFO,
+		   "Executed %u blocks of module-level executable AML code",
+		   method_count));
+
+	ACPI_FREE(info);
+	acpi_gbl_module_code_list = NULL;
+	return_VOID;
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_exec_module_code
+ *
+ * PARAMETERS:  method_obj          - Object container for the module-level code
+ *              Info                - Info block for method evaluation
+ *
+ * RETURN:      None. Exceptions during method execution are ignored, since
+ *              we cannot abort a table load.
+ *
+ * DESCRIPTION: Execute a control method containing a block of module-level
+ *              executable AML code. The control method is temporarily
+ *              installed to the root node, then evaluated.
+ *
+ ******************************************************************************/
+
+static void
+acpi_ns_exec_module_code(union acpi_operand_object *method_obj,
+			 struct acpi_evaluate_info *info)
+{
+	union acpi_operand_object *root_obj;
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(ns_exec_module_code);
+
+	/* Initialize the evaluation information block */
+
+	ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info));
+	info->prefix_node = acpi_gbl_root_node;
+
+	/*
+	 * Get the currently attached root object. Add a reference, because the
+	 * ref count will be decreased when the method object is installed to
+	 * the root node.
+	 */
+	root_obj = acpi_ns_get_attached_object(acpi_gbl_root_node);
+	acpi_ut_add_reference(root_obj);
+
+	/* Install the method (module-level code) in the root node */
+
+	status = acpi_ns_attach_object(acpi_gbl_root_node, method_obj,
+				       ACPI_TYPE_METHOD);
+	if (ACPI_FAILURE(status)) {
+		goto exit;
+	}
+
+	/* Execute the root node as a control method */
+
+	status = acpi_ns_evaluate(info);
+
+	ACPI_DEBUG_PRINT((ACPI_DB_INIT, "Executed module-level code at %p\n",
+			  method_obj->method.aml_start));
+
+	/* Detach the temporary method object */
+
+	acpi_ns_detach_object(acpi_gbl_root_node);
+
+	/* Restore the original root object */
+
+	status =
+	    acpi_ns_attach_object(acpi_gbl_root_node, root_obj,
+				  ACPI_TYPE_DEVICE);
+
+      exit:
+	acpi_ut_remove_reference(root_obj);
+	return_VOID;
+}
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index c5f6ce1..cd7995b 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -86,6 +86,9 @@ static acpi_status
 acpi_ps_complete_final_op(struct acpi_walk_state *walk_state,
 			  union acpi_parse_object *op, acpi_status status);
 
+static void
+acpi_ps_link_module_code(u8 *aml_start, u32 aml_length, acpi_owner_id owner_id);
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ps_get_aml_opcode
@@ -390,6 +393,7 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 {
 	acpi_status status = AE_OK;
 	union acpi_parse_object *arg = NULL;
+	const struct acpi_opcode_info *op_info;
 
 	ACPI_FUNCTION_TRACE_PTR(ps_get_arguments, walk_state);
 
@@ -449,13 +453,11 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 			INCREMENT_ARG_LIST(walk_state->arg_types);
 		}
 
-		/* Special processing for certain opcodes */
-
-		/* TBD (remove): Temporary mechanism to disable this code if needed */
-
-#ifdef ACPI_ENABLE_MODULE_LEVEL_CODE
-
-		if ((walk_state->pass_number <= ACPI_IMODE_LOAD_PASS1) &&
+		/*
+		 * Handle executable code at "module-level". This refers to
+		 * executable opcodes that appear outside of any control method.
+		 */
+		if ((walk_state->pass_number <= ACPI_IMODE_LOAD_PASS2) &&
 		    ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) == 0)) {
 			/*
 			 * We want to skip If/Else/While constructs during Pass1 because we
@@ -469,6 +471,23 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 			case AML_ELSE_OP:
 			case AML_WHILE_OP:
 
+				/*
+				 * Currently supported module-level opcodes are:
+				 * IF/ELSE/WHILE. These appear to be the most common,
+				 * and easiest to support since they open an AML
+				 * package.
+				 */
+				if (walk_state->pass_number ==
+				    ACPI_IMODE_LOAD_PASS1) {
+					acpi_ps_link_module_code(aml_op_start,
+								 walk_state->
+								 parser_state.
+								 pkg_end -
+								 aml_op_start,
+								 walk_state->
+								 owner_id);
+				}
+
 				ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
 						  "Pass1: Skipping an If/Else/While body\n"));
 
@@ -480,10 +499,34 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 				break;
 
 			default:
+				/*
+				 * Check for an unsupported executable opcode at module
+				 * level. We must be in PASS1, the parent must be a SCOPE,
+				 * The opcode class must be EXECUTE, and the opcode must
+				 * not be an argument to another opcode.
+				 */
+				if ((walk_state->pass_number ==
+				     ACPI_IMODE_LOAD_PASS1)
+				    && (op->common.parent->common.aml_opcode ==
+					AML_SCOPE_OP)) {
+					op_info =
+					    acpi_ps_get_opcode_info(op->common.
+								    aml_opcode);
+					if ((op_info->class ==
+					     AML_CLASS_EXECUTE) && (!arg)) {
+						ACPI_WARNING((AE_INFO,
+							      "Detected an unsupported executable opcode "
+							      "at module-level: [0x%.4X] at table offset 0x%.4X",
+							      op->common.aml_opcode,
+							      (u32)((aml_op_start - walk_state->parser_state.aml_start)
+								+ sizeof(struct acpi_table_header))));
+					}
+				}
 				break;
 			}
 		}
-#endif
+
+		/* Special processing for certain opcodes */
 
 		switch (op->common.aml_opcode) {
 		case AML_METHOD_OP:
@@ -553,6 +596,66 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_ps_link_module_code
+ *
+ * PARAMETERS:  aml_start           - Pointer to the AML
+ *              aml_length          - Length of executable AML
+ *              owner_id            - owner_id of module level code
+ *
+ * RETURN:      None.
+ *
+ * DESCRIPTION: Wrap the module-level code with a method object and link the
+ *              object to the global list. Note, the mutex field of the method
+ *              object is used to link multiple module-level code objects.
+ *
+ ******************************************************************************/
+
+static void
+acpi_ps_link_module_code(u8 *aml_start, u32 aml_length, acpi_owner_id owner_id)
+{
+	union acpi_operand_object *prev;
+	union acpi_operand_object *next;
+	union acpi_operand_object *method_obj;
+
+	/* Get the tail of the list */
+
+	prev = next = acpi_gbl_module_code_list;
+	while (next) {
+		prev = next;
+		next = next->method.mutex;
+	}
+
+	/*
+	 * Insert the module level code into the list. Merge it if it is
+	 * adjacent to the previous element.
+	 */
+	if (!prev ||
+	    ((prev->method.aml_start + prev->method.aml_length) != aml_start)) {
+
+		/* Create, initialize, and link a new temporary method object */
+
+		method_obj = acpi_ut_create_internal_object(ACPI_TYPE_METHOD);
+		if (!method_obj) {
+			return;
+		}
+
+		method_obj->method.aml_start = aml_start;
+		method_obj->method.aml_length = aml_length;
+		method_obj->method.owner_id = owner_id;
+		method_obj->method.flags |= AOPOBJ_MODULE_LEVEL;
+
+		if (!prev) {
+			acpi_gbl_module_code_list = method_obj;
+		} else {
+			prev->method.mutex = method_obj;
+		}
+	} else {
+		prev->method.aml_length += aml_length;
+	}
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_ps_complete_op
  *
  * PARAMETERS:  walk_state          - Current state
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index ff06032..dd9731c 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -280,6 +280,10 @@ acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info)
 		goto cleanup;
 	}
 
+	if (info->obj_desc->method.flags & AOPOBJ_MODULE_LEVEL) {
+		walk_state->parse_flags |= ACPI_PARSE_MODULE_LEVEL;
+	}
+
 	/* Invoke an internal method if necessary */
 
 	if (info->obj_desc->method.method_flags & AML_METHOD_INTERNAL_ONLY) {
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index 9e33b62..3f2c68f 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -807,6 +807,7 @@ acpi_status acpi_ut_init_globals(void)
 
 	/* Namespace */
 
+	acpi_gbl_module_code_list = NULL;
 	acpi_gbl_root_node = NULL;
 	acpi_gbl_root_node_struct.name.integer = ACPI_ROOT_NAME;
 	acpi_gbl_root_node_struct.descriptor_type = ACPI_DESC_TYPE_NAMED;
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index 483edbb..b1f5f68 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -251,6 +251,16 @@ acpi_status acpi_initialize_objects(u32 flags)
 	}
 
 	/*
+	 * Execute any module-level code that was detected during the table load
+	 * phase. Although illegal since ACPI 2.0, there are many machines that
+	 * contain this type of code. Each block of detected executable AML code
+	 * outside of any control method is wrapped with a temporary control
+	 * method object and placed on a global list. The methods on this list
+	 * are executed below.
+	 */
+	acpi_ns_exec_module_code_list();
+
+	/*
 	 * Initialize the objects that remain uninitialized. This runs the
 	 * executable AML that may be part of the declaration of these objects:
 	 * operation_regions, buffer_fields, Buffers, and Packages.
-- 
cgit v0.10.2


From c276e3884163355464a76e60ed9e272b52b4acc2 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 27 Jul 2009 14:55:02 +0800
Subject: ACPICA: Update definitions for HEST table

Eliminate duplicated code in disassembler.
Shorten identifiers that were too long.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 0417f2a..34b10c0 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -427,17 +427,17 @@ struct acpi_hest_aer_common {
 	u16 reserved1;
 	u8 flags;
 	u8 enabled;
-	u32 records_to_pre_allocate;
+	u32 records_to_preallocate;
 	u32 max_sections_per_record;
 	u32 bus;
 	u16 device;
 	u16 function;
 	u16 device_control;
 	u16 reserved2;
-	u32 uncorrectable_error_mask;
-	u32 uncorrectable_error_severity;
-	u32 correctable_error_mask;
-	u32 advanced_error_capabilities;
+	u32 uncorrectable_mask;
+	u32 uncorrectable_severity;
+	u32 correctable_mask;
+	u32 advanced_capabilities;
 };
 
 /* Masks for HEST Flags fields */
@@ -490,7 +490,7 @@ struct acpi_hest_ia_machine_check {
 	u16 reserved1;
 	u8 flags;
 	u8 enabled;
-	u32 records_to_pre_allocate;
+	u32 records_to_preallocate;
 	u32 max_sections_per_record;
 	u64 global_capability_data;
 	u64 global_control_data;
@@ -505,7 +505,7 @@ struct acpi_table_hest_ia_corrected {
 	u16 reserved1;
 	u8 flags;
 	u8 enabled;
-	u32 records_to_pre_allocate;
+	u32 records_to_preallocate;
 	u32 max_sections_per_record;
 	struct acpi_hest_notify notify;
 	u8 num_hardware_banks;
@@ -517,7 +517,7 @@ struct acpi_table_hest_ia_corrected {
 struct acpi_hest_ia_nmi {
 	struct acpi_hest_header header;
 	u32 reserved;
-	u32 records_to_pre_allocate;
+	u32 records_to_preallocate;
 	u32 max_sections_per_record;
 	u32 max_raw_data_length;
 };
@@ -544,9 +544,9 @@ struct acpi_hest_aer {
 struct acpi_hest_aer_bridge {
 	struct acpi_hest_header header;
 	struct acpi_hest_aer_common aer;
-	u32 second_uncorrectable_error_mask;
-	u32 second_uncorrectable_error_severity;
-	u32 second_advanced_capabilities;
+	u32 uncorrectable_mask2;
+	u32 uncorrectable_severity2;
+	u32 advanced_capabilities2;
 };
 
 /* 9: Generic Hardware Error Source */
@@ -556,7 +556,7 @@ struct acpi_hest_generic {
 	u16 related_source_id;
 	u8 reserved;
 	u8 enabled;
-	u32 records_to_pre_allocate;
+	u32 records_to_preallocate;
 	u32 max_sections_per_record;
 	u32 max_raw_data_length;
 	struct acpi_generic_address error_status_address;
-- 
cgit v0.10.2


From 1872bbc94b2d092ece22a8fbf1c3e81f0fba0052 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 13 Aug 2009 13:31:00 +0800
Subject: ACPICA: Fix typo for HEST ACPI table

Problem with the name of one of the subtables.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 34b10c0..0b9b430 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -500,7 +500,7 @@ struct acpi_hest_ia_machine_check {
 
 /* 1: IA32 Corrected Machine Check */
 
-struct acpi_table_hest_ia_corrected {
+struct acpi_hest_ia_corrected {
 	struct acpi_hest_header header;
 	u16 reserved1;
 	u8 flags;
-- 
cgit v0.10.2


From 8a964236800839263b3dddd7f7851d666e7d53e1 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 13 Aug 2009 13:42:19 +0800
Subject: ACPICA: acpi_reset: Bypass port validation mechanism

Allow writes to reserved ports. This change may eventually be
driven down in to acpi_write and acpi_read.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index 4ead85f..647c7b6 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -78,9 +78,22 @@ acpi_status acpi_reset(void)
 		return_ACPI_STATUS(AE_NOT_EXIST);
 	}
 
-	/* Write the reset value to the reset register */
+	if (reset_reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+		/*
+		 * For I/O space, write directly to the OSL. This bypasses the port
+		 * validation mechanism, which may block a valid write to the reset
+		 * register.
+		 */
+		status =
+		    acpi_os_write_port((acpi_io_address) reset_reg->address,
+				       acpi_gbl_FADT.reset_value,
+				       reset_reg->bit_width);
+	} else {
+		/* Write the reset value to the reset register */
+
+		status = acpi_hw_write(acpi_gbl_FADT.reset_value, reset_reg);
+	}
 
-	status = acpi_hw_write(acpi_gbl_FADT.reset_value, reset_reg);
 	return_ACPI_STATUS(status);
 }
 
-- 
cgit v0.10.2


From 49ae80c9944401222e47108883c486b5a5a24006 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 13 Aug 2009 13:43:12 +0800
Subject: ACPICA: Update version to 20090730

Version 20090730.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 063e577..f3b358b 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20090625
+#define ACPI_CA_VERSION                 0x20090730
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v0.10.2


From a192a9580bcc41692be1f36b77c3b681827f566a Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 28 Jul 2009 16:45:54 -0400
Subject: ACPI: Move definition of PREFIX from acpi_bus.h to internal..h

Linux/ACPI core files using internal.h all PREFIX "ACPI: ",
however, not all ACPI drivers use/want it -- and they
should not have to #undef PREFIX to define their own.

Add GPL commment to internal.h while we are there.

This does not change any actual console output,
asside from a whitespace fix.

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 712443e..81d3466 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -18,6 +18,8 @@
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
 
+#define PREFIX "ACPI: "
+
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN	(2 * 1024*1024)
 #define MMCONFIG_APER_MAX	(256 * 1024*1024)
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 0df8fcb..98b9690 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -37,6 +37,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_AC_CLASS			"ac_adapter"
 #define ACPI_AC_DEVICE_NAME		"AC Adapter"
 #define ACPI_AC_FILE_STATE		"state"
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 58b4517..f8c3d1b 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -45,6 +45,8 @@
 #include <linux/power_supply.h>
 #endif
 
+#define PREFIX "ACPI: "
+
 #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF
 
 #define ACPI_BATTERY_CLASS		"battery"
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index f6baa77..19152ea 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -34,6 +34,8 @@
 #include <acpi/acpi_bus.h>
 #include <linux/dmi.h>
 
+#include "internal.h"
+
 enum acpi_blacklist_predicates {
 	all_versions,
 	less_than_or_equal,
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 9195deb..d295bdc 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -33,6 +33,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_BUTTON_CLASS		"button"
 #define ACPI_BUTTON_FILE_INFO		"info"
 #define ACPI_BUTTON_FILE_STATE		"state"
diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c
index 332fe4b..6c9ee68 100644
--- a/drivers/acpi/cm_sbs.c
+++ b/drivers/acpi/cm_sbs.c
@@ -28,6 +28,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 ACPI_MODULE_NAME("cm_sbs");
 #define ACPI_AC_CLASS		"ac_adapter"
 #define ACPI_BATTERY_CLASS	"battery"
diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c
index fe0cdf8..5f2c3c0 100644
--- a/drivers/acpi/container.c
+++ b/drivers/acpi/container.c
@@ -35,6 +35,8 @@
 #include <acpi/acpi_drivers.h>
 #include <acpi/container.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_CONTAINER_DEVICE_NAME	"ACPI container device"
 #define ACPI_CONTAINER_CLASS		"container"
 
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index efb959d..9a85566 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -33,6 +33,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_DOCK_DRIVER_DESCRIPTION "ACPI Dock Station Driver"
 
 ACPI_MODULE_NAME("dock");
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 391f331..5180f0f 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -47,7 +47,6 @@
 #define ACPI_EC_DEVICE_NAME		"Embedded Controller"
 #define ACPI_EC_FILE_INFO		"info"
 
-#undef PREFIX
 #define PREFIX				"ACPI: EC: "
 
 /* EC status register */
diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index aeb7e5f..c511071 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -14,6 +14,8 @@
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
+#include "internal.h"
+
 #define _COMPONENT		ACPI_SYSTEM_COMPONENT
 ACPI_MODULE_NAME("event");
 
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 53698ea..f419849 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -34,6 +34,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_FAN_CLASS			"fan"
 #define ACPI_FAN_FILE_STATE		"state"
 
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index a8a5c29..dc36a44 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -12,6 +12,8 @@
 #include <linux/rwsem.h>
 #include <linux/acpi.h>
 
+#include "internal.h"
+
 #define ACPI_GLUE_DEBUG	0
 #if ACPI_GLUE_DEBUG
 #define DBG(x...) printk(PREFIX x)
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 11a69b5..074cf86 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -1,4 +1,24 @@
-/* For use by Linux/ACPI infrastructure, not drivers */
+/*
+ * acpi/internal.h
+ * For use by Linux/ACPI infrastructure, not drivers
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define PREFIX "ACPI: "
 
 int init_acpi_device_notify(void);
 int acpi_scan_init(void);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index d440ccd..202dd0c 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -30,6 +30,8 @@
 #include <linux/acpi.h>
 #include <acpi/acpi_bus.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_NUMA	0x80000000
 #define _COMPONENT	ACPI_NUMA
 ACPI_MODULE_NAME("numa");
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index b794eb8..843699e 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -40,6 +40,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define _COMPONENT		ACPI_PCI_COMPONENT
 ACPI_MODULE_NAME("pci_irq");
 
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 16e0f9d..394ae89 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -43,6 +43,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define _COMPONENT			ACPI_PCI_COMPONENT
 ACPI_MODULE_NAME("pci_link");
 #define ACPI_PCI_LINK_CLASS		"pci_irq_routing"
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 55b5b90..dee9167 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -36,6 +36,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define _COMPONENT		ACPI_PCI_COMPONENT
 ACPI_MODULE_NAME("pci_root");
 #define ACPI_PCI_ROOT_CLASS		"pci_bridge"
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index d74365d..e86603f 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -44,6 +44,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define _COMPONENT			ACPI_POWER_COMPONENT
 ACPI_MODULE_NAME("power");
 #define ACPI_POWER_CLASS		"power_resource"
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 2cc4b30..b4a1ab2 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -59,6 +59,8 @@
 #include <acpi/acpi_drivers.h>
 #include <acpi/processor.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_PROCESSOR_CLASS		"processor"
 #define ACPI_PROCESSOR_DEVICE_NAME	"Processor"
 #define ACPI_PROCESSOR_FILE_INFO	"info"
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 66393d5..22aab1f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -60,6 +60,8 @@
 #include <acpi/processor.h>
 #include <asm/processor.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_PROCESSOR_CLASS            "processor"
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
 ACPI_MODULE_NAME("processor_idle");
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 60e543d..11088cf 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -39,6 +39,8 @@
 #include <acpi/acpi_drivers.h>
 #include <acpi/processor.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_PROCESSOR_CLASS		"processor"
 #define ACPI_PROCESSOR_FILE_PERFORMANCE	"performance"
 #define _COMPONENT		ACPI_PROCESSOR_COMPONENT
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 31adda1..3e3181c 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -40,6 +40,8 @@
 #include <acpi/processor.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_PROCESSOR_CLASS            "processor"
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
 ACPI_MODULE_NAME("processor_thermal");
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index ae39797..b366b9c 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -41,6 +41,8 @@
 #include <acpi/acpi_drivers.h>
 #include <acpi/processor.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_PROCESSOR_CLASS            "processor"
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
 ACPI_MODULE_NAME("processor_throttling");
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 4b214b7..52b9db8 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -46,6 +46,8 @@
 
 #include "sbshc.h"
 
+#define PREFIX "ACPI: "
+
 #define ACPI_SBS_CLASS			"sbs"
 #define ACPI_AC_CLASS			"ac_adapter"
 #define ACPI_BATTERY_CLASS		"battery"
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index 0619734..d933980 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -15,6 +15,8 @@
 #include <linux/interrupt.h>
 #include "sbshc.h"
 
+#define PREFIX "ACPI: "
+
 #define ACPI_SMB_HC_CLASS	"smbus_host_controller"
 #define ACPI_SMB_HC_DEVICE_NAME	"ACPI SMBus HC"
 
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index 9c61ab2..d112829 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -31,6 +31,8 @@
 
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define _COMPONENT		ACPI_SYSTEM_COMPONENT
 ACPI_MODULE_NAME("system");
 
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 564ea14..65f6781 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -47,6 +47,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_THERMAL_CLASS		"thermal_zone"
 #define ACPI_THERMAL_DEVICE_NAME	"Thermal Zone"
 #define ACPI_THERMAL_FILE_STATE		"state"
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index f844941..811fec1 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -30,6 +30,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#include "internal.h"
+
 #define _COMPONENT		ACPI_BUS_COMPONENT
 ACPI_MODULE_NAME("utils");
 
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 8851315..a0fa394 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -44,6 +44,8 @@
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
+#define PREFIX "ACPI: "
+
 #define ACPI_VIDEO_CLASS		"video"
 #define ACPI_VIDEO_BUS_NAME		"Video Bus"
 #define ACPI_VIDEO_DEVICE_NAME		"Video Device"
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 7cd2b63..7032f25 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -38,6 +38,8 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 
+#define PREFIX "ACPI: "
+
 ACPI_MODULE_NAME("video");
 #define _COMPONENT		ACPI_VIDEO_COMPONENT
 
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 7b287cb..998f02d 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -34,8 +34,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 
-#undef PREFIX
-#define PREFIX "DMAR:"
+#define PREFIX "DMAR: "
 
 /* No locks are needed as DMA remapping hardware unit
  * list is constructed at boot time and hotplug of
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 218b9a1..eabddc9c 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -700,7 +700,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
 		goto end;
 	}
 
-	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
+	printk(KERN_INFO "ACPI: %s [%s] (%s)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
 	       !device->power.state ? "on" : "off");
 
@@ -874,7 +874,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 		goto end;
 	}
 
-	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
+	printk(KERN_INFO "ACPI: %s [%s] (%s)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
 	       !device->power.state ? "on" : "off");
 
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index f215a59..177f8d7 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -42,7 +42,6 @@ MODULE_LICENSE("GPL");
 
 #define ACPI_WMI_CLASS "wmi"
 
-#undef PREFIX
 #define PREFIX "ACPI: WMI: "
 
 static DEFINE_MUTEX(wmi_data_lock);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c65e4ce..f485107d 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -30,8 +30,6 @@
 
 #include <acpi/acpi.h>
 
-#define PREFIX			"ACPI: "
-
 /* TBD: Make dynamic */
 #define ACPI_MAX_HANDLES	10
 struct acpi_handle_list {
-- 
cgit v0.10.2


From f4a2d5840e9f0e48d1a787b66e7346087a756029 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 28 Jul 2009 16:48:02 -0400
Subject: ACPI, PCI: Change PREFIX to "PCI" from "ACPI" in mmconfig-shared.c

Signed-off-by: Len Brown <len.brown@intel.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 81d3466..b707a01 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -18,7 +18,7 @@
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
 
-#define PREFIX "ACPI: "
+#define PREFIX "PCI: "
 
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN	(2 * 1024*1024)
-- 
cgit v0.10.2


From e5b8fc6ac158f65598f58dba2c0d52ba3b412f52 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 7 Jul 2009 23:22:58 -0400
Subject: ACPI: check acpi_disabled in acpi_table_parse() and
 acpi_table_parse_entries()

Allow consumers of the acpi_table_parse()/acpi_table_parse_entries() API
to gracefully handle the acpi_disabled=1 case via return value
rather than checking the global flag themselves.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 646d39c..f336bca7 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -213,6 +213,9 @@ acpi_table_parse_entries(char *id,
 	unsigned long table_end;
 	acpi_size tbl_size;
 
+	if (acpi_disabled)
+		return -ENODEV;
+
 	if (!handler)
 		return -EINVAL;
 
@@ -277,6 +280,9 @@ int __init acpi_table_parse(char *id, acpi_table_handler handler)
 	struct acpi_table_header *table = NULL;
 	acpi_size tbl_size;
 
+	if (acpi_disabled)
+		return -ENODEV;
+
 	if (!handler)
 		return -EINVAL;
 
-- 
cgit v0.10.2


From 6349d9979beba240fe7182872cb547250264b865 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 14 Aug 2009 15:07:14 -0400
Subject: SFI: Simple Firmware Interface - MAINTAINERS, Kconfig

CONFIG_SFI=y enables the kernel to boot and run optimally
on platforms that support the Simple Firmware Interface.

Thanks to Jacob Pan for prototyping the initial Linux SFI support,
and to Feng Tang for Linux bring-up and debug both in emulation
and on Moorestown hardware.

See http://simplefirmware.org for more information on SFI.

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 60299a9..055ba88 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4575,6 +4575,18 @@ L:	linux-pci@vger.kernel.org
 S:	Supported
 F:	drivers/pci/hotplug/shpchp*
 
+SIMPLE FIRMWARE INTERFACE (SFI)
+P:	Len Brown
+M:	lenb@kernel.org
+L:	sfi-devel@simplefirmware.org
+W:	http://simplefirmware.org/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-sfi-2.6.git
+S:	Supported
+F:	arch/x86/kernel/*sfi*
+F:	drivers/sfi/
+F:	include/linux/sfi*.h
+
+
 SIMTEC EB110ATX (Chalice CATS)
 P:	Ben Dooks
 M:	Vincent Sanders <support@simtec.co.uk>
diff --git a/drivers/sfi/Kconfig b/drivers/sfi/Kconfig
new file mode 100644
index 0000000..dd11512
--- /dev/null
+++ b/drivers/sfi/Kconfig
@@ -0,0 +1,17 @@
+#
+# SFI Configuration
+#
+
+menuconfig SFI
+	bool "SFI (Simple Firmware Interface) Support"
+	---help---
+	The Simple Firmware Interface (SFI) provides a lightweight method
+	for platform firmware to pass information to the operating system
+	via static tables in memory.  Kernel SFI support is required to
+	boot on SFI-only platforms.  Currently, all SFI-only platforms are
+	based on the 2nd generation Intel Atom processor platform,
+	code-named Moorestown.
+
+	For more information, see http://simplefirmware.org
+
+	Say 'Y' here to enable the kernel to boot on SFI-only platforms.
-- 
cgit v0.10.2


From e55a5999ffcf72dc4d43d73618957964cb87065a Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Tue, 28 Jul 2009 17:41:53 +0800
Subject: ACPI: Handle CONFIG_ACPI=n better from linux/acpi.h

linux/acpi.h is the top level header for interfacing
with the ACPI sub-system, so acpi_disabled should be
up there instead of down in asm/acpi.h -- particularly
since asm/acpi.h doesn't exist for all architectures.

Same story for acpi_table_parse(), which is a top-level
API to Linux/ACPI.

This is necessary for building some code that
used to always depend on CONFIG_ACPI=y, but will soon
also need to build with CONFIG_ACPI=n.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 20d1465..4518dc5 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
 
 #else /* !CONFIG_ACPI */
 
-#define acpi_disabled 1
 #define acpi_lapic 0
 #define acpi_ioapic 0
 static inline void acpi_noirq_set(void) { }
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 34321cf..3fce811 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -292,7 +292,10 @@ void __init acpi_s4_no_nvs(void);
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
 extern void acpi_early_init(void);
 
-#else	/* CONFIG_ACPI */
+#else	/* !CONFIG_ACPI */
+
+#define acpi_disabled 1
+
 static inline void acpi_early_init(void) { }
 
 static inline int early_acpi_boot_init(void)
@@ -331,5 +334,11 @@ static inline int acpi_check_mem_region(resource_size_t start,
 	return 0;
 }
 
+struct acpi_table_header;
+static inline int acpi_table_parse(char *id,
+				int (*handler)(struct acpi_table_header *))
+{
+	return -1;
+}
 #endif	/* !CONFIG_ACPI */
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v0.10.2


From 117a9ac777f8034d4675b821172d2ff71f6ec47a Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 14 Aug 2009 15:10:24 -0400
Subject: SFI: create linux/sfi.h

include/linux/include/sfi.h defines everything that customers
of SFI need to know in order to use the SFI suport in the kernel.

The primary API is sfi_table_parse(), where a driver or another part
of the kernel can supply a handler to parse the named table.

sfi.h also includes the currently defined table signatures and table
formats.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/linux/sfi.h b/include/linux/sfi.h
new file mode 100644
index 0000000..9a6f760
--- /dev/null
+++ b/include/linux/sfi.h
@@ -0,0 +1,206 @@
+/* sfi.h Simple Firmware Interface */
+
+/*
+
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+  The full GNU General Public License is included in this distribution
+  in the file called LICENSE.GPL.
+
+  BSD LICENSE
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef _LINUX_SFI_H
+#define _LINUX_SFI_H
+
+/* Table signatures reserved by the SFI specification */
+#define SFI_SIG_SYST		"SYST"
+#define SFI_SIG_FREQ		"FREQ"
+#define SFI_SIG_IDLE		"IDLE"
+#define SFI_SIG_CPUS		"CPUS"
+#define SFI_SIG_MTMR		"MTMR"
+#define SFI_SIG_MRTC		"MRTC"
+#define SFI_SIG_MMAP		"MMAP"
+#define SFI_SIG_APIC		"APIC"
+#define SFI_SIG_XSDT		"XSDT"
+#define SFI_SIG_WAKE		"WAKE"
+#define SFI_SIG_SPIB		"SPIB"
+#define SFI_SIG_I2CB		"I2CB"
+#define SFI_SIG_GPEM		"GPEM"
+
+#define SFI_SIGNATURE_SIZE	4
+#define SFI_OEM_ID_SIZE		6
+#define SFI_OEM_TABLE_ID_SIZE	8
+
+#define SFI_SYST_SEARCH_BEGIN		0x000E0000
+#define SFI_SYST_SEARCH_END		0x000FFFFF
+
+#define SFI_GET_NUM_ENTRIES(ptable, entry_type) \
+	((ptable->header.len - sizeof(struct sfi_table_header)) / \
+	(sizeof(entry_type)))
+/*
+ * Table structures must be byte-packed to match the SFI specification,
+ * as they are provided by the BIOS.
+ */
+struct sfi_table_header {
+	char	sig[SFI_SIGNATURE_SIZE];
+	u32	len;
+	u8	rev;
+	u8	csum;
+	char	oem_id[SFI_OEM_ID_SIZE];
+	char	oem_table_id[SFI_OEM_TABLE_ID_SIZE];
+} __packed;
+
+struct sfi_table_simple {
+	struct sfi_table_header		header;
+	u64				pentry[1];
+} __packed;
+
+/* Comply with UEFI spec 2.1 */
+struct sfi_mem_entry {
+	u32	type;
+	u64	phys_start;
+	u64	virt_start;
+	u64	pages;
+	u64	attrib;
+} __packed;
+
+struct sfi_cpu_table_entry {
+	u32	apic_id;
+} __packed;
+
+struct sfi_cstate_table_entry {
+	u32	hint;		/* MWAIT hint */
+	u32	latency;	/* latency in ms */
+} __packed;
+
+struct sfi_apic_table_entry {
+	u64	phys_addr;	/* phy base addr for APIC reg */
+} __packed;
+
+struct sfi_freq_table_entry {
+	u32	freq_mhz;	/* in MHZ */
+	u32	latency;	/* transition latency in ms */
+	u32	ctrl_val;	/* value to write to PERF_CTL */
+} __packed;
+
+struct sfi_wake_table_entry {
+	u64	phys_addr;	/* pointer to where the wake vector locates */
+} __packed;
+
+struct sfi_timer_table_entry {
+	u64	phys_addr;	/* phy base addr for the timer */
+	u32	freq_hz;	/* in HZ */
+	u32	irq;
+} __packed;
+
+struct sfi_rtc_table_entry {
+	u64	phys_addr;	/* phy base addr for the RTC */
+	u32	irq;
+} __packed;
+
+struct sfi_spi_table_entry {
+	u16	host_num;	/* attached to host 0, 1...*/
+	u16	cs;		/* chip select */
+	u16	irq_info;
+	char	name[16];
+	u8	dev_info[10];
+} __packed;
+
+struct sfi_i2c_table_entry {
+	u16	host_num;
+	u16	addr;		/* slave addr */
+	u16	irq_info;
+	char	name[16];
+	u8	dev_info[10];
+} __packed;
+
+struct sfi_gpe_table_entry {
+	u16	logical_id;	/* logical id */
+	u16	phys_id;	/* physical GPE id */
+} __packed;
+
+
+typedef int (*sfi_table_handler) (struct sfi_table_header *table);
+
+#ifdef CONFIG_SFI
+extern void __init sfi_init(void);
+extern int __init sfi_platform_init(void);
+extern void __init sfi_init_late(void);
+extern int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id,
+				sfi_table_handler handler);
+
+extern int sfi_disabled;
+static inline void disable_sfi(void)
+{
+	sfi_disabled = 1;
+}
+
+#else /* !CONFIG_SFI */
+
+static inline void sfi_init(void)
+{
+}
+
+static inline void sfi_init_late(void)
+{
+}
+
+#define sfi_disabled	0
+
+static inline int sfi_table_parse(char *signature, char *oem_id,
+					char *oem_table_id,
+					sfi_table_handler handler)
+{
+	return -1;
+}
+
+#endif /* !CONFIG_SFI */
+
+#endif /*_LINUX_SFI_H*/
-- 
cgit v0.10.2


From 6ae6996a466e14bcf41618cde641a74ae03dc285 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 14 Aug 2009 15:13:46 -0400
Subject: SFI: add platform-independent core support

drivers/sfi/sfi_core.c contains the generic SFI implementation.
It has a private header, sfi_core.h, for its own use and the
private use of future files in drivers/sfi/

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/Makefile b/drivers/Makefile
index bc4205d..ccfa259f 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_PARISC)		+= parisc/
 obj-$(CONFIG_RAPIDIO)		+= rapidio/
 obj-y				+= video/
 obj-$(CONFIG_ACPI)		+= acpi/
+obj-$(CONFIG_SFI)		+= sfi/
 # PnP must come after ACPI since it will eventually need to check if acpi
 # was used and do nothing if so
 obj-$(CONFIG_PNP)		+= pnp/
diff --git a/drivers/sfi/Makefile b/drivers/sfi/Makefile
new file mode 100644
index 0000000..2343732
--- /dev/null
+++ b/drivers/sfi/Makefile
@@ -0,0 +1,3 @@
+obj-y	+= sfi_acpi.o
+obj-y	+= sfi_core.o
+
diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c
new file mode 100644
index 0000000..7bd6b18
--- /dev/null
+++ b/drivers/sfi/sfi_core.c
@@ -0,0 +1,412 @@
+/* sfi_core.c Simple Firmware Interface - core internals */
+
+/*
+
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+  The full GNU General Public License is included in this distribution
+  in the file called LICENSE.GPL.
+
+  BSD LICENSE
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#define KMSG_COMPONENT "SFI"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/bootmem.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/sfi.h>
+#include <linux/smp.h>
+
+#include <asm/pgtable.h>
+
+#include "sfi_core.h"
+
+#define ON_SAME_PAGE(addr1, addr2) \
+	(((unsigned long)(addr1) & PAGE_MASK) == \
+	((unsigned long)(addr2) & PAGE_MASK))
+#define TABLE_ON_PAGE(page, table, size) (ON_SAME_PAGE(page, table) && \
+				ON_SAME_PAGE(page, table + size))
+
+int sfi_disabled __read_mostly;
+EXPORT_SYMBOL(sfi_disabled);
+
+static u64 syst_pa __read_mostly;
+static struct sfi_table_simple *syst_va __read_mostly;
+
+/*
+ * FW creates and saves the SFI tables in memory. When these tables get
+ * used, they may need to be mapped to virtual address space, and the mapping
+ * can happen before or after the ioremap() is ready, so a flag is needed
+ * to indicating this
+ */
+static u32 sfi_use_ioremap __read_mostly;
+
+static void __iomem *sfi_map_memory(u64 phys, u32 size)
+{
+	if (!phys || !size)
+		return NULL;
+
+	if (sfi_use_ioremap)
+		return ioremap(phys, size);
+	else
+		return early_ioremap(phys, size);
+}
+
+static void sfi_unmap_memory(void __iomem *virt, u32 size)
+{
+	if (!virt || !size)
+		return;
+
+	if (sfi_use_ioremap)
+		iounmap(virt);
+	else
+		early_iounmap(virt, size);
+}
+
+static void sfi_print_table_header(unsigned long long pa,
+				struct sfi_table_header *header)
+{
+	pr_info("%4.4s %llX, %04X (v%d %6.6s %8.8s)\n",
+		header->sig, pa,
+		header->len, header->rev, header->oem_id,
+		header->oem_table_id);
+}
+
+/*
+ * sfi_verify_table()
+ * Sanity check table lengh, calculate checksum
+ */
+static __init int sfi_verify_table(struct sfi_table_header *table)
+{
+
+	u8 checksum = 0;
+	u8 *puchar = (u8 *)table;
+	u32 length = table->len;
+
+	/* Sanity check table length against arbitrary 1MB limit */
+	if (length > 0x100000) {
+		pr_err("Invalid table length 0x%x\n", length);
+		return -1;
+	}
+
+	while (length--)
+		checksum += *puchar++;
+
+	if (checksum) {
+		pr_err("Checksum %2.2X should be %2.2X\n",
+			table->csum, table->csum - checksum);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * sfi_map_table()
+ *
+ * Return address of mapped table
+ * Check for common case that we can re-use mapping to SYST,
+ * which requires syst_pa, syst_va to be initialized.
+ */
+struct sfi_table_header *sfi_map_table(u64 pa)
+{
+	struct sfi_table_header *th;
+	u32 length;
+
+	if (!TABLE_ON_PAGE(syst_pa, pa, sizeof(struct sfi_table_header)))
+		th = sfi_map_memory(pa, sizeof(struct sfi_table_header));
+	else
+		th = (void *)syst_va + (pa - syst_pa);
+
+	 /* If table fits on same page as its header, we are done */
+	if (TABLE_ON_PAGE(th, th, th->len))
+		return th;
+
+	/* Entire table does not fit on same page as SYST */
+	length = th->len;
+	if (!TABLE_ON_PAGE(syst_pa, pa, sizeof(struct sfi_table_header)))
+		sfi_unmap_memory(th, sizeof(struct sfi_table_header));
+
+	return sfi_map_memory(pa, length);
+}
+
+/*
+ * sfi_unmap_table()
+ *
+ * Undoes effect of sfi_map_table() by unmapping table
+ * if it did not completely fit on same page as SYST.
+ */
+void sfi_unmap_table(struct sfi_table_header *th)
+{
+	if (!TABLE_ON_PAGE(syst_va, th, th->len))
+		sfi_unmap_memory(th, TABLE_ON_PAGE(th, th, th->len) ?
+					sizeof(*th) : th->len);
+}
+
+static int sfi_table_check_key(struct sfi_table_header *th,
+				struct sfi_table_key *key)
+{
+
+	if (strncmp(th->sig, key->sig, SFI_SIGNATURE_SIZE)
+		|| (key->oem_id && strncmp(th->oem_id,
+				key->oem_id, SFI_OEM_ID_SIZE))
+		|| (key->oem_table_id && strncmp(th->oem_table_id,
+				key->oem_table_id, SFI_OEM_TABLE_ID_SIZE)))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * This function will be used in 2 cases:
+ * 1. used to enumerate and verify the tables addressed by SYST/XSDT,
+ *    thus no signature will be given (in kernel boot phase)
+ * 2. used to parse one specific table, signature must exist, and
+ *    the mapped virt address will be returned, and the virt space
+ *    will be released by call sfi_put_table() later
+ *
+ * Return value:
+ *	NULL:			when can't find a table matching the key
+ *	ERR_PTR(error):		error value
+ *	virt table address:	when a matched table is found
+ */
+struct sfi_table_header *sfi_check_table(u64 pa, struct sfi_table_key *key)
+{
+	struct sfi_table_header *th;
+	void *ret = NULL;
+
+	th = sfi_map_table(pa);
+	if (!th)
+		return ERR_PTR(-ENOMEM);
+
+	if (!key->sig) {
+		sfi_print_table_header(pa, th);
+		if (sfi_verify_table(th))
+			ret = ERR_PTR(-EINVAL);
+	} else {
+		if (!sfi_table_check_key(th, key))
+			return th;	/* Success */
+	}
+
+	sfi_unmap_table(th);
+	return ret;
+}
+
+/*
+ * sfi_get_table()
+ *
+ * Search SYST for the specified table with the signature in
+ * the key, and return the mapped table
+ */
+struct sfi_table_header *sfi_get_table(struct sfi_table_key *key)
+{
+	struct sfi_table_header *th;
+	u32 tbl_cnt, i;
+
+	tbl_cnt = SFI_GET_NUM_ENTRIES(syst_va, u64);
+	for (i = 0; i < tbl_cnt; i++) {
+		th = sfi_check_table(syst_va->pentry[i], key);
+		if (!IS_ERR(th) && th)
+			return th;
+	}
+
+	return NULL;
+}
+
+void sfi_put_table(struct sfi_table_header *th)
+{
+	sfi_unmap_table(th);
+}
+
+/* Find table with signature, run handler on it */
+int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id,
+			sfi_table_handler handler)
+{
+	struct sfi_table_header *table = NULL;
+	struct sfi_table_key key;
+	int ret = -EINVAL;
+
+	if (sfi_disabled || !handler || !signature)
+		goto exit;
+
+	key.sig = signature;
+	key.oem_id = oem_id;
+	key.oem_table_id = oem_table_id;
+
+	table = sfi_get_table(&key);
+	if (!table)
+		goto exit;
+
+	ret = handler(table);
+	sfi_put_table(table);
+exit:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sfi_table_parse);
+
+/*
+ * sfi_parse_syst()
+ * Checksum all the tables in SYST and print their headers
+ *
+ * success: set syst_va, return 0
+ */
+static int __init sfi_parse_syst(void)
+{
+	struct sfi_table_key key = SFI_ANY_KEY;
+	int tbl_cnt, i;
+	void *ret;
+
+	syst_va = sfi_map_memory(syst_pa, sizeof(struct sfi_table_simple));
+	if (!syst_va)
+		return -ENOMEM;
+
+	tbl_cnt = SFI_GET_NUM_ENTRIES(syst_va, u64);
+	for (i = 0; i < tbl_cnt; i++) {
+		ret = sfi_check_table(syst_va->pentry[i], &key);
+		if (IS_ERR(ret))
+			return PTR_ERR(ret);
+	}
+
+	return 0;
+}
+
+/*
+ * The OS finds the System Table by searching 16-byte boundaries between
+ * physical address 0x000E0000 and 0x000FFFFF. The OS shall search this region
+ * starting at the low address and shall stop searching when the 1st valid SFI
+ * System Table is found.
+ *
+ * success: set syst_pa, return 0
+ * fail: return -1
+ */
+static __init int sfi_find_syst(void)
+{
+	unsigned long offset, len;
+	void *start;
+
+	len = SFI_SYST_SEARCH_END - SFI_SYST_SEARCH_BEGIN;
+	start = sfi_map_memory(SFI_SYST_SEARCH_BEGIN, len);
+	if (!start)
+		return -1;
+
+	for (offset = 0; offset < len; offset += 16) {
+		struct sfi_table_header *syst_hdr;
+
+		syst_hdr = start + offset;
+		if (strncmp(syst_hdr->sig, SFI_SIG_SYST,
+				SFI_SIGNATURE_SIZE))
+			continue;
+
+		if (syst_hdr->len > PAGE_SIZE)
+			continue;
+
+		sfi_print_table_header(SFI_SYST_SEARCH_BEGIN + offset,
+					syst_hdr);
+
+		if (sfi_verify_table(syst_hdr))
+			continue;
+
+		/*
+		 * Enforce SFI spec mandate that SYST reside within a page.
+		 */
+		if (!ON_SAME_PAGE(syst_pa, syst_pa + syst_hdr->len)) {
+			pr_info("SYST 0x%llx + 0x%x crosses page\n",
+					syst_pa, syst_hdr->len);
+			continue;
+		}
+
+		/* Success */
+		syst_pa = SFI_SYST_SEARCH_BEGIN + offset;
+		sfi_unmap_memory(start, len);
+		return 0;
+	}
+
+	sfi_unmap_memory(start, len);
+	return -1;
+}
+
+void __init sfi_init(void)
+{
+	if (!acpi_disabled)
+		disable_sfi();
+
+	if (sfi_disabled)
+		return;
+
+	pr_info("Simple Firmware Interface v0.7 http://simplefirmware.org\n");
+
+	if (sfi_find_syst() || sfi_parse_syst() || sfi_platform_init())
+		disable_sfi();
+
+	return;
+}
+
+void __init sfi_init_late(void)
+{
+	int length;
+
+	if (sfi_disabled)
+		return;
+
+	length = syst_va->header.len;
+	sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple));
+
+	/* Use ioremap now after it is ready */
+	sfi_use_ioremap = 1;
+	syst_va = sfi_map_memory(syst_pa, length);
+
+	sfi_acpi_init();
+}
diff --git a/drivers/sfi/sfi_core.h b/drivers/sfi/sfi_core.h
new file mode 100644
index 0000000..da82d39
--- /dev/null
+++ b/drivers/sfi/sfi_core.h
@@ -0,0 +1,70 @@
+/* sfi_core.h Simple Firmware Interface, internal header */
+
+/*
+
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+  The full GNU General Public License is included in this distribution
+  in the file called LICENSE.GPL.
+
+  BSD LICENSE
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+struct sfi_table_key{
+	char	*sig;
+	char	*oem_id;
+	char	*oem_table_id;
+};
+
+#define SFI_ANY_KEY { .sig = NULL, .oem_id = NULL, .oem_table_id = NULL }
+
+extern int __init sfi_acpi_init(void);
+extern  struct sfi_table_header *sfi_check_table(u64 paddr,
+					struct sfi_table_key *key);
+struct sfi_table_header *sfi_get_table(struct sfi_table_key *key);
+extern void sfi_put_table(struct sfi_table_header *table);
diff --git a/init/main.c b/init/main.c
index 11f4f14..9f110fb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -68,6 +68,7 @@
 #include <linux/async.h>
 #include <linux/kmemcheck.h>
 #include <linux/kmemtrace.h>
+#include <linux/sfi.h>
 #include <trace/boot.h>
 
 #include <asm/io.h>
@@ -712,6 +713,7 @@ asmlinkage void __init start_kernel(void)
 	check_bugs();
 
 	acpi_early_init(); /* before LAPIC and SMP init */
+	sfi_init_late();
 
 	ftrace_init();
 
-- 
cgit v0.10.2


From 13e82d023c4c3f13ab1e665cbb917a7ebba8935c Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 14 Aug 2009 15:17:53 -0400
Subject: SFI: add capability to parse ACPI tables

Extend SFI to access standard ACPI tables.
(eg. the PCI MCFG) using sfi_acpi_table_parse().

Note that this is _not_ a hybrid ACPI + SFI mode.
The platform boots in either ACPI mode or SFI mode.

SFI runs only with acpi_disabled=1, which can be set
at build-time via CONFIG_ACPI=n, or at boot time by
the failure to find ACPI platform support.

So this extension simply allows SFI-platforms to
re-use existing standard table formats that happen to
be defined to live in ACPI envelopes.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/sfi/sfi_acpi.c b/drivers/sfi/sfi_acpi.c
new file mode 100644
index 0000000..34aba30
--- /dev/null
+++ b/drivers/sfi/sfi_acpi.c
@@ -0,0 +1,175 @@
+/* sfi_acpi.c Simple Firmware Interface - ACPI extensions */
+
+/*
+
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+  The full GNU General Public License is included in this distribution
+  in the file called LICENSE.GPL.
+
+  BSD LICENSE
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#define KMSG_COMPONENT "SFI"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <acpi/acpi.h>
+
+#include <linux/sfi.h>
+#include "sfi_core.h"
+
+/*
+ * SFI can access ACPI-defined tables via an optional ACPI XSDT.
+ *
+ * This allows re-use, and avoids re-definition, of standard tables.
+ * For example, the "MCFG" table is defined by PCI, reserved by ACPI,
+ * and is expected to be present many SFI-only systems.
+ */
+
+static struct acpi_table_xsdt *xsdt_va __read_mostly;
+
+#define XSDT_GET_NUM_ENTRIES(ptable, entry_type) \
+	((ptable->header.length - sizeof(struct acpi_table_header)) / \
+	(sizeof(entry_type)))
+
+static inline struct sfi_table_header *acpi_to_sfi_th(
+				struct acpi_table_header *th)
+{
+	return (struct sfi_table_header *)th;
+}
+
+static inline struct acpi_table_header *sfi_to_acpi_th(
+				struct sfi_table_header *th)
+{
+	return (struct acpi_table_header *)th;
+}
+
+/*
+ * sfi_acpi_parse_xsdt()
+ *
+ * Parse the ACPI XSDT for later access by sfi_acpi_table_parse().
+ */
+static int __init sfi_acpi_parse_xsdt(struct sfi_table_header *th)
+{
+	struct sfi_table_key key = SFI_ANY_KEY;
+	int tbl_cnt, i;
+	void *ret;
+
+	xsdt_va = (struct acpi_table_xsdt *)th;
+	tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64);
+	for (i = 0; i < tbl_cnt; i++) {
+		ret = sfi_check_table(xsdt_va->table_offset_entry[i], &key);
+		if (IS_ERR(ret)) {
+			disable_sfi();
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int __init sfi_acpi_init(void)
+{
+	struct sfi_table_key xsdt_key = { .sig = SFI_SIG_XSDT };
+
+	sfi_table_parse(SFI_SIG_XSDT, NULL, NULL, sfi_acpi_parse_xsdt);
+
+	/* Only call the get_table to keep the table mapped */
+	xsdt_va = (struct acpi_table_xsdt *)sfi_get_table(&xsdt_key);
+	return 0;
+}
+
+static struct acpi_table_header *sfi_acpi_get_table(struct sfi_table_key *key)
+{
+	u32 tbl_cnt, i;
+	void *ret;
+
+	tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64);
+	for (i = 0; i < tbl_cnt; i++) {
+		ret = sfi_check_table(xsdt_va->table_offset_entry[i], key);
+		if (!IS_ERR(ret) && ret)
+			return sfi_to_acpi_th(ret);
+	}
+
+	return NULL;
+}
+
+static void sfi_acpi_put_table(struct acpi_table_header *table)
+{
+	sfi_put_table(acpi_to_sfi_th(table));
+}
+
+/*
+ * sfi_acpi_table_parse()
+ *
+ * Find specified table in XSDT, run handler on it and return its return value
+ */
+int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id,
+			int(*handler)(struct acpi_table_header *))
+{
+	struct acpi_table_header *table = NULL;
+	struct sfi_table_key key;
+	int ret = 0;
+
+	if (sfi_disabled)
+		return -1;
+
+	key.sig = signature;
+	key.oem_id = oem_id;
+	key.oem_table_id = oem_table_id;
+
+	table = sfi_acpi_get_table(&key);
+	if (!table)
+		return -EINVAL;
+
+	ret = handler(table);
+	sfi_acpi_put_table(table);
+	return ret;
+}
diff --git a/include/linux/sfi_acpi.h b/include/linux/sfi_acpi.h
new file mode 100644
index 0000000..c4a5a8c
--- /dev/null
+++ b/include/linux/sfi_acpi.h
@@ -0,0 +1,93 @@
+/* sfi.h Simple Firmware Interface */
+
+/*
+
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+  The full GNU General Public License is included in this distribution
+  in the file called LICENSE.GPL.
+
+  BSD LICENSE
+
+  Copyright(c) 2009 Intel Corporation. All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef _LINUX_SFI_ACPI_H
+#define _LINUX_SFI_ACPI_H
+
+#ifdef CONFIG_SFI
+#include <acpi/acpi.h>		/* struct acpi_table_header */
+
+extern int sfi_acpi_table_parse(char *signature, char *oem_id,
+				char *oem_table_id,
+				int (*handler)(struct acpi_table_header *));
+
+static inline int acpi_sfi_table_parse(char *signature,
+				int (*handler)(struct acpi_table_header *))
+{
+	if (!acpi_table_parse(signature, handler))
+		return 0;
+
+	return sfi_acpi_table_parse(signature, NULL, NULL, handler);
+}
+#else /* !CONFIG_SFI */
+
+static inline int sfi_acpi_table_parse(char *signature, char *oem_id,
+				char *oem_table_id,
+				int (*handler)(struct acpi_table_header *))
+{
+	return -1;
+}
+
+static inline int acpi_sfi_table_parse(char *signature,
+				int (*handler)(struct acpi_table_header *))
+{
+	return acpi_table_parse(signature, handler);
+}
+#endif /* !CONFIG_SFI */
+
+#endif /*_LINUX_SFI_ACPI_H*/
-- 
cgit v0.10.2


From efafc8b213e67ed148a5b53ade29ee7b48af907d Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 14 Aug 2009 15:23:29 -0400
Subject: x86: add arch-specific SFI support

arch/x86/kernel/sfi.c serves the dual-purpose of supporting the
SFI core with arch specific code, as well as a home for the
arch-specific code that uses SFI.

analogous to ACPI, drivers/sfi/Kconfig is pulled in by arch/x86/Kconfig

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 13ffa5d..d8ba424 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1683,6 +1683,8 @@ source "kernel/power/Kconfig"
 
 source "drivers/acpi/Kconfig"
 
+source "drivers/sfi/Kconfig"
+
 config X86_APM_BOOT
 	bool
 	default y
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 430d5b2..6321afa 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -55,6 +55,7 @@ obj-y				+= step.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
+obj-$(CONFIG_SFI)		+= sfi.o
 obj-y				+= reboot.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 63f32d2..d784ea2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -27,6 +27,7 @@
 #include <linux/screen_info.h>
 #include <linux/ioport.h>
 #include <linux/acpi.h>
+#include <linux/sfi.h>
 #include <linux/apm_bios.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
@@ -990,6 +991,8 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_init();
 
+	sfi_init();
+
 #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
 	/*
 	 * get boot-time SMP configuration:
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
new file mode 100644
index 0000000..761df3f
--- /dev/null
+++ b/arch/x86/kernel/sfi.c
@@ -0,0 +1,133 @@
+/*
+ * sfi.c - x86 architecture SFI support.
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#define KMSG_COMPONENT "SFI"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/bootmem.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/sfi.h>
+#include <linux/io.h>
+
+#include <asm/io_apic.h>
+#include <asm/pgtable.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+
+void __init mp_sfi_register_lapic_address(unsigned long address)
+{
+	mp_lapic_addr = address;
+
+	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid = read_apic_id();
+
+	pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
+}
+
+/* All CPUs enumerated by SFI must be present and enabled */
+void __cpuinit mp_sfi_register_lapic(u8 id)
+{
+	int boot_cpu = 0;
+
+	if (MAX_APICS - id <= 0) {
+		pr_warning("Processor #%d invalid (max %d)\n",
+			id, MAX_APICS);
+		return;
+	}
+
+	if (id == boot_cpu_physical_apicid)
+		boot_cpu = 1;
+	pr_info("registering lapic[%d]\n", id);
+
+	generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
+}
+
+static int __init sfi_parse_cpus(struct sfi_table_header *table)
+{
+	struct sfi_table_simple *sb;
+	struct sfi_cpu_table_entry *pentry;
+	int i;
+	int cpu_num;
+
+	sb = (struct sfi_table_simple *)table;
+	cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
+	pentry = (struct sfi_cpu_table_entry *)sb->pentry;
+
+	for (i = 0; i < cpu_num; i++) {
+		mp_sfi_register_lapic(pentry->apic_id);
+		pentry++;
+	}
+
+	smp_found_config = 1;
+	return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+static u32 gsi_base;
+
+static int __init sfi_parse_ioapic(struct sfi_table_header *table)
+{
+	struct sfi_table_simple *sb;
+	struct sfi_apic_table_entry *pentry;
+	int i, num;
+
+	sb = (struct sfi_table_simple *)table;
+	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
+	pentry = (struct sfi_apic_table_entry *)sb->pentry;
+
+	for (i = 0; i < num; i++) {
+		mp_register_ioapic(i, pentry->phys_addr, gsi_base);
+		gsi_base += io_apic_get_redir_entries(i);
+		pentry++;
+	}
+
+	WARN(pic_mode, KERN_WARNING
+		"SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
+	pic_mode = 0;
+	return 0;
+}
+#endif /* CONFIG_X86_IO_APIC */
+
+/*
+ * sfi_platform_init(): register lapics & io-apics
+ */
+int __init sfi_platform_init(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	mp_sfi_register_lapic_address(sfi_lapic_addr);
+	sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
+#endif
+#ifdef CONFIG_X86_IO_APIC
+	sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
+#endif
+	return 0;
+}
-- 
cgit v0.10.2


From 5f0db7a2fb78895a197f64e548333b3bbd433996 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 14 Aug 2009 15:37:50 -0400
Subject: SFI: Hook PCI MMCONFIG

First check ACPI, and if that fails, ask SFI to find the MCFG.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d8ba424..4c92c91 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1880,7 +1880,7 @@ config PCI_DIRECT
 
 config PCI_MMCONFIG
 	def_bool y
-	depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
+	depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
 
 config PCI_OLPC
 	def_bool y
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index b707a01..602c172 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -13,10 +13,12 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/sfi_acpi.h>
 #include <linux/bitmap.h>
 #include <linux/sort.h>
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
+#include <asm/acpi.h>
 
 #define PREFIX "PCI: "
 
@@ -493,7 +495,7 @@ static void __init pci_mmcfg_reject_broken(int early)
 		       (unsigned int)cfg->start_bus_number,
 		       (unsigned int)cfg->end_bus_number);
 
-		if (!early)
+		if (!early && !acpi_disabled)
 			valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
 
 		if (valid)
@@ -608,7 +610,7 @@ static void __init __pci_mmcfg_init(int early)
 	}
 
 	if (!known_bridge)
-		acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
+		acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
 
 	pci_mmcfg_reject_broken(early);
 
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 8b2d561..f10a7e9 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -11,9 +11,9 @@
 
 #include <linux/pci.h>
 #include <linux/init.h>
-#include <linux/acpi.h>
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
+#include <acpi/acpi.h>
 
 /* Assume systems with more busses have correct MCFG */
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
-- 
cgit v0.10.2


From 2b022e3d4bf9885f781221c59d86283a2cdfc2ed Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Mon, 10 Aug 2009 10:48:59 +0800
Subject: timers: Add tracepoints for timer_list timers

Add tracepoints which cover the timer life cycle. The tracepoints are
integrated with the already existing debug_object debug points as far
as possible.

Based on patches from
Mathieu: http://marc.info/?l=linux-kernel&m=123791201816247&w=2
and
Anton: http://marc.info/?l=linux-kernel&m=124331396919301&w=2

[ tglx: Fixed timeout value in timer_start tracepoint, massaged
  comments and made the printk's more readable ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
LKML-Reference: <4A7F8A9B.3040201@cn.fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
new file mode 100644
index 0000000..725892a
--- /dev/null
+++ b/include/trace/events/timer.h
@@ -0,0 +1,137 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM timer
+
+#if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TIMER_H
+
+#include <linux/tracepoint.h>
+#include <linux/timer.h>
+
+/**
+ * timer_init - called when the timer is initialized
+ * @timer:	pointer to struct timer_list
+ */
+TRACE_EVENT(timer_init,
+
+	TP_PROTO(struct timer_list *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer	)
+	),
+
+	TP_fast_assign(
+		__entry->timer	= timer;
+	),
+
+	TP_printk("timer %p", __entry->timer)
+);
+
+/**
+ * timer_start - called when the timer is started
+ * @timer:	pointer to struct timer_list
+ * @expires:	the timers expiry time
+ */
+TRACE_EVENT(timer_start,
+
+	TP_PROTO(struct timer_list *timer, unsigned long expires),
+
+	TP_ARGS(timer, expires),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer		)
+		__field( void *,	function	)
+		__field( unsigned long,	expires		)
+		__field( unsigned long,	now		)
+	),
+
+	TP_fast_assign(
+		__entry->timer		= timer;
+		__entry->function	= timer->function;
+		__entry->expires	= expires;
+		__entry->now		= jiffies;
+	),
+
+	TP_printk("timer %p: func %pf, expires %lu, timeout %ld",
+		  __entry->timer, __entry->function, __entry->expires,
+		  (long)__entry->expires - __entry->now)
+);
+
+/**
+ * timer_expire_entry - called immediately before the timer callback
+ * @timer:	pointer to struct timer_list
+ *
+ * Allows to determine the timer latency.
+ */
+TRACE_EVENT(timer_expire_entry,
+
+	TP_PROTO(struct timer_list *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer	)
+		__field( unsigned long,	now	)
+	),
+
+	TP_fast_assign(
+		__entry->timer		= timer;
+		__entry->now		= jiffies;
+	),
+
+	TP_printk("timer %p: now %lu", __entry->timer, __entry->now)
+);
+
+/**
+ * timer_expire_exit - called immediately after the timer callback returns
+ * @timer:	pointer to struct timer_list
+ *
+ * When used in combination with the timer_expire_entry tracepoint we can
+ * determine the runtime of the timer callback function.
+ *
+ * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
+ * be invalid. We solely track the pointer.
+ */
+TRACE_EVENT(timer_expire_exit,
+
+	TP_PROTO(struct timer_list *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field(void *,	timer	)
+	),
+
+	TP_fast_assign(
+		__entry->timer	= timer;
+	),
+
+	TP_printk("timer %p", __entry->timer)
+);
+
+/**
+ * timer_cancel - called when the timer is canceled
+ * @timer:	pointer to struct timer_list
+ */
+TRACE_EVENT(timer_cancel,
+
+	TP_PROTO(struct timer_list *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer	)
+	),
+
+	TP_fast_assign(
+		__entry->timer	= timer;
+	),
+
+	TP_printk("timer %p", __entry->timer)
+);
+
+#endif /*  _TRACE_TIMER_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/timer.c b/kernel/timer.c
index 8e92be6..a7352b0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -46,6 +46,9 @@
 #include <asm/timex.h>
 #include <asm/io.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/timer.h>
+
 u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 
 EXPORT_SYMBOL(jiffies_64);
@@ -521,6 +524,25 @@ static inline void debug_timer_activate(struct timer_list *timer) { }
 static inline void debug_timer_deactivate(struct timer_list *timer) { }
 #endif
 
+static inline void debug_init(struct timer_list *timer)
+{
+	debug_timer_init(timer);
+	trace_timer_init(timer);
+}
+
+static inline void
+debug_activate(struct timer_list *timer, unsigned long expires)
+{
+	debug_timer_activate(timer);
+	trace_timer_start(timer, expires);
+}
+
+static inline void debug_deactivate(struct timer_list *timer)
+{
+	debug_timer_deactivate(timer);
+	trace_timer_cancel(timer);
+}
+
 static void __init_timer(struct timer_list *timer,
 			 const char *name,
 			 struct lock_class_key *key)
@@ -549,7 +571,7 @@ void init_timer_key(struct timer_list *timer,
 		    const char *name,
 		    struct lock_class_key *key)
 {
-	debug_timer_init(timer);
+	debug_init(timer);
 	__init_timer(timer, name, key);
 }
 EXPORT_SYMBOL(init_timer_key);
@@ -568,7 +590,7 @@ static inline void detach_timer(struct timer_list *timer,
 {
 	struct list_head *entry = &timer->entry;
 
-	debug_timer_deactivate(timer);
+	debug_deactivate(timer);
 
 	__list_del(entry->prev, entry->next);
 	if (clear_pending)
@@ -632,7 +654,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 			goto out_unlock;
 	}
 
-	debug_timer_activate(timer);
+	debug_activate(timer, expires);
 
 	new_base = __get_cpu_var(tvec_bases);
 
@@ -787,7 +809,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	BUG_ON(timer_pending(timer) || !timer->function);
 	spin_lock_irqsave(&base->lock, flags);
 	timer_set_base(timer, base);
-	debug_timer_activate(timer);
+	debug_activate(timer, timer->expires);
 	if (time_before(timer->expires, base->next_timer) &&
 	    !tbase_get_deferrable(timer->base))
 		base->next_timer = timer->expires;
@@ -1000,7 +1022,9 @@ static inline void __run_timers(struct tvec_base *base)
 				 */
 				lock_map_acquire(&lockdep_map);
 
+				trace_timer_expire_entry(timer);
 				fn(data);
+				trace_timer_expire_exit(timer);
 
 				lock_map_release(&lockdep_map);
 
-- 
cgit v0.10.2


From c6a2a1770245f654f35f60e1458d4356680f9519 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Mon, 10 Aug 2009 10:51:23 +0800
Subject: hrtimer: Add tracepoint for hrtimers

Add tracepoints which cover the life cycle of a hrtimer. The
tracepoints are integrated with the already existing debug_object
debug points as far as possible.

[ tglx: Fixed comments, made output conistent, easier to read and
  	parse. Fixed output for 32bit archs which do not use the
  	scalar representation of ktime_t. Hand current time to
  	trace_hrtimer_expiry_entry instead of calling get_time()
  	inside of the trace assignment. ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
LKML-Reference: <4A7F8B2B.5020908@cn.fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 725892a..df3c07f 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -5,6 +5,7 @@
 #define _TRACE_TIMER_H
 
 #include <linux/tracepoint.h>
+#include <linux/hrtimer.h>
 #include <linux/timer.h>
 
 /**
@@ -131,6 +132,144 @@ TRACE_EVENT(timer_cancel,
 	TP_printk("timer %p", __entry->timer)
 );
 
+/**
+ * hrtimer_init - called when the hrtimer is initialized
+ * @timer:	pointer to struct hrtimer
+ * @clockid:	the hrtimers clock
+ * @mode:	the hrtimers mode
+ */
+TRACE_EVENT(hrtimer_init,
+
+	TP_PROTO(struct hrtimer *timer, clockid_t clockid,
+		 enum hrtimer_mode mode),
+
+	TP_ARGS(timer, clockid, mode),
+
+	TP_STRUCT__entry(
+		__field( void *,		timer		)
+		__field( clockid_t,		clockid		)
+		__field( enum hrtimer_mode,	mode		)
+	),
+
+	TP_fast_assign(
+		__entry->timer		= timer;
+		__entry->clockid	= clockid;
+		__entry->mode		= mode;
+	),
+
+	TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer,
+		  __entry->clockid == CLOCK_REALTIME ?
+			"CLOCK_REALTIME" : "CLOCK_MONOTONIC",
+		  __entry->mode == HRTIMER_MODE_ABS ?
+			"HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL")
+);
+
+/**
+ * hrtimer_start - called when the hrtimer is started
+ * @timer: pointer to struct hrtimer
+ */
+TRACE_EVENT(hrtimer_start,
+
+	TP_PROTO(struct hrtimer *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer		)
+		__field( void *,	function	)
+		__field( s64,		expires		)
+		__field( s64,		softexpires	)
+	),
+
+	TP_fast_assign(
+		__entry->timer		= timer;
+		__entry->function	= timer->function;
+		__entry->expires	= hrtimer_get_expires(timer).tv64;
+		__entry->softexpires	= hrtimer_get_softexpires(timer).tv64;
+	),
+
+	TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu",
+		  __entry->timer, __entry->function,
+		  (unsigned long long)ktime_to_ns((ktime_t) {
+				  .tv64 = __entry->expires }),
+		  (unsigned long long)ktime_to_ns((ktime_t) {
+				  .tv64 = __entry->softexpires }))
+);
+
+/**
+ * htimmer_expire_entry - called immediately before the hrtimer callback
+ * @timer:	pointer to struct hrtimer
+ * @now:	pointer to variable which contains current time of the
+ *		timers base.
+ *
+ * Allows to determine the timer latency.
+ */
+TRACE_EVENT(hrtimer_expire_entry,
+
+	TP_PROTO(struct hrtimer *timer, ktime_t *now),
+
+	TP_ARGS(timer, now),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer	)
+		__field( s64,		now	)
+	),
+
+	TP_fast_assign(
+		__entry->timer	= timer;
+		__entry->now	= now->tv64;
+	),
+
+	TP_printk("hrtimer %p, now %llu", __entry->timer,
+		  (unsigned long long)ktime_to_ns((ktime_t) {
+				  .tv64 = __entry->now }))
+ );
+
+/**
+ * hrtimer_expire_exit - called immediately after the hrtimer callback returns
+ * @timer:	pointer to struct hrtimer
+ *
+ * When used in combination with the hrtimer_expire_entry tracepoint we can
+ * determine the runtime of the callback function.
+ */
+TRACE_EVENT(hrtimer_expire_exit,
+
+	TP_PROTO(struct hrtimer *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer	)
+	),
+
+	TP_fast_assign(
+		__entry->timer	= timer;
+	),
+
+	TP_printk("hrtimer %p", __entry->timer)
+);
+
+/**
+ * hrtimer_cancel - called when the hrtimer is canceled
+ * @timer:	pointer to struct hrtimer
+ */
+TRACE_EVENT(hrtimer_cancel,
+
+	TP_PROTO(struct hrtimer *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field( void *,	timer	)
+	),
+
+	TP_fast_assign(
+		__entry->timer	= timer;
+	),
+
+	TP_printk("hrtimer %p", __entry->timer)
+);
+
 #endif /*  _TRACE_TIMER_H */
 
 /* This part must be outside protection */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e2f91ec..b44d1b0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -48,6 +48,8 @@
 
 #include <asm/uaccess.h>
 
+#include <trace/events/timer.h>
+
 /*
  * The timer bases:
  *
@@ -441,6 +443,26 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 #endif
 
+static inline void
+debug_init(struct hrtimer *timer, clockid_t clockid,
+	   enum hrtimer_mode mode)
+{
+	debug_hrtimer_init(timer);
+	trace_hrtimer_init(timer, clockid, mode);
+}
+
+static inline void debug_activate(struct hrtimer *timer)
+{
+	debug_hrtimer_activate(timer);
+	trace_hrtimer_start(timer);
+}
+
+static inline void debug_deactivate(struct hrtimer *timer)
+{
+	debug_hrtimer_deactivate(timer);
+	trace_hrtimer_cancel(timer);
+}
+
 /* High resolution timer related functions */
 #ifdef CONFIG_HIGH_RES_TIMERS
 
@@ -797,7 +819,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
 	struct hrtimer *entry;
 	int leftmost = 1;
 
-	debug_hrtimer_activate(timer);
+	debug_activate(timer);
 
 	/*
 	 * Find the right place in the rbtree:
@@ -883,7 +905,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 		 * reprogramming happens in the interrupt handler. This is a
 		 * rare case and less expensive than a smp call.
 		 */
-		debug_hrtimer_deactivate(timer);
+		debug_deactivate(timer);
 		timer_stats_hrtimer_clear_start_info(timer);
 		reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
 		__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
@@ -1116,7 +1138,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 		  enum hrtimer_mode mode)
 {
-	debug_hrtimer_init(timer);
+	debug_init(timer, clock_id, mode);
 	__hrtimer_init(timer, clock_id, mode);
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
@@ -1140,7 +1162,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
-static void __run_hrtimer(struct hrtimer *timer)
+static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 {
 	struct hrtimer_clock_base *base = timer->base;
 	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
@@ -1149,7 +1171,7 @@ static void __run_hrtimer(struct hrtimer *timer)
 
 	WARN_ON(!irqs_disabled());
 
-	debug_hrtimer_deactivate(timer);
+	debug_deactivate(timer);
 	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
 	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
@@ -1160,7 +1182,9 @@ static void __run_hrtimer(struct hrtimer *timer)
 	 * the timer base.
 	 */
 	spin_unlock(&cpu_base->lock);
+	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
+	trace_hrtimer_expire_exit(timer);
 	spin_lock(&cpu_base->lock);
 
 	/*
@@ -1271,7 +1295,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 				break;
 			}
 
-			__run_hrtimer(timer);
+			__run_hrtimer(timer, &basenow);
 		}
 		base++;
 	}
@@ -1393,7 +1417,7 @@ void hrtimer_run_queues(void)
 					hrtimer_get_expires_tv64(timer))
 				break;
 
-			__run_hrtimer(timer);
+			__run_hrtimer(timer, &base->softirq_time);
 		}
 		spin_unlock(&cpu_base->lock);
 	}
@@ -1569,7 +1593,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 	while ((node = rb_first(&old_base->active))) {
 		timer = rb_entry(node, struct hrtimer, node);
 		BUG_ON(hrtimer_callback_running(timer));
-		debug_hrtimer_deactivate(timer);
+		debug_deactivate(timer);
 
 		/*
 		 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
-- 
cgit v0.10.2


From 3f0a525ebf4b8ef041a332bbe4a73aee94bb064b Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Mon, 10 Aug 2009 10:52:30 +0800
Subject: itimers: Add tracepoints for itimer

Add tracepoints for all itimer variants: ITIMER_REAL, ITIMER_VIRTUAL
and ITIMER_PROF.

[ tglx: Fixed comments and made the output more readable, parseable
  	and consistent. Replaced pid_vnr by pid_nr because the hrtimer
  	callback can happen in any namespace ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
LKML-Reference: <4A7F8B6E.2010109@cn.fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index df3c07f..1844c48 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -270,6 +270,72 @@ TRACE_EVENT(hrtimer_cancel,
 	TP_printk("hrtimer %p", __entry->timer)
 );
 
+/**
+ * itimer_state - called when itimer is started or canceled
+ * @which:	name of the interval timer
+ * @value:	the itimers value, itimer is canceled if value->it_value is
+ *		zero, otherwise it is started
+ * @expires:	the itimers expiry time
+ */
+TRACE_EVENT(itimer_state,
+
+	TP_PROTO(int which, const struct itimerval *const value,
+		 cputime_t expires),
+
+	TP_ARGS(which, value, expires),
+
+	TP_STRUCT__entry(
+		__field(	int,		which		)
+		__field(	cputime_t,	expires		)
+		__field(	long,		value_sec	)
+		__field(	long,		value_usec	)
+		__field(	long,		interval_sec	)
+		__field(	long,		interval_usec	)
+	),
+
+	TP_fast_assign(
+		__entry->which		= which;
+		__entry->expires	= expires;
+		__entry->value_sec	= value->it_value.tv_sec;
+		__entry->value_usec	= value->it_value.tv_usec;
+		__entry->interval_sec	= value->it_interval.tv_sec;
+		__entry->interval_usec	= value->it_interval.tv_usec;
+	),
+
+	TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu",
+		  __entry->which, __entry->expires,
+		  __entry->value_sec, __entry->value_usec,
+		  __entry->interval_sec, __entry->interval_usec)
+);
+
+/**
+ * itimer_expire - called when itimer expires
+ * @which:	type of the interval timer
+ * @pid:	pid of the process which owns the timer
+ * @now:	current time, used to calculate the latency of itimer
+ */
+TRACE_EVENT(itimer_expire,
+
+	TP_PROTO(int which, struct pid *pid, cputime_t now),
+
+	TP_ARGS(which, pid, now),
+
+	TP_STRUCT__entry(
+		__field( int ,		which	)
+		__field( pid_t,		pid	)
+		__field( cputime_t,	now	)
+	),
+
+	TP_fast_assign(
+		__entry->which	= which;
+		__entry->now	= now;
+		__entry->pid	= pid_nr(pid);
+	),
+
+	    TP_printk("which %d, pid %d, now %lu", __entry->which,
+		      (int) __entry->pid, __entry->now)
+);
+
 #endif /*  _TRACE_TIMER_H */
 
 /* This part must be outside protection */
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 8078a32..b03451e 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/posix-timers.h>
 #include <linux/hrtimer.h>
+#include <trace/events/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -122,6 +123,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 	struct signal_struct *sig =
 		container_of(timer, struct signal_struct, real_timer);
 
+	trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
 	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
 
 	return HRTIMER_NORESTART;
@@ -166,6 +168,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 	}
 	it->expires = nval;
 	it->incr = ninterval;
+	trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
+			   ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
 
 	spin_unlock_irq(&tsk->sighand->siglock);
 
@@ -217,6 +221,7 @@ again:
 		} else
 			tsk->signal->it_real_incr.tv64 = 0;
 
+		trace_itimer_state(ITIMER_REAL, value, 0);
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 12161f7..5c9dc22 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,7 @@
 #include <linux/math64.h>
 #include <asm/uaccess.h>
 #include <linux/kernel_stat.h>
+#include <trace/events/timer.h>
 
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
@@ -1090,9 +1091,13 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 							  cputime_one_jiffy);
 				it->error -= onecputick;
 			}
-		} else
+		} else {
 			it->expires = cputime_zero;
+		}
 
+		trace_itimer_expire(signo == SIGPROF ?
+				    ITIMER_PROF : ITIMER_VIRTUAL,
+				    tsk->signal->leader_pid, cur_time);
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	}
 
-- 
cgit v0.10.2


From be96666065fd36ccfa09a13903d31d7ff5f4ef91 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Sat, 29 Aug 2009 10:28:29 +0200
Subject: asus-laptop: Fix coding style for comments

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 5b912cc..b39d2bb 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -77,16 +77,16 @@
  * Flags for hotk status
  * WL_ON and BT_ON are also used for wireless_status()
  */
-#define WL_ON       0x01	//internal Wifi
-#define BT_ON       0x02	//internal Bluetooth
-#define MLED_ON     0x04	//mail LED
-#define TLED_ON     0x08	//touchpad LED
-#define RLED_ON     0x10	//Record LED
-#define PLED_ON     0x20	//Phone LED
-#define GLED_ON     0x40	//Gaming LED
-#define LCD_ON      0x80	//LCD backlight
-#define GPS_ON      0x100	//GPS
-#define KEY_ON      0x200       //Keyboard backlight
+#define WL_ON       0x01	/* internal Wifi */
+#define BT_ON       0x02	/* internal Bluetooth */
+#define MLED_ON     0x04	/* mail LED */
+#define TLED_ON     0x08	/* touchpad LED */
+#define RLED_ON     0x10	/* Record LED */
+#define PLED_ON     0x20	/* Phone LED */
+#define GLED_ON     0x40	/* Gaming LED */
+#define LCD_ON      0x80	/* LCD backlight */
+#define GPS_ON      0x100	/* GPS */
+#define KEY_ON      0x200	/* Keyboard backlight */
 
 #define ASUS_LOG    ASUS_HOTK_FILE ": "
 #define ASUS_ERR    KERN_ERR    ASUS_LOG
@@ -99,7 +99,8 @@ MODULE_AUTHOR("Julien Lerouge, Karol Kozimor, Corentin Chary");
 MODULE_DESCRIPTION(ASUS_HOTK_NAME);
 MODULE_LICENSE("GPL");
 
-/* WAPF defines the behavior of the Fn+Fx wlan key
+/*
+ * WAPF defines the behavior of the Fn+Fx wlan key
  * The significance of values is yet to be found, but
  * most of the time:
  * 0x0 will do nothing
@@ -126,7 +127,8 @@ ASUS_HANDLE(gled_set, ASUS_HOTK_PREFIX "GLED");	/* G1, G2 (probably) */
 /* LEDD */
 ASUS_HANDLE(ledd_set, ASUS_HOTK_PREFIX "SLCM");
 
-/* Bluetooth and WLAN
+/*
+ * Bluetooth and WLAN
  * WLED and BLED are not handled like other XLED, because in some dsdt
  * they also control the WLAN/Bluetooth device.
  */
@@ -150,22 +152,32 @@ ASUS_HANDLE(lcd_switch, "\\_SB.PCI0.SBRG.EC0._Q10",	/* All new models */
 
 /* Display */
 ASUS_HANDLE(display_set, ASUS_HOTK_PREFIX "SDSP");
-ASUS_HANDLE(display_get, "\\_SB.PCI0.P0P1.VGA.GETD",	/*  A6B, A6K A6R A7D F3JM L4R M6R A3G
-							   M6A M6V VX-1 V6J V6V W3Z */
-	    "\\_SB.PCI0.P0P2.VGA.GETD",	/* A3E A4K, A4D A4L A6J A7J A8J Z71V M9V
-					   S5A M5A z33A W1Jc W2V G1 */
-	    "\\_SB.PCI0.P0P3.VGA.GETD",	/* A6V A6Q */
-	    "\\_SB.PCI0.P0PA.VGA.GETD",	/* A6T, A6M */
-	    "\\_SB.PCI0.PCI1.VGAC.NMAP",	/* L3C */
-	    "\\_SB.PCI0.VGA.GETD",	/* Z96F */
-	    "\\ACTD",		/* A2D */
-	    "\\ADVG",		/* A4G Z71A W1N W5A W5F M2N M3N M5N M6N S1N S5N */
-	    "\\DNXT",		/* P30 */
-	    "\\INFB",		/* A2H D1 L2D L3D L3H L2E L5D L5C M1A M2E L4L W3V */
-	    "\\SSTE");		/* A3F A6F A3N A3L M6N W3N W6A */
-
-ASUS_HANDLE(ls_switch, ASUS_HOTK_PREFIX "ALSC");	/* Z71A Z71V */
-ASUS_HANDLE(ls_level, ASUS_HOTK_PREFIX "ALSL");	/* Z71A Z71V */
+ASUS_HANDLE(display_get,
+	    /* A6B, A6K A6R A7D F3JM L4R M6R A3G M6A M6V VX-1 V6J V6V W3Z */
+	    "\\_SB.PCI0.P0P1.VGA.GETD",
+	    /* A3E A4K, A4D A4L A6J A7J A8J Z71V M9V S5A M5A z33A W1Jc W2V G1 */
+	    "\\_SB.PCI0.P0P2.VGA.GETD",
+	    /* A6V A6Q */
+	    "\\_SB.PCI0.P0P3.VGA.GETD",
+	    /* A6T, A6M */
+	    "\\_SB.PCI0.P0PA.VGA.GETD",
+	    /* L3C */
+	    "\\_SB.PCI0.PCI1.VGAC.NMAP",
+	    /* Z96F */
+	    "\\_SB.PCI0.VGA.GETD",
+	    /* A2D */
+	    "\\ACTD",
+	    /* A4G Z71A W1N W5A W5F M2N M3N M5N M6N S1N S5N */
+	    "\\ADVG",
+	    /* P30 */
+	    "\\DNXT",
+	    /* A2H D1 L2D L3D L3H L2E L5D L5C M1A M2E L4L W3V */
+	    "\\INFB",
+	    /* A3F A6F A3N A3L M6N W3N W6A */
+	    "\\SSTE");
+
+ASUS_HANDLE(ls_switch, ASUS_HOTK_PREFIX "ALSC"); /* Z71A Z71V */
+ASUS_HANDLE(ls_level, ASUS_HOTK_PREFIX "ALSL");	 /* Z71A Z71V */
 
 /* GPS */
 /* R2H use different handle for GPS on/off */
@@ -182,14 +194,14 @@ ASUS_HANDLE(kled_get, ASUS_HOTK_PREFIX "GLKB");
  * about the hotk device
  */
 struct asus_hotk {
-	char *name;		//laptop name
-	struct acpi_device *device;	//the device we are in
-	acpi_handle handle;	//the handle of the hotk device
-	char status;		//status of the hotk, for LEDs, ...
-	u32 ledd_status;	//status of the LED display
-	u8 light_level;		//light sensor level
-	u8 light_switch;	//light sensor switch value
-	u16 event_count[128];	//count for each event TODO make this better
+	char *name;		/* laptop name */
+	struct acpi_device *device;	/* the device we are in */
+	acpi_handle handle;	/* the handle of the hotk device */
+	char status;		/* status of the hotk, for LEDs, ... */
+	u32 ledd_status;	/* status of the LED display */
+	u8 light_level;		/* light sensor level */
+	u8 light_switch;	/* light sensor switch value */
+	u16 event_count[128];	/* count for each event TODO make this better */
 	struct input_dev *inputdev;
 	u16 *keycode_map;
 };
@@ -242,10 +254,12 @@ static struct backlight_ops asusbl_ops = {
 	.update_status = update_bl_status,
 };
 
-/* These functions actually update the LED's, and are called from a
+/*
+ * These functions actually update the LED's, and are called from a
  * workqueue. By doing this as separate work rather than when the LED
  * subsystem asks, we avoid messing with the Asus ACPI stuff during a
- * potentially bad time, such as a timer interrupt. */
+ * potentially bad time, such as a timer interrupt.
+ */
 static struct workqueue_struct *led_workqueue;
 
 #define ASUS_LED(object, ledname, max)					\
@@ -318,8 +332,8 @@ static struct key_entry asus_keymap[] = {
 static int write_acpi_int(acpi_handle handle, const char *method, int val,
 			  struct acpi_buffer *output)
 {
-	struct acpi_object_list params;	//list of input parameters (an int here)
-	union acpi_object in_obj;	//the only param we use
+	struct acpi_object_list params;	/* list of input parameters (an int) */
+	union acpi_object in_obj;	/* the only param we use */
 	acpi_status status;
 
 	if (!handle)
@@ -574,7 +588,7 @@ static ssize_t show_infos(struct device *dev,
 {
 	int len = 0;
 	unsigned long long temp;
-	char buf[16];		//enough for all info
+	char buf[16];		/* enough for all info */
 	acpi_status rv = AE_OK;
 
 	/*
@@ -734,8 +748,10 @@ static int read_display(void)
 	unsigned long long value = 0;
 	acpi_status rv = AE_OK;
 
-	/* In most of the case, we know how to set the display, but sometime
-	   we can't read it */
+	/*
+	 * In most of the case, we know how to set the display, but sometime
+	 * we can't read it
+	 */
 	if (display_get_handle) {
 		rv = acpi_evaluate_integer(display_get_handle, NULL,
 					   NULL, &value);
@@ -1152,8 +1168,10 @@ static int asus_hotk_get_info(void)
 	ASUS_HANDLE_INIT(display_set);
 	ASUS_HANDLE_INIT(display_get);
 
-	/* There is a lot of models with "ALSL", but a few get
-	   a real light sens, so we need to check it. */
+	/*
+	 * There is a lot of models with "ALSL", but a few get
+	 * a real light sens, so we need to check it.
+	 */
 	if (!ASUS_HANDLE_INIT(ls_switch))
 		ASUS_HANDLE_INIT(ls_level);
 
-- 
cgit v0.10.2


From a8258069793609903b5ebf0bca3320249154c379 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Sat, 29 Aug 2009 10:28:30 +0200
Subject: eeepc-laptop: fix rfkill memory leak on unload

rfkill_unregister() should always be followed by rfkill_destroy()

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 819c685..6f9a448 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -985,6 +985,7 @@ static void eeepc_rfkill_exit(void)
 	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7");
 	if (ehotk->wlan_rfkill) {
 		rfkill_unregister(ehotk->wlan_rfkill);
+		rfkill_destroy(ehotk->wlan_rfkill);
 		ehotk->wlan_rfkill = NULL;
 	}
 	/*
@@ -995,12 +996,21 @@ static void eeepc_rfkill_exit(void)
 	if (ehotk->hotplug_slot)
 		pci_hp_deregister(ehotk->hotplug_slot);
 
-	if (ehotk->bluetooth_rfkill)
+	if (ehotk->bluetooth_rfkill) {
 		rfkill_unregister(ehotk->bluetooth_rfkill);
-	if (ehotk->wwan3g_rfkill)
+		rfkill_destroy(ehotk->bluetooth_rfkill);
+		ehotk->bluetooth_rfkill = NULL;
+	}
+	if (ehotk->wwan3g_rfkill) {
 		rfkill_unregister(ehotk->wwan3g_rfkill);
-	if (ehotk->wimax_rfkill)
+		rfkill_destroy(ehotk->wwan3g_rfkill);
+		ehotk->wwan3g_rfkill = NULL;
+	}
+	if (ehotk->wimax_rfkill) {
 		rfkill_unregister(ehotk->wimax_rfkill);
+		rfkill_destroy(ehotk->wimax_rfkill);
+		ehotk->wimax_rfkill = NULL;
+	}
 }
 
 static void eeepc_input_exit(void)
-- 
cgit v0.10.2


From 52cc96bd5b61775db2792780c610979fc02313eb Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Sat, 29 Aug 2009 10:28:31 +0200
Subject: eeepc-laptop: allow rfkill hotplug to work on the 900A model

The 900A provides hotplug notifications on a different ACPI object to
other models.

Reported-by: Trevor <trevor.chart@gmail.com>
Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 6f9a448..da3c08b 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -981,6 +981,7 @@ static void eeepc_backlight_exit(void)
 
 static void eeepc_rfkill_exit(void)
 {
+	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P5");
 	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6");
 	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7");
 	if (ehotk->wlan_rfkill) {
@@ -1101,6 +1102,7 @@ static int eeepc_rfkill_init(struct device *dev)
 	if (result == -EBUSY)
 		result = 0;
 
+	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P5");
 	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6");
 	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7");
 	/*
-- 
cgit v0.10.2


From 138d15692bf76841f252d4b836a535cf5f9154e9 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Fri, 28 Aug 2009 23:29:38 +0400
Subject: ACPICA: Don't switch task then not allowed

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index fcb8e4b..9d7febd 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -149,10 +149,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
 #define ACPI_FREE(a)            kfree(a)
 
 /* Used within ACPICA to show where it is safe to preempt execution */
-
+#include <linux/hardirq.h>
 #define ACPI_PREEMPTION_POINT() \
 	do { \
-		if (!irqs_disabled()) \
+		if (!in_atomic_preempt_off()) \
 			cond_resched(); \
 	} while (0)
 
-- 
cgit v0.10.2


From 4e231fa4cbd3ff53fcb7d76eccd6fd86a152a95f Mon Sep 17 00:00:00 2001
From: Vladimir Serbinenko <phcoder@gmail.com>
Date: Wed, 24 Jun 2009 15:17:36 +0800
Subject: ACPI video: ignore buggy _BQC

_BQC doesn't return a value listed in _BCL method.
http://bugzilla.kernel.org/show_bug.cgi?id=13511

ingore the buggy _BQC method in this case

Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
Signed-off-by: Scott Howard <showard314@gmail.com>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index a0cd0c7..2020907 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -603,6 +603,7 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device,
 					unsigned long long *level)
 {
 	acpi_status status = AE_OK;
+	int i;
 
 	if (device->cap._BQC || device->cap._BCQ) {
 		char *buf = device->cap._BQC ? "_BQC" : "_BCQ";
@@ -618,8 +619,15 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device,
 
 			}
 			*level += bqc_offset_aml_bug_workaround;
-			device->brightness->curr = *level;
-			return 0;
+			for (i = 2; i < device->brightness->count; i++)
+				if (device->brightness->levels[i] == *level) {
+					device->brightness->curr = *level;
+					return 0;
+			}
+			/* BQC returned an invalid level. Stop using it.  */
+			ACPI_WARNING((AE_INFO, "%s returned an invalid level",
+						buf));
+			device->cap._BQC = device->cap._BCQ = 0;
 		} else {
 			/* Fixme:
 			 * should we return an error or ignore this failure?
-- 
cgit v0.10.2


From bc76f90b8a5cf4aceedf210d08d5e8292f820cec Mon Sep 17 00:00:00 2001
From: Hector Martin <hector@marcansoft.com>
Date: Thu, 6 Aug 2009 15:57:48 -0700
Subject: ACPI battery: work around negative s16 battery current on Acer

Acer Aspire 8930G laptops (and possibly others) report the battery current
as a 16-bit signed negative when it is charging.  It also reports it as
0x10000 when the current is 0.  This patch adds a quirk for this which
takes the absolute value of the reported current cast to an s16.  This is
a DSDT bug present in the latest BIOS revision (the EC register is 16 bits
signed and the DSDT attempts to take the 16-bit two's complement of this,
which works for discharge but not charge.  It also breaks zero values
because a 32-bit register is used and the high bits aren't thrown away).

I've enabled this for all Acer systems which report in mA units.  This
should be safe since it won't break compliant systems unless they report a
current above 32A, which is insane.  The patch also detects the valid
32-bit value -1, which indicates unknown status, and does not attempt the
fix in that case (note that this does not conflict with 16-bit -1, which
is 65535 as read normally and gets translated to 1mA).

Signed-off-by: Hector Martin <hector@marcansoft.com>
Acked-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 58b4517..d7a786d 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -31,6 +31,7 @@
 #include <linux/types.h>
 #include <linux/jiffies.h>
 #include <linux/async.h>
+#include <linux/dmi.h>
 
 #ifdef CONFIG_ACPI_PROCFS_POWER
 #include <linux/proc_fs.h>
@@ -85,6 +86,10 @@ static const struct acpi_device_id battery_device_ids[] = {
 
 MODULE_DEVICE_TABLE(acpi, battery_device_ids);
 
+/* For buggy DSDTs that report negative 16-bit values for either charging
+ * or discharging current and/or report 0 as 65536 due to bad math.
+ */
+#define QUIRK_SIGNED16_CURRENT 0x0001
 
 struct acpi_battery {
 	struct mutex lock;
@@ -112,6 +117,7 @@ struct acpi_battery {
 	int state;
 	int power_unit;
 	u8 alarm_present;
+	long quirks;
 };
 
 #define to_acpi_battery(x) container_of(x, struct acpi_battery, bat);
@@ -390,6 +396,11 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
 				 state_offsets, ARRAY_SIZE(state_offsets));
 	battery->update_time = jiffies;
 	kfree(buffer.pointer);
+
+	if ((battery->quirks & QUIRK_SIGNED16_CURRENT) &&
+	    battery->rate_now != -1)
+		battery->rate_now = abs((s16)battery->rate_now);
+
 	return result;
 }
 
@@ -495,6 +506,14 @@ static void sysfs_remove_battery(struct acpi_battery *battery)
 }
 #endif
 
+static void acpi_battery_quirks(struct acpi_battery *battery)
+{
+	battery->quirks = 0;
+	if (dmi_name_in_vendors("Acer") && battery->power_unit) {
+		battery->quirks |= QUIRK_SIGNED16_CURRENT;
+	}
+}
+
 static int acpi_battery_update(struct acpi_battery *battery)
 {
 	int result, old_present = acpi_battery_present(battery);
@@ -513,6 +532,7 @@ static int acpi_battery_update(struct acpi_battery *battery)
 		result = acpi_battery_get_info(battery);
 		if (result)
 			return result;
+		acpi_battery_quirks(battery);
 		acpi_battery_init_alarm(battery);
 	}
 #ifdef CONFIG_ACPI_SYSFS_POWER
-- 
cgit v0.10.2


From 2a84cb9852f52c0cd1c48bca41a8792d44ad06cc Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Sun, 30 Aug 2009 03:06:14 +0400
Subject: ACPI: EC: Merge IRQ and POLL modes

In general, EC transaction should complete in less than 1ms, thus it is possible to merge wait for
1ms in poll mode and 1ms of interrupt transaction timeout.
Still, driver will wait 500ms for EC to complete transaction.

This significantly simplifies driver and makes it immune to problematic EC interrupt
implementations.

It also may lessen kernel start-up time by 500ms.

References: http://bugzilla.kernel.org/show_bug.cgi?id=12949

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 391f331..839b542 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -68,15 +68,13 @@ enum ec_command {
 #define ACPI_EC_DELAY		500	/* Wait 500ms max. during EC ops */
 #define ACPI_EC_UDELAY_GLK	1000	/* Wait 1ms max. to get global lock */
 #define ACPI_EC_CDELAY		10	/* Wait 10us before polling EC */
+#define ACPI_EC_MSI_UDELAY	550	/* Wait 550us for MSI EC */
 
 #define ACPI_EC_STORM_THRESHOLD 8	/* number of false interrupts
 					   per one transaction */
 
 enum {
 	EC_FLAGS_QUERY_PENDING,		/* Query is pending */
-	EC_FLAGS_GPE_MODE,		/* Expect GPE to be sent
-					 * for status change */
-	EC_FLAGS_NO_GPE,		/* Don't use GPE mode */
 	EC_FLAGS_GPE_STORM,		/* GPE storm detected */
 	EC_FLAGS_HANDLERS_INSTALLED	/* Handlers for GPE and
 					 * OpReg are installed */
@@ -170,7 +168,7 @@ static void start_transaction(struct acpi_ec *ec)
 	acpi_ec_write_cmd(ec, ec->curr->command);
 }
 
-static void gpe_transaction(struct acpi_ec *ec, u8 status)
+static void advance_transaction(struct acpi_ec *ec, u8 status)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&ec->curr_lock, flags);
@@ -201,29 +199,6 @@ unlock:
 	spin_unlock_irqrestore(&ec->curr_lock, flags);
 }
 
-static int acpi_ec_wait(struct acpi_ec *ec)
-{
-	if (wait_event_timeout(ec->wait, ec_transaction_done(ec),
-			       msecs_to_jiffies(ACPI_EC_DELAY)))
-		return 0;
-	/* try restart command if we get any false interrupts */
-	if (ec->curr->irq_count &&
-	    (acpi_ec_read_status(ec) & ACPI_EC_FLAG_IBF) == 0) {
-		pr_debug(PREFIX "controller reset, restart transaction\n");
-		start_transaction(ec);
-		if (wait_event_timeout(ec->wait, ec_transaction_done(ec),
-					msecs_to_jiffies(ACPI_EC_DELAY)))
-			return 0;
-	}
-	/* missing GPEs, switch back to poll mode */
-	if (printk_ratelimit())
-		pr_info(PREFIX "missing confirmations, "
-				"switch off interrupt mode.\n");
-	set_bit(EC_FLAGS_NO_GPE, &ec->flags);
-	clear_bit(EC_FLAGS_GPE_MODE, &ec->flags);
-	return 1;
-}
-
 static void acpi_ec_gpe_query(void *ec_cxt);
 
 static int ec_check_sci(struct acpi_ec *ec, u8 state)
@@ -236,43 +211,51 @@ static int ec_check_sci(struct acpi_ec *ec, u8 state)
 	return 0;
 }
 
-static void ec_delay(void)
-{
-	/* EC in MSI notebooks don't tolerate delays other than 550 usec */
-	if (EC_FLAGS_MSI)
-		udelay(ACPI_EC_DELAY);
-	else
-		/* Use shortest sleep available */
-		msleep(1);
-}
-
 static int ec_poll(struct acpi_ec *ec)
 {
-	unsigned long delay = jiffies + msecs_to_jiffies(ACPI_EC_DELAY);
-	udelay(ACPI_EC_CDELAY);
-	while (time_before(jiffies, delay)) {
-		gpe_transaction(ec, acpi_ec_read_status(ec));
-		ec_delay();
-		if (ec_transaction_done(ec))
-			return 0;
+	unsigned long flags;
+	int repeat = 2; /* number of command restarts */
+	while (repeat--) {
+		unsigned long delay = jiffies +
+			msecs_to_jiffies(ACPI_EC_DELAY);
+		do {
+			/* don't sleep with disabled interrupts */
+			if (EC_FLAGS_MSI || irqs_disabled()) {
+				udelay(ACPI_EC_MSI_UDELAY);
+				if (ec_transaction_done(ec))
+					return 0;
+			} else {
+				if (wait_event_timeout(ec->wait,
+						ec_transaction_done(ec),
+						msecs_to_jiffies(1)))
+					return 0;
+			}
+			advance_transaction(ec, acpi_ec_read_status(ec));
+		} while (time_before(jiffies, delay));
+		if (!ec->curr->irq_count ||
+		    (acpi_ec_read_status(ec) & ACPI_EC_FLAG_IBF))
+			break;
+		/* try restart command if we get any false interrupts */
+		pr_debug(PREFIX "controller reset, restart transaction\n");
+		spin_lock_irqsave(&ec->curr_lock, flags);
+		start_transaction(ec);
+		spin_unlock_irqrestore(&ec->curr_lock, flags);
 	}
 	return -ETIME;
 }
 
 static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
-					struct transaction *t,
-					int force_poll)
+					struct transaction *t)
 {
 	unsigned long tmp;
 	int ret = 0;
 	pr_debug(PREFIX "transaction start\n");
 	/* disable GPE during transaction if storm is detected */
 	if (test_bit(EC_FLAGS_GPE_STORM, &ec->flags)) {
-		clear_bit(EC_FLAGS_GPE_MODE, &ec->flags);
 		acpi_disable_gpe(NULL, ec->gpe);
 	}
 	if (EC_FLAGS_MSI)
-		udelay(ACPI_EC_DELAY);
+		udelay(ACPI_EC_MSI_UDELAY);
 	/* start transaction */
 	spin_lock_irqsave(&ec->curr_lock, tmp);
 	/* following two actions should be kept atomic */
@@ -281,11 +264,7 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
 	if (ec->curr->command == ACPI_EC_COMMAND_QUERY)
 		clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags);
 	spin_unlock_irqrestore(&ec->curr_lock, tmp);
-	/* if we selected poll mode or failed in GPE-mode do a poll loop */
-	if (force_poll ||
-	    !test_bit(EC_FLAGS_GPE_MODE, &ec->flags) ||
-	    acpi_ec_wait(ec))
-		ret = ec_poll(ec);
+	ret = ec_poll(ec);
 	pr_debug(PREFIX "transaction end\n");
 	spin_lock_irqsave(&ec->curr_lock, tmp);
 	ec->curr = NULL;
@@ -295,8 +274,7 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
 		ec_check_sci(ec, acpi_ec_read_status(ec));
 		/* it is safe to enable GPE outside of transaction */
 		acpi_enable_gpe(NULL, ec->gpe);
-	} else if (test_bit(EC_FLAGS_GPE_MODE, &ec->flags) &&
-		   t->irq_count > ACPI_EC_STORM_THRESHOLD) {
+	} else if (t->irq_count > ACPI_EC_STORM_THRESHOLD) {
 		pr_info(PREFIX "GPE storm detected, "
 			"transactions will use polling mode\n");
 		set_bit(EC_FLAGS_GPE_STORM, &ec->flags);
@@ -314,16 +292,14 @@ static int ec_wait_ibf0(struct acpi_ec *ec)
 {
 	unsigned long delay = jiffies + msecs_to_jiffies(ACPI_EC_DELAY);
 	/* interrupt wait manually if GPE mode is not active */
-	unsigned long timeout = test_bit(EC_FLAGS_GPE_MODE, &ec->flags) ?
-		msecs_to_jiffies(ACPI_EC_DELAY) : msecs_to_jiffies(1);
 	while (time_before(jiffies, delay))
-		if (wait_event_timeout(ec->wait, ec_check_ibf0(ec), timeout))
+		if (wait_event_timeout(ec->wait, ec_check_ibf0(ec),
+					msecs_to_jiffies(1)))
 			return 0;
 	return -ETIME;
 }
 
-static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t,
-			       int force_poll)
+static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t)
 {
 	int status;
 	u32 glk;
@@ -345,7 +321,7 @@ static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t,
 		status = -ETIME;
 		goto end;
 	}
-	status = acpi_ec_transaction_unlocked(ec, t, force_poll);
+	status = acpi_ec_transaction_unlocked(ec, t);
 end:
 	if (ec->global_lock)
 		acpi_release_global_lock(glk);
@@ -365,7 +341,7 @@ static int acpi_ec_burst_enable(struct acpi_ec *ec)
 				.wdata = NULL, .rdata = &d,
 				.wlen = 0, .rlen = 1};
 
-	return acpi_ec_transaction(ec, &t, 0);
+	return acpi_ec_transaction(ec, &t);
 }
 
 static int acpi_ec_burst_disable(struct acpi_ec *ec)
@@ -375,7 +351,7 @@ static int acpi_ec_burst_disable(struct acpi_ec *ec)
 				.wlen = 0, .rlen = 0};
 
 	return (acpi_ec_read_status(ec) & ACPI_EC_FLAG_BURST) ?
-				acpi_ec_transaction(ec, &t, 0) : 0;
+				acpi_ec_transaction(ec, &t) : 0;
 }
 
 static int acpi_ec_read(struct acpi_ec *ec, u8 address, u8 * data)
@@ -386,7 +362,7 @@ static int acpi_ec_read(struct acpi_ec *ec, u8 address, u8 * data)
 				.wdata = &address, .rdata = &d,
 				.wlen = 1, .rlen = 1};
 
-	result = acpi_ec_transaction(ec, &t, 0);
+	result = acpi_ec_transaction(ec, &t);
 	*data = d;
 	return result;
 }
@@ -398,7 +374,7 @@ static int acpi_ec_write(struct acpi_ec *ec, u8 address, u8 data)
 				.wdata = wdata, .rdata = NULL,
 				.wlen = 2, .rlen = 0};
 
-	return acpi_ec_transaction(ec, &t, 0);
+	return acpi_ec_transaction(ec, &t);
 }
 
 /*
@@ -466,7 +442,7 @@ int ec_transaction(u8 command,
 	if (!first_ec)
 		return -ENODEV;
 
-	return acpi_ec_transaction(first_ec, &t, force_poll);
+	return acpi_ec_transaction(first_ec, &t);
 }
 
 EXPORT_SYMBOL(ec_transaction);
@@ -487,7 +463,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 * data)
 	 * bit to be cleared (and thus clearing the interrupt source).
 	 */
 
-	result = acpi_ec_transaction(ec, &t, 0);
+	result = acpi_ec_transaction(ec, &t);
 	if (result)
 		return result;
 
@@ -570,28 +546,10 @@ static u32 acpi_ec_gpe_handler(void *data)
 	pr_debug(PREFIX "~~~> interrupt\n");
 	status = acpi_ec_read_status(ec);
 
-	if (test_bit(EC_FLAGS_GPE_MODE, &ec->flags)) {
-		gpe_transaction(ec, status);
-		if (ec_transaction_done(ec) &&
-		    (status & ACPI_EC_FLAG_IBF) == 0)
-			wake_up(&ec->wait);
-	}
-
+	advance_transaction(ec, status);
+	if (ec_transaction_done(ec) && (status & ACPI_EC_FLAG_IBF) == 0)
+		wake_up(&ec->wait);
 	ec_check_sci(ec, status);
-	if (!test_bit(EC_FLAGS_GPE_MODE, &ec->flags) &&
-	    !test_bit(EC_FLAGS_NO_GPE, &ec->flags)) {
-		/* this is non-query, must be confirmation */
-		if (!test_bit(EC_FLAGS_GPE_STORM, &ec->flags)) {
-			if (printk_ratelimit())
-				pr_info(PREFIX "non-query interrupt received,"
-					" switching to interrupt mode\n");
-		} else {
-			/* hush, STORM switches the mode every transaction */
-			pr_debug(PREFIX "non-query interrupt received,"
-				" switching to interrupt mode\n");
-		}
-		set_bit(EC_FLAGS_GPE_MODE, &ec->flags);
-	}
 	return ACPI_INTERRUPT_HANDLED;
 }
 
@@ -837,8 +795,6 @@ static int acpi_ec_add(struct acpi_device *device)
 	acpi_ec_add_fs(device);
 	pr_info(PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n",
 			  ec->gpe, ec->command_addr, ec->data_addr);
-	pr_info(PREFIX "driver started in %s mode\n",
-		(test_bit(EC_FLAGS_GPE_MODE, &ec->flags))?"interrupt":"poll");
 	return 0;
 }
 
@@ -1054,8 +1010,6 @@ static int acpi_ec_suspend(struct acpi_device *device, pm_message_t state)
 {
 	struct acpi_ec *ec = acpi_driver_data(device);
 	/* Stop using GPE */
-	set_bit(EC_FLAGS_NO_GPE, &ec->flags);
-	clear_bit(EC_FLAGS_GPE_MODE, &ec->flags);
 	acpi_disable_gpe(NULL, ec->gpe);
 	return 0;
 }
@@ -1064,8 +1018,6 @@ static int acpi_ec_resume(struct acpi_device *device)
 {
 	struct acpi_ec *ec = acpi_driver_data(device);
 	/* Enable use of GPE back */
-	clear_bit(EC_FLAGS_NO_GPE, &ec->flags);
-	set_bit(EC_FLAGS_GPE_MODE, &ec->flags);
 	acpi_enable_gpe(NULL, ec->gpe);
 	return 0;
 }
-- 
cgit v0.10.2


From 6a63b06f3c494cc87eade97f081300bda60acec7 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Fri, 28 Aug 2009 23:29:44 +0400
Subject: ACPI: EC: use BURST mode only for MSI notebooks

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 839b542..788db78 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -575,7 +575,8 @@ acpi_ec_space_handler(u32 function, acpi_physical_address address,
 	if (bits != 8 && acpi_strict)
 		return AE_BAD_PARAMETER;
 
-	acpi_ec_burst_enable(ec);
+	if (EC_FLAGS_MSI)
+		acpi_ec_burst_enable(ec);
 
 	if (function == ACPI_READ) {
 		result = acpi_ec_read(ec, address, &temp);
@@ -596,7 +597,8 @@ acpi_ec_space_handler(u32 function, acpi_physical_address address,
 		}
 	}
 
-	acpi_ec_burst_disable(ec);
+	if (EC_FLAGS_MSI)
+		acpi_ec_burst_disable(ec);
 
 	switch (result) {
 	case -EINVAL:
-- 
cgit v0.10.2


From f25752e67d9d9ee7562ae9944314dd8c057d3fa2 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Fri, 28 Aug 2009 23:29:51 +0400
Subject: ACPI: EC: Drop orphan comment

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 788db78..dc55618 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -330,10 +330,6 @@ unlock:
 	return status;
 }
 
-/*
- * Note: samsung nv5000 doesn't work with ec burst mode.
- * http://bugzilla.kernel.org/show_bug.cgi?id=4980
- */
 static int acpi_ec_burst_enable(struct acpi_ec *ec)
 {
 	u8 d;
-- 
cgit v0.10.2


From eb0ca849863ecdc593ba7faa95fda5695af891c8 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 29 Aug 2009 22:39:06 -0400
Subject: ACPI: sleep: another HP DMI entry for init_set_sci_en_on_resume

DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv3 Notebook PC")

http://bugzilla.kernel.org/show_bug.cgi?id=13745

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 42159a2..e0a7409 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -405,6 +405,14 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
 		},
 	},
 	{
+	.callback = init_set_sci_en_on_resume,
+	.ident = "Hewlett-Packard HP Pavilion dv3 Notebook PC",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv3 Notebook PC"),
+		},
+	},
+	{
 	.callback = init_old_suspend_ordering,
 	.ident = "Panasonic CF51-2L",
 	.matches = {
-- 
cgit v0.10.2


From 4b4fe3b62e8d88068083218d3e42c45223b51d29 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 8 Aug 2009 00:26:25 -0700
Subject: ACPI: video - fix potential crash when unloading

thermal_cooling_device_register() returns error encoded in a pointer
when it fails in which case we need to explictly set device->cdev
to NULL so we don't try to unregister it when unloading.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 2020907..aab3851 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -997,8 +997,18 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 
 		device->cdev = thermal_cooling_device_register("LCD",
 					device->dev, &video_cooling_ops);
-		if (IS_ERR(device->cdev))
+		if (IS_ERR(device->cdev)) {
+			/*
+			 * Set cdev to NULL so we don't crash trying to
+			 * free it.
+			 * Also, why the hell we are returning early and
+			 * not attempt to register video output if cooling
+			 * device registration failed?
+			 * -- dtor
+			 */
+			device->cdev = NULL;
 			return;
+		}
 
 		dev_info(&device->dev->dev, "registered as cooling_device%d\n",
 			 device->cdev->id);
-- 
cgit v0.10.2


From 4a703a8fe562824f269943d995ddff35077253a9 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@mail.ru>
Date: Sat, 29 Aug 2009 23:03:16 -0400
Subject: ACPI: video - rename cdev to cooling_dev -- syntax only

Cdev name is normally used for ether class devices or character
devices so rename member to avoid confusion for casual reader
of the code.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index aab3851..a8432c2 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -198,7 +198,7 @@ struct acpi_video_device {
 	struct acpi_device *dev;
 	struct acpi_video_device_brightness *brightness;
 	struct backlight_device *backlight;
-	struct thermal_cooling_device *cdev;
+	struct thermal_cooling_device *cooling_dev;
 	struct output_device *output_dev;
 };
 
@@ -387,20 +387,20 @@ static struct output_properties acpi_output_properties = {
 
 
 /* thermal cooling device callbacks */
-static int video_get_max_state(struct thermal_cooling_device *cdev, unsigned
+static int video_get_max_state(struct thermal_cooling_device *cooling_dev, unsigned
 			       long *state)
 {
-	struct acpi_device *device = cdev->devdata;
+	struct acpi_device *device = cooling_dev->devdata;
 	struct acpi_video_device *video = acpi_driver_data(device);
 
 	*state = video->brightness->count - 3;
 	return 0;
 }
 
-static int video_get_cur_state(struct thermal_cooling_device *cdev, unsigned
+static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, unsigned
 			       long *state)
 {
-	struct acpi_device *device = cdev->devdata;
+	struct acpi_device *device = cooling_dev->devdata;
 	struct acpi_video_device *video = acpi_driver_data(device);
 	unsigned long long level;
 	int offset;
@@ -417,9 +417,9 @@ static int video_get_cur_state(struct thermal_cooling_device *cdev, unsigned
 }
 
 static int
-video_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
+video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long state)
 {
-	struct acpi_device *device = cdev->devdata;
+	struct acpi_device *device = cooling_dev->devdata;
 	struct acpi_video_device *video = acpi_driver_data(device);
 	int level;
 
@@ -995,29 +995,29 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		if (result)
 			printk(KERN_ERR PREFIX "Create sysfs link\n");
 
-		device->cdev = thermal_cooling_device_register("LCD",
+		device->cooling_dev = thermal_cooling_device_register("LCD",
 					device->dev, &video_cooling_ops);
-		if (IS_ERR(device->cdev)) {
+		if (IS_ERR(device->cooling_dev)) {
 			/*
-			 * Set cdev to NULL so we don't crash trying to
+			 * Set cooling_dev to NULL so we don't crash trying to
 			 * free it.
 			 * Also, why the hell we are returning early and
 			 * not attempt to register video output if cooling
 			 * device registration failed?
 			 * -- dtor
 			 */
-			device->cdev = NULL;
+			device->cooling_dev = NULL;
 			return;
 		}
 
 		dev_info(&device->dev->dev, "registered as cooling_device%d\n",
-			 device->cdev->id);
+			 device->cooling_dev->id);
 		result = sysfs_create_link(&device->dev->dev.kobj,
-				&device->cdev->device.kobj,
+				&device->cooling_dev->device.kobj,
 				"thermal_cooling");
 		if (result)
 			printk(KERN_ERR PREFIX "Create sysfs link\n");
-		result = sysfs_create_link(&device->cdev->device.kobj,
+		result = sysfs_create_link(&device->cooling_dev->device.kobj,
 				&device->dev->dev.kobj, "device");
 		if (result)
 			printk(KERN_ERR PREFIX "Create sysfs link\n");
@@ -2020,13 +2020,13 @@ static int acpi_video_bus_put_one_device(struct acpi_video_device *device)
 					    acpi_video_device_notify);
 	sysfs_remove_link(&device->backlight->dev.kobj, "device");
 	backlight_device_unregister(device->backlight);
-	if (device->cdev) {
+	if (device->cooling_dev) {
 		sysfs_remove_link(&device->dev->dev.kobj,
 				  "thermal_cooling");
-		sysfs_remove_link(&device->cdev->device.kobj,
+		sysfs_remove_link(&device->cooling_dev->device.kobj,
 				  "device");
-		thermal_cooling_device_unregister(device->cdev);
-		device->cdev = NULL;
+		thermal_cooling_device_unregister(device->cooling_dev);
+		device->cooling_dev = NULL;
 	}
 	video_output_unregister(device->output_dev);
 
-- 
cgit v0.10.2


From 8211a7b5857914058c52ae977c96463e419b37ab Mon Sep 17 00:00:00 2001
From: Troy Heber <troy.heber@hp.com>
Date: Wed, 19 Aug 2009 15:26:11 -0600
Subject: pci/dmar: correct off-by-one error in dmar_fault()

DMAR faults are recorded into a ring of "fault recording registers".
fault_index is a 0-based index into the ring. The code allows the
0-based fault_index to be equal to the total number of fault registers
available from the cap_num_fault_regs() macro, which causes access
beyond the last available register.

Signed-off-by Troy Heber <troy.heber@hp.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 380b60e..3264b62 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1226,7 +1226,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 				source_id, guest_addr);
 
 		fault_index++;
-		if (fault_index > cap_num_fault_regs(iommu->cap))
+		if (fault_index >= cap_num_fault_regs(iommu->cap))
 			fault_index = 0;
 		spin_lock_irqsave(&iommu->register_lock, flag);
 	}
-- 
cgit v0.10.2


From adb2fe0277607d50f4e9ef06e1d180051a609c25 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 31 Aug 2009 15:24:23 +1000
Subject: intel-iommu: include linux/dmi.h to use dmi_ routines

This file needs to include linux/dmi.h directly rather than relying on
it being pulled in from elsewhere.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index d36fa80..2ec5899 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -37,6 +37,7 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/sysdev.h>
+#include <linux/dmi.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 #include "pci.h"
-- 
cgit v0.10.2


From 90c53ca426cb93d15eefea79dcf6bd15ad3ffeb4 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Mon, 31 Aug 2009 12:39:54 -0400
Subject: ACPI video: work-around BIOS AML bug in _BQC

_BQC on some laptops returns an uninitialized value when it's
invoked for the first time.

Set the laptop to the maximum backlight level in this case.
http://bugzilla.kernel.org/attachment.cgi?id=22675

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index a8432c2..097f24c 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -878,7 +878,7 @@ acpi_video_init_brightness(struct acpi_video_device *device)
 	br->flags._BCM_use_index = br->flags._BCL_use_index;
 
 	/* _BQC uses INDEX while _BCL uses VALUE in some laptops */
-	br->curr = level_old = max_level;
+	br->curr = level = max_level;
 
 	if (!device->cap._BQC)
 		goto set_level;
@@ -900,15 +900,25 @@ acpi_video_init_brightness(struct acpi_video_device *device)
 
 	br->flags._BQC_use_index = (level == max_level ? 0 : 1);
 
-	if (!br->flags._BQC_use_index)
+	if (!br->flags._BQC_use_index) {
+		/*
+		 * Set the backlight to the initial state.
+		 * On some buggy laptops, _BQC returns an uninitialized value
+		 * when invoked for the first time, i.e. level_old is invalid.
+		 * set the backlight to max_level in this case
+		 */
+		for (i = 2; i < br->count; i++)
+			if (level_old == br->levels[i])
+				level = level_old;
 		goto set_level;
+	}
 
 	if (br->flags._BCL_reversed)
 		level_old = (br->count - 1) - level_old;
-	level_old = br->levels[level_old];
+	level = br->levels[level_old];
 
 set_level:
-	result = acpi_video_device_lcd_set_level(device, level_old);
+	result = acpi_video_device_lcd_set_level(device, level);
 	if (result)
 		goto out_free_levels;
 
-- 
cgit v0.10.2


From 7e24bc1ce669b2876ffa475ea1147f2bb9ffdc52 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Tue, 4 Aug 2009 14:44:17 -0600
Subject: ACPI: pci_slot.ko wants a 64-bit _SUN

Similar to commit b6adc195 (PCI hotplug: acpiphp wants a 64-bit
_SUN), pci_slot.ko reads and creates sysfs directories based on
the _SUN method.

Certain HP platforms return 64 bits in _SUN. This change to
pci_slot.ko allows us to see the correct sysfs directories.

Reported-by: Chad Smith <chad.smith@hp.com>
Cc: stable@kernel.org
Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
index 12158e0..da9d6d2 100644
--- a/drivers/acpi/pci_slot.c
+++ b/drivers/acpi/pci_slot.c
@@ -57,7 +57,7 @@ ACPI_MODULE_NAME("pci_slot");
 				MY_NAME , ## arg);		\
 	} while (0)
 
-#define SLOT_NAME_SIZE 20		/* Inspired by #define in acpiphp.h */
+#define SLOT_NAME_SIZE 21		/* Inspired by #define in acpiphp.h */
 
 struct acpi_pci_slot {
 	acpi_handle root_handle;	/* handle of the root bridge */
@@ -149,7 +149,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 		return AE_OK;
 	}
 
-	snprintf(name, sizeof(name), "%u", (u32)sun);
+	snprintf(name, sizeof(name), "%llu", sun);
 	pci_slot = pci_create_slot(pci_bus, device, name, NULL);
 	if (IS_ERR(pci_slot)) {
 		err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot));
-- 
cgit v0.10.2


From 718fb0de8ff88f71b3b91a8ee8e42e60c88e5128 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Thu, 6 Aug 2009 23:18:12 +0000
Subject: ACPI: fix NULL bug for HID/UID string

acpi_device->pnp.hardware_id and unique_id are now allocated pointers,
replacing the previous arrays.  acpi_device_install_notify_handler()
oopsed on the NULL hid when probing the video device, and perhaps other
uses are vulnerable too.  So initialize those pointers to empty strings
when there is no hid or uid.  Also, free hardware_id and unique_id when
when acpi_device is going to be freed.

http://bugzilla.kernel.org/show_bug.cgi?id=14096

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 9606af1..dc14421 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -309,6 +309,10 @@ static void acpi_device_release(struct device *dev)
 	struct acpi_device *acpi_dev = to_acpi_device(dev);
 
 	kfree(acpi_dev->pnp.cid_list);
+	if (acpi_dev->flags.hardware_id)
+		kfree(acpi_dev->pnp.hardware_id);
+	if (acpi_dev->flags.unique_id)
+		kfree(acpi_dev->pnp.unique_id);
 	kfree(acpi_dev);
 }
 
@@ -1137,8 +1141,9 @@ static void acpi_device_set_id(struct acpi_device *device,
 			strcpy(device->pnp.hardware_id, hid);
 			device->flags.hardware_id = 1;
 		}
-	} else
-		device->pnp.hardware_id = NULL;
+	}
+	if (!device->flags.hardware_id)
+		device->pnp.hardware_id = "";
 
 	if (uid) {
 		device->pnp.unique_id = ACPI_ALLOCATE_ZEROED(strlen (uid) + 1);
@@ -1146,8 +1151,9 @@ static void acpi_device_set_id(struct acpi_device *device,
 			strcpy(device->pnp.unique_id, uid);
 			device->flags.unique_id = 1;
 		}
-	} else
-		device->pnp.unique_id = NULL;
+	}
+	if (!device->flags.unique_id)
+		device->pnp.unique_id = "";
 
 	if (cid_list || cid_add) {
 		struct acpica_device_id_list *list;
@@ -1362,10 +1368,8 @@ acpi_add_single_object(struct acpi_device **child,
 end:
 	if (!result)
 		*child = device;
-	else {
-		kfree(device->pnp.cid_list);
-		kfree(device);
-	}
+	else
+		acpi_device_release(&device->dev);
 
 	return result;
 }
-- 
cgit v0.10.2


From a06b1261bdb580b35967d0e055d1ab131b332254 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 31 Aug 2009 15:16:11 -0400
Subject: NFSD: Fix a bug in the NFSv4 'supported attrs' mandatory attribute

The fact that the filesystem doesn't currently list any alternate
locations does _not_ imply that the fs_locations attribute should be
marked as "unsupported".

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6fde431..bebc0c2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -68,7 +68,6 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		   u32 *bmval, u32 *writable)
 {
 	struct dentry *dentry = cstate->current_fh.fh_dentry;
-	struct svc_export *exp = cstate->current_fh.fh_export;
 
 	/*
 	 * Check about attributes are supported by the NFSv4 server or not.
@@ -80,17 +79,13 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return nfserr_attrnotsupp;
 
 	/*
-	 * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported
+	 * Check FATTR4_WORD0_ACL can be supported
 	 * in current environment or not.
 	 */
 	if (bmval[0] & FATTR4_WORD0_ACL) {
 		if (!IS_POSIXACL(dentry->d_inode))
 			return nfserr_attrnotsupp;
 	}
-	if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) {
-		if (exp->ex_fslocs.locations == NULL)
-			return nfserr_attrnotsupp;
-	}
 
 	/*
 	 * According to spec, read-only attributes return ERR_INVAL.
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fdf632b..20c5e3d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1793,11 +1793,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 				goto out_nfserr;
 		}
 	}
-	if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
-		if (exp->ex_fslocs.locations == NULL) {
-			bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
-		}
-	}
 	if ((buflen -= 16) < 0)
 		goto out_resource;
 
@@ -1825,8 +1820,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			goto out_resource;
 		if (!aclsupport)
 			word0 &= ~FATTR4_WORD0_ACL;
-		if (!exp->ex_fslocs.locations)
-			word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
 		if (!word2) {
 			WRITE32(2);
 			WRITE32(word0);
-- 
cgit v0.10.2


From a649637c73a36174287a403cdda7607177d64523 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Fri, 28 Aug 2009 08:45:01 -0400
Subject: nfsd41: bound forechannel drc size by memory usage

By using the requested ca_maxresponsesize_cached * ca_maxresponses to bound
a forechannel drc request size, clients can tailor a session to usage.

For example, an I/O session (READ/WRITE only) can have a much smaller
ca_maxresponsesize_cached (for only WRITE compound responses) and a lot larger
ca_maxresponses to service a large in-flight data window.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b44a2cf..02b3ddd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -414,34 +414,64 @@ gen_sessionid(struct nfsd4_session *ses)
 }
 
 /*
- * Give the client the number of slots it requests bound by
- * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem.
+ * The protocol defines ca_maxresponssize_cached to include the size of
+ * the rpc header, but all we need to cache is the data starting after
+ * the end of the initial SEQUENCE operation--the rest we regenerate
+ * each time.  Therefore we can advertise a ca_maxresponssize_cached
+ * value that is the number of bytes in our cache plus a few additional
+ * bytes.  In order to stay on the safe side, and not promise more than
+ * we can cache, those additional bytes must be the minimum possible: 24
+ * bytes of rpc header (xid through accept state, with AUTH_NULL
+ * verifier), 12 for the compound header (with zero-length tag), and 44
+ * for the SEQUENCE op response:
+ */
+#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
+
+/*
+ * Give the client the number of ca_maxresponsesize_cached slots it
+ * requests, of size bounded by NFSD_SLOT_CACHE_SIZE,
+ * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more
+ * than NFSD_MAX_SLOTS_PER_SESSION.
  *
- * If we run out of reserved DRC memory we should (up to a point) re-negotiate
- * active sessions and reduce their slot usage to make rooom for new
- * connections. For now we just fail the create session.
+ * If we run out of reserved DRC memory we should (up to a point)
+ * re-negotiate active sessions and reduce their slot usage to make
+ * rooom for new connections. For now we just fail the create session.
  */
-static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
+static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
 {
-	int mem;
+	int mem, size = fchan->maxresp_cached;
 
 	if (fchan->maxreqs < 1)
 		return nfserr_inval;
-	else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
-		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
 
-	mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE;
+	if (size < NFSD_MIN_HDR_SEQ_SZ)
+		size = NFSD_MIN_HDR_SEQ_SZ;
+	size -= NFSD_MIN_HDR_SEQ_SZ;
+	if (size > NFSD_SLOT_CACHE_SIZE)
+		size = NFSD_SLOT_CACHE_SIZE;
+
+	/* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
+	mem = fchan->maxreqs * size;
+	if (mem > NFSD_MAX_MEM_PER_SESSION) {
+		fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
+		if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
+			fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
+		mem = fchan->maxreqs * size;
+	}
 
 	spin_lock(&nfsd_drc_lock);
-	if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem)
-		mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) /
-				NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE;
+	/* bound the total session drc memory ussage */
+	if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
+		fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
+		mem = fchan->maxreqs * size;
+	}
 	nfsd_drc_mem_used += mem;
 	spin_unlock(&nfsd_drc_lock);
 
-	fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE;
 	if (fchan->maxreqs == 0)
 		return nfserr_resource;
+
+	fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
 	return 0;
 }
 
@@ -466,9 +496,6 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 		fchan->maxresp_sz = maxcount;
 	session_fchan->maxresp_sz = fchan->maxresp_sz;
 
-	session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE;
-	fchan->maxresp_cached = session_fchan->maxresp_cached;
-
 	/* Use the client's maxops if possible */
 	if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
 		fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
@@ -478,9 +505,12 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 	 * recover pages from existing sessions. For now fail session
 	 * creation.
 	 */
-	status = set_forechannel_maxreqs(fchan);
+	status = set_forechannel_drc_size(fchan);
 
+	session_fchan->maxresp_cached = fchan->maxresp_cached;
 	session_fchan->maxreqs = fchan->maxreqs;
+
+	dprintk("%s status %d\n", __func__, status);
 	return status;
 }
 
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index fb0c404..ff0b771 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -92,13 +92,17 @@ struct nfs4_cb_conn {
 	struct rpc_cred	*	cb_cred;
 };
 
-/* Maximum number of slots per session. 128 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION	128
+/* Maximum number of slots per session. 160 is useful for long haul TCP */
+#define NFSD_MAX_SLOTS_PER_SESSION     160
 /* Maximum number of pages per slot cache entry */
 #define NFSD_PAGES_PER_SLOT	1
 #define NFSD_SLOT_CACHE_SIZE		PAGE_SIZE
 /* Maximum number of operations per session compound */
 #define NFSD_MAX_OPS_PER_COMPOUND	16
+/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
+#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
+#define NFSD_MAX_MEM_PER_SESSION  \
+		(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
 
 struct nfsd4_cache_entry {
 	__be32		ce_status;
-- 
cgit v0.10.2


From a8dfdaeb7a8b1295f45d9d208dd27e6e20113d1b Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Fri, 28 Aug 2009 08:45:02 -0400
Subject: nfsd41: use session maxreqs for sequence target and highest slotid

This fixes a bug in the sequence operation reply.

The sequence operation returns the highest slotid it will accept in the future
in sr_highest_slotid, and the highest slotid it prefers the client to use.
Since we do not re-negotiate the session slot table yet, these should both
always be set to the session ca_maxrequests.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 02b3ddd..ec074e7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1133,7 +1133,6 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 	 * session inactivity timer fires and a solo sequence operation
 	 * is sent (lease renewal).
 	 */
-	seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
 
 	/* Either returns 0 or nfserr_retry_uncached */
 	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
@@ -1497,6 +1496,11 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	slot = &session->se_slots[seq->slotid];
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
+	/* We do not negotiate the number of slots yet, so set the
+	 * maxslots to the session maxreqs which is used to encode
+	 * sr_highest_slotid and the sr_target_slot id to maxslots */
+	seq->maxslots = session->se_fchannel.maxreqs;
+
 	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
 	if (status == nfserr_replay_cache) {
 		cstate->slot = slot;
-- 
cgit v0.10.2


From bdac86e2154cfe47552639113265d1fa27cfbe72 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Fri, 28 Aug 2009 08:45:03 -0400
Subject: nfsd41: replace nfserr_resource in pure nfs41 responses

nfserr_resource is not a legal error for NFSv4.1. Replace it with
nfserr_serverfault for EXCHANGE_ID and CREATE_SESSION processing.

We will also need to map nfserr_resource to other errors in routines shared
by NFSv4.0 and NFSv4.1

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ec074e7..c9a45f4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -469,7 +469,7 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
 	spin_unlock(&nfsd_drc_lock);
 
 	if (fchan->maxreqs == 0)
-		return nfserr_resource;
+		return nfserr_serverfault;
 
 	fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
 	return 0;
@@ -519,7 +519,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 		   struct nfsd4_create_session *cses)
 {
 	struct nfsd4_session *new, tmp;
-	int idx, status = nfserr_resource, slotsize;
+	int idx, status = nfserr_serverfault, slotsize;
 
 	memset(&tmp, 0, sizeof(tmp));
 
@@ -1282,7 +1282,7 @@ out_new:
 	/* Normal case */
 	new = create_client(exid->clname, dname);
 	if (new == NULL) {
-		status = nfserr_resource;
+		status = nfserr_serverfault;
 		goto out;
 	}
 
-- 
cgit v0.10.2


From 557ce2646e775f6bda734dd92b10d4780874b9c7 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Fri, 28 Aug 2009 08:45:04 -0400
Subject: nfsd41: replace page based DRC with buffer based DRC

Use NFSD_SLOT_CACHE_SIZE size buffers for sessions DRC instead of holding nfsd
pages in cache.

Connectathon testing has shown that 1024 bytes for encoded compound operation
responses past the sequence operation is sufficient, 512 bytes is a little too
small. Set NFSD_SLOT_CACHE_SIZE to 1024.

Allocate memory for the session DRC in the CREATE_SESSION operation
to guarantee that the memory resource is available for caching responses.
Allocate each slot individually in preparation for slot table size negotiation.

Remove struct nfsd4_cache_entry and helper functions for the old page-based
DRC.

The iov_len calculation in nfs4svc_encode_compoundres is now always
correct.  Replay is now done in nfsd4_sequence under the state lock, so
the session ref count is only bumped on non-replay. Clean up the
nfs4svc_encode_compoundres session logic.

The nfsd4_compound_state statp pointer is also not used.
Remove nfsd4_set_statp().

Move useful nfsd4_cache_entry fields into nfsd4_slot.

Signed-off-by: Andy Adamson <andros@netapp.com
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c9a45f4..46e9ac5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -514,12 +514,23 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 	return status;
 }
 
+static void
+free_session_slots(struct nfsd4_session *ses)
+{
+	int i;
+
+	for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+		kfree(ses->se_slots[i]);
+}
+
 static int
 alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 		   struct nfsd4_create_session *cses)
 {
 	struct nfsd4_session *new, tmp;
-	int idx, status = nfserr_serverfault, slotsize;
+	struct nfsd4_slot *sp;
+	int idx, slotsize, cachesize, i;
+	int status;
 
 	memset(&tmp, 0, sizeof(tmp));
 
@@ -530,14 +541,27 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 	if (status)
 		goto out;
 
-	/* allocate struct nfsd4_session and slot table in one piece */
-	slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot);
+	BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot)
+		     + sizeof(struct nfsd4_session) > PAGE_SIZE);
+
+	status = nfserr_serverfault;
+	/* allocate struct nfsd4_session and slot table pointers in one piece */
+	slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
 	new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
 	if (!new)
 		goto out;
 
 	memcpy(new, &tmp, sizeof(*new));
 
+	/* allocate each struct nfsd4_slot and data cache in one piece */
+	cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+	for (i = 0; i < new->se_fchannel.maxreqs; i++) {
+		sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
+		if (!sp)
+			goto out_free;
+		new->se_slots[i] = sp;
+	}
+
 	new->se_client = clp;
 	gen_sessionid(new);
 	idx = hash_sessionid(&new->se_sessionid);
@@ -554,6 +578,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 	status = nfs_ok;
 out:
 	return status;
+out_free:
+	free_session_slots(new);
+	kfree(new);
+	goto out;
 }
 
 /* caller must hold sessionid_lock */
@@ -596,22 +624,16 @@ release_session(struct nfsd4_session *ses)
 	nfsd4_put_session(ses);
 }
 
-static void nfsd4_release_respages(struct page **respages, short resused);
-
 void
 free_session(struct kref *kref)
 {
 	struct nfsd4_session *ses;
-	int i;
 
 	ses = container_of(kref, struct nfsd4_session, se_ref);
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
-		struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
-		nfsd4_release_respages(e->ce_respages, e->ce_resused);
-	}
 	spin_lock(&nfsd_drc_lock);
 	nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE;
 	spin_unlock(&nfsd_drc_lock);
+	free_session_slots(ses);
 	kfree(ses);
 }
 
@@ -968,116 +990,31 @@ out_err:
 	return;
 }
 
-void
-nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
-{
-	struct nfsd4_compoundres *resp = rqstp->rq_resp;
-
-	resp->cstate.statp = statp;
-}
-
-/*
- * Dereference the result pages.
- */
-static void
-nfsd4_release_respages(struct page **respages, short resused)
-{
-	int i;
-
-	dprintk("--> %s\n", __func__);
-	for (i = 0; i < resused; i++) {
-		if (!respages[i])
-			continue;
-		put_page(respages[i]);
-		respages[i] = NULL;
-	}
-}
-
-static void
-nfsd4_copy_pages(struct page **topages, struct page **frompages, short count)
-{
-	int i;
-
-	for (i = 0; i < count; i++) {
-		topages[i] = frompages[i];
-		if (!topages[i])
-			continue;
-		get_page(topages[i]);
-	}
-}
-
 /*
- * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous
- * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total
- * length of the XDR response is less than se_fmaxresp_cached
- * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a
- * of the reply (e.g. readdir).
- *
- * Store the base and length of the rq_req.head[0] page
- * of the NFSv4.1 data, just past the rpc header.
+ * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
  */
 void
 nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 {
-	struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
-	struct svc_rqst *rqstp = resp->rqstp;
-	struct kvec *resv = &rqstp->rq_res.head[0];
-
-	dprintk("--> %s entry %p\n", __func__, entry);
+	struct nfsd4_slot *slot = resp->cstate.slot;
+	unsigned int base;
 
-	nfsd4_release_respages(entry->ce_respages, entry->ce_resused);
-	entry->ce_opcnt = resp->opcnt;
-	entry->ce_status = resp->cstate.status;
+	dprintk("--> %s slot %p\n", __func__, slot);
 
-	/*
-	 * Don't need a page to cache just the sequence operation - the slot
-	 * does this for us!
-	 */
+	slot->sl_opcnt = resp->opcnt;
+	slot->sl_status = resp->cstate.status;
 
 	if (nfsd4_not_cached(resp)) {
-		entry->ce_resused = 0;
-		entry->ce_rpchdrlen = 0;
-		dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__,
-			resp->cstate.slot->sl_cache_entry.ce_cachethis);
+		slot->sl_datalen = 0;
 		return;
 	}
-	entry->ce_resused = rqstp->rq_resused;
-	if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1)
-		entry->ce_resused = NFSD_PAGES_PER_SLOT + 1;
-	nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages,
-			 entry->ce_resused);
-	entry->ce_datav.iov_base = resp->cstate.statp;
-	entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp -
-				(char *)page_address(rqstp->rq_respages[0]));
-	/* Current request rpc header length*/
-	entry->ce_rpchdrlen = (char *)resp->cstate.statp -
-				(char *)page_address(rqstp->rq_respages[0]);
-}
-
-/*
- * We keep the rpc header, but take the nfs reply from the replycache.
- */
-static int
-nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
-			struct nfsd4_cache_entry *entry)
-{
-	struct svc_rqst *rqstp = resp->rqstp;
-	struct kvec *resv = &resp->rqstp->rq_res.head[0];
-	int len;
-
-	/* Current request rpc header length*/
-	len = (char *)resp->cstate.statp -
-			(char *)page_address(rqstp->rq_respages[0]);
-	if (entry->ce_datav.iov_len + len > PAGE_SIZE) {
-		dprintk("%s v41 cached reply too large (%Zd).\n", __func__,
-			entry->ce_datav.iov_len);
-		return 0;
-	}
-	/* copy the cached reply nfsd data past the current rpc header */
-	memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base,
-		entry->ce_datav.iov_len);
-	resv->iov_len = len + entry->ce_datav.iov_len;
-	return 1;
+	slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
+	base = (char *)resp->cstate.datap -
+					(char *)resp->xbuf->head[0].iov_base;
+	if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
+				    slot->sl_datalen))
+		WARN("%s: sessions DRC could not cache compound\n", __func__);
+	return;
 }
 
 /*
@@ -1095,14 +1032,14 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
 	struct nfsd4_slot *slot = resp->cstate.slot;
 
 	dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
-		resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
+		resp->opcnt, resp->cstate.slot->sl_cachethis);
 
 	/* Encode the replayed sequence operation */
 	op = &args->ops[resp->opcnt - 1];
 	nfsd4_encode_operation(resp, op);
 
 	/* Return nfserr_retry_uncached_rep in next operation. */
-	if (args->opcnt > 1 && slot->sl_cache_entry.ce_cachethis == 0) {
+	if (args->opcnt > 1 && slot->sl_cachethis == 0) {
 		op = &args->ops[resp->opcnt++];
 		op->status = nfserr_retry_uncached_rep;
 		nfsd4_encode_operation(resp, op);
@@ -1111,57 +1048,29 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
 }
 
 /*
- * Keep the first page of the replay. Copy the NFSv4.1 data from the first
- * cached page.  Replace any futher replay pages from the cache.
+ * The sequence operation is not cached because we can use the slot and
+ * session values.
  */
 __be32
 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 			 struct nfsd4_sequence *seq)
 {
-	struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
+	struct nfsd4_slot *slot = resp->cstate.slot;
 	__be32 status;
 
-	dprintk("--> %s entry %p\n", __func__, entry);
-
-	/*
-	 * If this is just the sequence operation, we did not keep
-	 * a page in the cache entry because we can just use the
-	 * slot info stored in struct nfsd4_sequence that was checked
-	 * against the slot in nfsd4_sequence().
-	 *
-	 * This occurs when seq->cachethis is FALSE, or when the client
-	 * session inactivity timer fires and a solo sequence operation
-	 * is sent (lease renewal).
-	 */
+	dprintk("--> %s slot %p\n", __func__, slot);
 
 	/* Either returns 0 or nfserr_retry_uncached */
 	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
 	if (status == nfserr_retry_uncached_rep)
 		return status;
 
-	if (!nfsd41_copy_replay_data(resp, entry)) {
-		/*
-		 * Not enough room to use the replay rpc header, send the
-		 * cached header. Release all the allocated result pages.
-		 */
-		svc_free_res_pages(resp->rqstp);
-		nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages,
-			entry->ce_resused);
-	} else {
-		/* Release all but the first allocated result page */
-
-		resp->rqstp->rq_resused--;
-		svc_free_res_pages(resp->rqstp);
-
-		nfsd4_copy_pages(&resp->rqstp->rq_respages[1],
-				 &entry->ce_respages[1],
-				 entry->ce_resused - 1);
-	}
+	/* The sequence operation has been encoded, cstate->datap set. */
+	memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
 
-	resp->rqstp->rq_resused = entry->ce_resused;
-	resp->opcnt = entry->ce_opcnt;
-	resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen;
-	status = entry->ce_status;
+	resp->opcnt = slot->sl_opcnt;
+	resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
+	status = slot->sl_status;
 
 	return status;
 }
@@ -1493,7 +1402,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	if (seq->slotid >= session->se_fchannel.maxreqs)
 		goto out;
 
-	slot = &session->se_slots[seq->slotid];
+	slot = session->se_slots[seq->slotid];
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	/* We do not negotiate the number of slots yet, so set the
@@ -1506,7 +1415,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 		cstate->slot = slot;
 		cstate->session = session;
 		/* Return the cached reply status and set cstate->status
-		 * for nfsd4_svc_encode_compoundres processing */
+		 * for nfsd4_proc_compound processing */
 		status = nfsd4_replay_cache_entry(resp, seq);
 		cstate->status = nfserr_replay_cache;
 		goto out;
@@ -1517,7 +1426,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	/* Success! bump slot seqid */
 	slot->sl_inuse = true;
 	slot->sl_seqid = seq->seqid;
-	slot->sl_cache_entry.ce_cachethis = seq->cachethis;
+	slot->sl_cachethis = seq->cachethis;
 
 	cstate->slot = slot;
 	cstate->session = session;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 20c5e3d..00ed16a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3057,6 +3057,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
 	WRITE32(0);
 
 	ADJUST_ARGS();
+	resp->cstate.datap = p; /* DRC cache data pointer */
 	return 0;
 }
 
@@ -3159,7 +3160,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
 		return status;
 
 	session = resp->cstate.session;
-	if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0)
+	if (session == NULL || slot->sl_cachethis == 0)
 		return status;
 
 	if (resp->opcnt >= args->opcnt)
@@ -3284,6 +3285,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 	/*
 	 * All that remains is to write the tag and operation count...
 	 */
+	struct nfsd4_compound_state *cs = &resp->cstate;
 	struct kvec *iov;
 	p = resp->tagp;
 	*p++ = htonl(resp->taglen);
@@ -3297,15 +3299,10 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 		iov = &rqstp->rq_res.head[0];
 	iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
 	BUG_ON(iov->iov_len > PAGE_SIZE);
-	if (nfsd4_has_session(&resp->cstate)) {
-		if (resp->cstate.status == nfserr_replay_cache &&
-				!nfsd4_not_cached(resp)) {
-			iov->iov_len = resp->cstate.iovlen;
-		} else {
-			nfsd4_store_cache_entry(resp);
-			dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
-			resp->cstate.slot->sl_inuse = 0;
-		}
+	if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
+		nfsd4_store_cache_entry(resp);
+		dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
+		resp->cstate.slot->sl_inuse = false;
 		nfsd4_put_session(resp->cstate.session);
 	}
 	return 1;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 675d395..4472449 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -577,10 +577,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 		+ rqstp->rq_res.head[0].iov_len;
 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
-	/* NFSv4.1 DRC requires statp */
-	if (rqstp->rq_vers == 4)
-		nfsd4_set_statp(rqstp, statp);
-
 	/* Now call the procedure handler, and encode NFS status. */
 	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index ff0b771..70ef5f4 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -94,30 +94,23 @@ struct nfs4_cb_conn {
 
 /* Maximum number of slots per session. 160 is useful for long haul TCP */
 #define NFSD_MAX_SLOTS_PER_SESSION     160
-/* Maximum number of pages per slot cache entry */
-#define NFSD_PAGES_PER_SLOT	1
-#define NFSD_SLOT_CACHE_SIZE		PAGE_SIZE
 /* Maximum number of operations per session compound */
 #define NFSD_MAX_OPS_PER_COMPOUND	16
+/* Maximum  session per slot cache size */
+#define NFSD_SLOT_CACHE_SIZE		1024
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
 #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
 #define NFSD_MAX_MEM_PER_SESSION  \
 		(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
 
-struct nfsd4_cache_entry {
-	__be32		ce_status;
-	struct kvec	ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */
-	struct page	*ce_respages[NFSD_PAGES_PER_SLOT + 1];
-	int		ce_cachethis;
-	short		ce_resused;
-	int		ce_opcnt;
-	int		ce_rpchdrlen;
-};
-
 struct nfsd4_slot {
-	bool				sl_inuse;
-	u32				sl_seqid;
-	struct nfsd4_cache_entry	sl_cache_entry;
+	bool	sl_inuse;
+	bool	sl_cachethis;
+	u16	sl_opcnt;
+	u32	sl_seqid;
+	__be32	sl_status;
+	u32	sl_datalen;
+	char	sl_data[];
 };
 
 struct nfsd4_channel_attrs {
@@ -159,7 +152,7 @@ struct nfsd4_session {
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;
 	struct nfsd4_channel_attrs se_bchannel;
-	struct nfsd4_slot	se_slots[];	/* forward channel slots */
+	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
 };
 
 static inline void
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 3f71660..73164c2 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -51,7 +51,7 @@ struct nfsd4_compound_state {
 	/* For sessions DRC */
 	struct nfsd4_session	*session;
 	struct nfsd4_slot	*slot;
-	__be32			*statp;
+	__be32			*datap;
 	size_t			iovlen;
 	u32			minorversion;
 	u32			status;
@@ -472,8 +472,7 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
 
 static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
 {
-	return !resp->cstate.slot->sl_cache_entry.ce_cachethis ||
-			nfsd4_is_solo_sequence(resp);
+	return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
 }
 
 #define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
-- 
cgit v0.10.2


From 8e498751f2f36074ffa6fc7f0a9ec6e055b350e6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 2 Sep 2009 19:31:32 -0400
Subject: nfsd: move some of fh_compose into helper functions

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8847f3f..78d8ebf 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -397,6 +397,40 @@ static inline void _fh_update_old(struct dentry *dentry,
 		fh->ofh_dirino = 0;
 }
 
+static bool is_root_export(struct svc_export *exp)
+{
+	return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
+}
+
+static struct super_block *exp_sb(struct svc_export *exp)
+{
+	return exp->ex_path.dentry->d_inode->i_sb;
+}
+
+static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
+{
+	switch (fsid_type) {
+	case FSID_DEV:
+		if (!old_valid_dev(exp_sb(exp)->s_dev))
+			return 0;
+		/* FALL THROUGH */
+	case FSID_MAJOR_MINOR:
+	case FSID_ENCODE_DEV:
+		return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
+	case FSID_NUM:
+		return exp->ex_flags & NFSEXP_FSID;
+	case FSID_UUID8:
+	case FSID_UUID16:
+		if (!is_root_export(exp))
+			return 0;
+		/* fall through */
+	case FSID_UUID4_INUM:
+	case FSID_UUID16_INUM:
+		return exp->ex_uuid != NULL;
+	}
+	return 1;
+}
+
 __be32
 fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	   struct svc_fh *ref_fh)
@@ -414,8 +448,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	struct inode * inode = dentry->d_inode;
 	struct dentry *parent = dentry->d_parent;
 	__u32 *datap;
-	dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev;
-	int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root);
+	dev_t ex_dev = exp_sb(exp)->s_dev;
 
 	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
 		MAJOR(ex_dev), MINOR(ex_dev),
@@ -447,49 +480,24 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 			goto retry;
 		}
 
-		/* Need to check that this type works for this
-		 * export point.  As the fsid -> filesystem mapping
-		 * was guided by user-space, there is no guarantee
-		 * that the filesystem actually supports that fsid
-		 * type. If it doesn't we loop around again without
-		 * ref_fh set.
+		/*
+		 * As the fsid -> filesystem mapping was guided by
+		 * user-space, there is no guarantee that the filesystem
+		 * actually supports that fsid type. If it doesn't we
+		 * loop around again without ref_fh set.
 		 */
-		switch(fsid_type) {
-		case FSID_DEV:
-			if (!old_valid_dev(ex_dev))
-				goto retry;
-			/* FALL THROUGH */
-		case FSID_MAJOR_MINOR:
-		case FSID_ENCODE_DEV:
-			if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
-			      & FS_REQUIRES_DEV))
-				goto retry;
-			break;
-		case FSID_NUM:
-			if (! (exp->ex_flags & NFSEXP_FSID))
-				goto retry;
-			break;
-		case FSID_UUID8:
-		case FSID_UUID16:
-			if (!root_export)
-				goto retry;
-			/* fall through */
-		case FSID_UUID4_INUM:
-		case FSID_UUID16_INUM:
-			if (exp->ex_uuid == NULL)
-				goto retry;
-			break;
-		}
+		if (!fsid_type_ok_for_exp(fsid_type, exp))
+			goto retry;
 	} else if (exp->ex_flags & NFSEXP_FSID) {
 		fsid_type = FSID_NUM;
 	} else if (exp->ex_uuid) {
 		if (fhp->fh_maxsize >= 64) {
-			if (root_export)
+			if (is_root_export(exp))
 				fsid_type = FSID_UUID16;
 			else
 				fsid_type = FSID_UUID16_INUM;
 		} else {
-			if (root_export)
+			if (is_root_export(exp))
 				fsid_type = FSID_UUID8;
 			else
 				fsid_type = FSID_UUID4_INUM;
@@ -639,8 +647,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
 	case FSID_DEV:
 	case FSID_ENCODE_DEV:
 	case FSID_MAJOR_MINOR:
-		if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
-		    & FS_REQUIRES_DEV)
+		if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
 			return FSIDSOURCE_DEV;
 		break;
 	case FSID_NUM:
-- 
cgit v0.10.2


From bc6c53d5a1383d5d9632adf33bd03458cfc0869d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 2 Sep 2009 19:50:40 -0400
Subject: nfsd: move fsid_type choice out of fh_compose

More trivial cleanup.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 78d8ebf..bce0b2b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -431,43 +431,17 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
 	return 1;
 }
 
-__be32
-fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
-	   struct svc_fh *ref_fh)
-{
-	/* ref_fh is a reference file handle.
-	 * if it is non-null and for the same filesystem, then we should compose
-	 * a filehandle which is of the same version, where possible.
-	 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
-	 * Then create a 32byte filehandle using nfs_fhbase_old
-	 *
-	 */
 
+static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
+{
 	u8 version;
-	u8 fsid_type = 0;
-	struct inode * inode = dentry->d_inode;
-	struct dentry *parent = dentry->d_parent;
-	__u32 *datap;
-	dev_t ex_dev = exp_sb(exp)->s_dev;
-
-	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
-		MAJOR(ex_dev), MINOR(ex_dev),
-		(long) exp->ex_path.dentry->d_inode->i_ino,
-		parent->d_name.name, dentry->d_name.name,
-		(inode ? inode->i_ino : 0));
-
-	/* Choose filehandle version and fsid type based on
-	 * the reference filehandle (if it is in the same export)
-	 * or the export options.
-	 */
- retry:
+	u8 fsid_type;
+retry:
 	version = 1;
 	if (ref_fh && ref_fh->fh_export == exp) {
 		version = ref_fh->fh_handle.fh_version;
 		fsid_type = ref_fh->fh_handle.fh_fsid_type;
 
-		if (ref_fh == fhp)
-			fh_put(ref_fh);
 		ref_fh = NULL;
 
 		switch (version) {
@@ -502,11 +476,44 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 			else
 				fsid_type = FSID_UUID4_INUM;
 		}
-	} else if (!old_valid_dev(ex_dev))
+	} else if (!old_valid_dev(exp_sb(exp)->s_dev))
 		/* for newer device numbers, we must use a newer fsid format */
 		fsid_type = FSID_ENCODE_DEV;
 	else
 		fsid_type = FSID_DEV;
+	fhp->fh_handle.fh_version = version;
+	if (version)
+		fhp->fh_handle.fh_fsid_type = fsid_type;
+}
+
+__be32
+fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
+	   struct svc_fh *ref_fh)
+{
+	/* ref_fh is a reference file handle.
+	 * if it is non-null and for the same filesystem, then we should compose
+	 * a filehandle which is of the same version, where possible.
+	 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
+	 * Then create a 32byte filehandle using nfs_fhbase_old
+	 *
+	 */
+
+	struct inode * inode = dentry->d_inode;
+	struct dentry *parent = dentry->d_parent;
+	__u32 *datap;
+	dev_t ex_dev = exp_sb(exp)->s_dev;
+
+	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
+		MAJOR(ex_dev), MINOR(ex_dev),
+		(long) exp->ex_path.dentry->d_inode->i_ino,
+		parent->d_name.name, dentry->d_name.name,
+		(inode ? inode->i_ino : 0));
+
+	/* Choose filehandle version and fsid type based on
+	 * the reference filehandle (if it is in the same export)
+	 * or the export options.
+	 */
+	 set_version_and_fsid_type(fhp, exp, ref_fh);
 
 	if (ref_fh == fhp)
 		fh_put(ref_fh);
@@ -524,7 +531,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	fhp->fh_export = exp;
 	cache_get(&exp->h);
 
-	if (version == 0xca) {
+	if (fhp->fh_handle.fh_version == 0xca) {
 		/* old style filehandle please */
 		memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
 		fhp->fh_handle.fh_size = NFS_FHSIZE;
@@ -538,15 +545,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 			_fh_update_old(dentry, exp, &fhp->fh_handle);
 	} else {
 		int len;
-		fhp->fh_handle.fh_version = 1;
 		fhp->fh_handle.fh_auth_type = 0;
 		datap = fhp->fh_handle.fh_auth+0;
-		fhp->fh_handle.fh_fsid_type = fsid_type;
-		mk_fsid(fsid_type, datap, ex_dev,
+		mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
 			exp->ex_path.dentry->d_inode->i_ino,
 			exp->ex_fsid, exp->ex_uuid);
 
-		len = key_len(fsid_type);
+		len = key_len(fhp->fh_handle.fh_fsid_type);
 		datap += len/4;
 		fhp->fh_handle.fh_size = 4 + len;
 
-- 
cgit v0.10.2


From 2671a4bf3516757ca028c139a7902a50f2bd994a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 2 Sep 2009 16:48:32 -0400
Subject: NFSd: Fix filehandle leak in exp_pseudoroot() and nfsd4_path()

nfsd4_path() allocates a temporary filehandle and then fails to free it
before the function exits, leaking reference counts to the dentry and
export that it refers to.

Also, nfsd4_lookupp() puts the result of exp_pseudoroot() in a temporary
filehandle which it releases on success of exp_pseudoroot() but not on
failure; fix exp_pseudoroot to ensure that on failure it releases the
filehandle before returning.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index d946264..984a5eb 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1341,6 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
 	if (rv)
 		goto out;
 	rv = check_nfsd_access(exp, rqstp);
+	if (rv)
+		fh_put(fhp);
 out:
 	exp_put(exp);
 	return rv;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 00ed16a..0fbd50c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1599,7 +1599,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
 static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat)
 {
 	struct svc_fh tmp_fh;
-	char *path, *rootpath;
+	char *path = NULL, *rootpath;
+	size_t rootlen;
 
 	fh_init(&tmp_fh, NFS4_FHSIZE);
 	*stat = exp_pseudoroot(rqstp, &tmp_fh);
@@ -1609,14 +1610,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
 
 	path = exp->ex_pathname;
 
-	if (strncmp(path, rootpath, strlen(rootpath))) {
+	rootlen = strlen(rootpath);
+	if (strncmp(path, rootpath, rootlen)) {
 		dprintk("nfsd: fs_locations failed;"
 			"%s is not contained in %s\n", path, rootpath);
 		*stat = nfserr_notsupp;
-		return NULL;
+		path = NULL;
+		goto out;
 	}
-
-	return path + strlen(rootpath);
+	path += rootlen;
+out:
+	fh_put(&tmp_fh);
+	return path;
 }
 
 /*
-- 
cgit v0.10.2


From 492e1501431e0d24c5b46933fdcb60639eacded7 Mon Sep 17 00:00:00 2001
From: Mika Korhonen <mika.j.korhonen@gmail.com>
Date: Tue, 9 Jun 2009 21:52:35 +0300
Subject: mtd: OneNAND: spelling fixes

Signed-off-by: Mika Korhonen <mika.j.korhonen@gmail.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 6e82909..ff66e43 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1191,7 +1191,7 @@ static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from,
  			/*
  			 * Chip boundary handling in DDP
  			 * Now we issued chip 1 read and pointed chip 1
- 			 * bufferam so we have to point chip 0 bufferam.
+			 * bufferram so we have to point chip 0 bufferram.
  			 */
  			if (ONENAND_IS_DDP(this) &&
  			    unlikely(from == (this->chipsize >> 1))) {
@@ -1867,8 +1867,8 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 			ONENAND_SET_NEXT_BUFFERRAM(this);
 
 		/*
-		 * 2 PLANE, MLC, and Flex-OneNAND doesn't support
-		 * write-while-programe feature.
+		 * 2 PLANE, MLC, and Flex-OneNAND do not support
+		 * write-while-program feature.
 		 */
 		if (!ONENAND_IS_2PLANE(this) && !first) {
 			ONENAND_SET_PREV_BUFFERRAM(this);
@@ -1879,7 +1879,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 			onenand_update_bufferram(mtd, prev, !ret && !prev_subpage);
 			if (ret) {
 				written -= prevlen;
-				printk(KERN_ERR "onenand_write_ops_nolock: write filaed %d\n", ret);
+				printk(KERN_ERR "onenand_write_ops_nolock: write failed %d\n", ret);
 				break;
 			}
 
@@ -1905,7 +1905,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 			/* In partial page write we don't update bufferram */
 			onenand_update_bufferram(mtd, to, !ret && !subpage);
 			if (ret) {
-				printk(KERN_ERR "onenand_write_ops_nolock: write filaed %d\n", ret);
+				printk(KERN_ERR "onenand_write_ops_nolock: write failed %d\n", ret);
 				break;
 			}
 
@@ -2201,7 +2201,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 	/* Grab the lock and see if the device is available */
 	onenand_get_device(mtd, FL_ERASING);
 
-	/* Loop throught the pages */
+	/* Loop through the blocks */
 	instr->state = MTD_ERASING;
 
 	while (len) {
@@ -2328,7 +2328,7 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
         if (bbm->bbt)
                 bbm->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1);
 
-        /* We write two bytes, so we dont have to mess with 16 bit access */
+        /* We write two bytes, so we don't have to mess with 16-bit access */
         ofs += mtd->oobsize + (bbm->badblockpos & ~0x01);
 	/* FIXME : What to do when marking SLC block in partition
 	 * 	   with MLC erasesize? For now, it is not advisable to
@@ -2557,7 +2557,7 @@ static void onenand_unlock_all(struct mtd_info *mtd)
 
 #ifdef CONFIG_MTD_ONENAND_OTP
 
-/* Interal OTP operation */
+/* Internal OTP operation */
 typedef int (*otp_op_t)(struct mtd_info *mtd, loff_t form, size_t len,
 		size_t *retlen, u_char *buf);
 
@@ -2921,7 +2921,7 @@ static void onenand_check_features(struct mtd_info *mtd)
 		this->options |= ONENAND_HAS_2PLANE;
 
 	case ONENAND_DEVICE_DENSITY_2Gb:
-		/* 2Gb DDP don't have 2 plane */
+		/* 2Gb DDP does not have 2 plane */
 		if (!ONENAND_IS_DDP(this))
 			this->options |= ONENAND_HAS_2PLANE;
 		this->options |= ONENAND_HAS_UNLOCK_ALL;
@@ -3364,7 +3364,7 @@ static int onenand_probe(struct mtd_info *mtd)
 	/* It's real page size */
 	this->writesize = mtd->writesize;
 
-	/* REVIST: Multichip handling */
+	/* REVISIT: Multichip handling */
 
 	if (FLEXONENAND(this))
 		flexonenand_get_size(mtd);
-- 
cgit v0.10.2


From 652696efce135559b98ee5a3d7899295e8d553fa Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kmpark@infradead.org>
Date: Wed, 24 Jun 2009 12:03:51 +0900
Subject: mtd: OneNAND: 4-bit ECC status macros

Define ECC status for 4-bit ECC status

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index 86a6bbe..acadbf5 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -207,6 +207,9 @@
 #define ONENAND_ECC_2BIT		(1 << 1)
 #define ONENAND_ECC_2BIT_ALL		(0xAAAA)
 #define FLEXONENAND_UNCORRECTABLE_ERROR	(0x1010)
+#define ONENAND_ECC_3BIT		(1 << 2)
+#define ONENAND_ECC_4BIT		(1 << 3)
+#define ONENAND_ECC_4BIT_UNCORRECTABLE	(0x1010)
 
 /*
  * One-Time Programmable (OTP)
-- 
cgit v0.10.2


From b8b3ee9aabbc3e6fc7ef025d861dd780b84eb6c5 Mon Sep 17 00:00:00 2001
From: vimal singh <vimalsingh@ti.com>
Date: Thu, 9 Jul 2009 20:41:22 +0530
Subject: mtd: nand: remove repeated comment, fix spelling

Singed-off-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 8c21b89..4a5a329 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -688,8 +688,7 @@ nand_get_device(struct nand_chip *chip, struct mtd_info *mtd, int new_state)
  retry:
 	spin_lock(lock);
 
-	/* Hardware controller shared among independend devices */
-	/* Hardware controller shared among independend devices */
+	/* Hardware controller shared among independent devices */
 	if (!chip->controller->active)
 		chip->controller->active = chip;
 
-- 
cgit v0.10.2


From 20d8e2489d619ac4f14c46ca376655fc06b3c1ff Mon Sep 17 00:00:00 2001
From: vimal singh <vimalsingh@ti.com>
Date: Tue, 7 Jul 2009 15:49:49 +0530
Subject: mtd: nand_base: use __func__ instead of typing names

Correcting debug prints by removing function names from print messages
and using '__func__' macro instead.

Function names were wrong in few places.

Signed-off-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 4a5a329..268c9a4 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1412,8 +1412,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 	int len;
 	uint8_t *buf = ops->oobbuf;
 
-	DEBUG(MTD_DEBUG_LEVEL3, "nand_read_oob: from = 0x%08Lx, len = %i\n",
-	      (unsigned long long)from, readlen);
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: from = 0x%08Lx, len = %i\n",
+			__func__, (unsigned long long)from, readlen);
 
 	if (ops->mode == MTD_OOB_AUTO)
 		len = chip->ecc.layout->oobavail;
@@ -1421,8 +1421,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 		len = mtd->oobsize;
 
 	if (unlikely(ops->ooboffs >= len)) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_read_oob: "
-			"Attempt to start read outside oob\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt to start read "
+					"outside oob\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1430,8 +1430,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 	if (unlikely(from >= mtd->size ||
 		     ops->ooboffs + readlen > ((mtd->size >> chip->page_shift) -
 					(from >> chip->page_shift)) * len)) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_read_oob: "
-			"Attempt read beyond end of device\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt read beyond end "
+					"of device\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1505,8 +1505,8 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
 
 	/* Do not allow reads past end of device */
 	if (ops->datbuf && (from + ops->len) > mtd->size) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_read_oob: "
-		      "Attempt read beyond end of device\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt read "
+				"beyond end of device\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1815,8 +1815,8 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 
 	/* reject writes, which are not page aligned */
 	if (NOTALIGNED(to) || NOTALIGNED(ops->len)) {
-		printk(KERN_NOTICE "nand_write: "
-		       "Attempt to write not page aligned data\n");
+		printk(KERN_NOTICE "%s: Attempt to write not "
+				"page aligned data\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1943,8 +1943,8 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 	int chipnr, page, status, len;
 	struct nand_chip *chip = mtd->priv;
 
-	DEBUG(MTD_DEBUG_LEVEL3, "nand_write_oob: to = 0x%08x, len = %i\n",
-	      (unsigned int)to, (int)ops->ooblen);
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: to = 0x%08x, len = %i\n",
+			 __func__, (unsigned int)to, (int)ops->ooblen);
 
 	if (ops->mode == MTD_OOB_AUTO)
 		len = chip->ecc.layout->oobavail;
@@ -1953,14 +1953,14 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 
 	/* Do not allow write past end of page */
 	if ((ops->ooboffs + ops->ooblen) > len) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_write_oob: "
-		      "Attempt to write past end of page\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt to write "
+				"past end of page\n", __func__);
 		return -EINVAL;
 	}
 
 	if (unlikely(ops->ooboffs >= len)) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_do_write_oob: "
-			"Attempt to start write outside oob\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt to start "
+				"write outside oob\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1969,8 +1969,8 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 		     ops->ooboffs + ops->ooblen >
 			((mtd->size >> chip->page_shift) -
 			 (to >> chip->page_shift)) * len)) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_do_write_oob: "
-			"Attempt write beyond end of device\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt write beyond "
+				"end of device\n", __func__);
 		return -EINVAL;
 	}
 
@@ -2025,8 +2025,8 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
 
 	/* Do not allow writes past end of device */
 	if (ops->datbuf && (to + ops->len) > mtd->size) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_write_oob: "
-		      "Attempt write beyond end of device\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Attempt write beyond "
+				"end of device\n", __func__);
 		return -EINVAL;
 	}
 
@@ -2116,26 +2116,27 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 	unsigned int bbt_masked_page = 0xffffffff;
 	loff_t len;
 
-	DEBUG(MTD_DEBUG_LEVEL3, "nand_erase: start = 0x%012llx, len = %llu\n",
-	      (unsigned long long)instr->addr, (unsigned long long)instr->len);
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: start = 0x%012llx, len = %llu\n",
+				__func__, (unsigned long long)instr->addr,
+				(unsigned long long)instr->len);
 
 	/* Start address must align on block boundary */
 	if (instr->addr & ((1 << chip->phys_erase_shift) - 1)) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_erase: Unaligned address\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Unaligned address\n", __func__);
 		return -EINVAL;
 	}
 
 	/* Length must align on block boundary */
 	if (instr->len & ((1 << chip->phys_erase_shift) - 1)) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_erase: "
-		      "Length not block aligned\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Length not block aligned\n",
+					__func__);
 		return -EINVAL;
 	}
 
 	/* Do not allow erase past end of device */
 	if ((instr->len + instr->addr) > mtd->size) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_erase: "
-		      "Erase past end of device\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Erase past end of device\n",
+					__func__);
 		return -EINVAL;
 	}
 
@@ -2156,8 +2157,8 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 
 	/* Check, if it is write protected */
 	if (nand_check_wp(mtd)) {
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_erase: "
-		      "Device is write protected!!!\n");
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Device is write protected!!!\n",
+					__func__);
 		instr->state = MTD_ERASE_FAILED;
 		goto erase_exit;
 	}
@@ -2182,8 +2183,8 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		 */
 		if (nand_block_checkbad(mtd, ((loff_t) page) <<
 					chip->page_shift, 0, allowbbt)) {
-			printk(KERN_WARNING "nand_erase: attempt to erase a "
-			       "bad block at page 0x%08x\n", page);
+			printk(KERN_WARNING "%s: attempt to erase a bad block "
+					"at page 0x%08x\n", __func__, page);
 			instr->state = MTD_ERASE_FAILED;
 			goto erase_exit;
 		}
@@ -2210,8 +2211,8 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 
 		/* See if block erase succeeded */
 		if (status & NAND_STATUS_FAIL) {
-			DEBUG(MTD_DEBUG_LEVEL0, "nand_erase: "
-			      "Failed erase, page 0x%08x\n", page);
+			DEBUG(MTD_DEBUG_LEVEL0, "%s: Failed erase, "
+					"page 0x%08x\n", __func__, page);
 			instr->state = MTD_ERASE_FAILED;
 			instr->fail_addr =
 				((loff_t)page << chip->page_shift);
@@ -2271,9 +2272,9 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		if (!rewrite_bbt[chipnr])
 			continue;
 		/* update the BBT for chip */
-		DEBUG(MTD_DEBUG_LEVEL0, "nand_erase_nand: nand_update_bbt "
-		      "(%d:0x%0llx 0x%0x)\n", chipnr, rewrite_bbt[chipnr],
-		      chip->bbt_td->pages[chipnr]);
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: nand_update_bbt "
+			"(%d:0x%0llx 0x%0x)\n", __func__, chipnr,
+			rewrite_bbt[chipnr], chip->bbt_td->pages[chipnr]);
 		nand_update_bbt(mtd, rewrite_bbt[chipnr]);
 	}
 
@@ -2291,7 +2292,7 @@ static void nand_sync(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd->priv;
 
-	DEBUG(MTD_DEBUG_LEVEL3, "nand_sync: called\n");
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: called\n", __func__);
 
 	/* Grab the lock and see if the device is available */
 	nand_get_device(chip, mtd, FL_SYNCING);
@@ -2355,8 +2356,8 @@ static void nand_resume(struct mtd_info *mtd)
 	if (chip->state == FL_PM_SUSPENDED)
 		nand_release_device(mtd);
 	else
-		printk(KERN_ERR "nand_resume() called for a chip which is not "
-		       "in suspended state\n");
+		printk(KERN_ERR "%s called for a chip which is not "
+		       "in suspended state\n", __func__);
 }
 
 /*
@@ -2857,7 +2858,8 @@ int nand_scan(struct mtd_info *mtd, int maxchips)
 
 	/* Many callers got this wrong, so check for it for a while... */
 	if (!mtd->owner && caller_is_module()) {
-		printk(KERN_CRIT "nand_scan() called with NULL mtd->owner!\n");
+		printk(KERN_CRIT "%s called with NULL mtd->owner!\n",
+				__func__);
 		BUG();
 	}
 
-- 
cgit v0.10.2


From dff1550986a4c0e2a4e857c9085ef3cb66b2cec5 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Mon, 6 Jul 2009 12:02:08 +0200
Subject: mtd: fix a typo in comment

mtdblock erase_write(): fix typo in comment

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 77db5ce..bcfb177 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -84,7 +84,7 @@ static int erase_write (struct mtd_info *mtd, unsigned long pos,
 	remove_wait_queue(&wait_q, &wait);
 
 	/*
-	 * Next, writhe data to flash.
+	 * Next, write the data to flash.
 	 */
 
 	ret = mtd->write(mtd, pos, len, &retlen, buf);
-- 
cgit v0.10.2


From 44a1f2085e8fe07b3aecdab7c391ca057d75da0f Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Tue, 30 Jun 2009 15:38:00 -0400
Subject: mtd: ep93xx: cleanup includes in ts7250 nand driver

1. <linux/io.h> should be included not <asm/io.h>
2. add platform specific header <mach/ts72xx.h>

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/ts7250.c b/drivers/mtd/nand/ts7250.c
index 2c410a0..0f5562a 100644
--- a/drivers/mtd/nand/ts7250.c
+++ b/drivers/mtd/nand/ts7250.c
@@ -24,8 +24,11 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
-#include <asm/io.h>
+#include <linux/io.h>
+
 #include <mach/hardware.h>
+#include <mach/ts72xx.h>
+
 #include <asm/sizes.h>
 #include <asm/mach-types.h>
 
-- 
cgit v0.10.2


From 2763c508a3c8f8ec5d6df4e8c63d5e2a5a7d3954 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 17 Jul 2009 17:54:14 +0200
Subject: mtd: physmap_of: use resource_size

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 39d357b..f223f3f 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -190,6 +190,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 	const u32 *p;
 	int reg_tuple_size;
 	struct mtd_info **mtd_list = NULL;
+	resource_size_t res_size;
 
 	reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32);
 
@@ -233,8 +234,8 @@ static int __devinit of_flash_probe(struct of_device *dev,
 			(unsigned long long)res.end);
 
 		err = -EBUSY;
-		info->list[i].res = request_mem_region(res.start, res.end -
-						       res.start + 1,
+		res_size = resource_size(&res);
+		info->list[i].res = request_mem_region(res.start, res_size,
 						       dev_name(&dev->dev));
 		if (!info->list[i].res)
 			goto err_out;
@@ -249,7 +250,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 
 		info->list[i].map.name = dev_name(&dev->dev);
 		info->list[i].map.phys = res.start;
-		info->list[i].map.size = res.end - res.start + 1;
+		info->list[i].map.size = res_size;
 		info->list[i].map.bankwidth = *width;
 
 		err = -ENOMEM;
-- 
cgit v0.10.2


From 76d6a4791609e4d14b411a513c7648b19b258d8f Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 17 Jul 2009 17:47:37 +0200
Subject: mtd: plat-ram: use resource_size

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c
index 49c9ece..dafb919 100644
--- a/drivers/mtd/maps/plat-ram.c
+++ b/drivers/mtd/maps/plat-ram.c
@@ -175,7 +175,7 @@ static int platram_probe(struct platform_device *pdev)
 	/* setup map parameters */
 
 	info->map.phys = res->start;
-	info->map.size = (res->end - res->start) + 1;
+	info->map.size = resource_size(res);
 	info->map.name = pdata->mapname != NULL ?
 			(char *)pdata->mapname : (char *)pdev->name;
 	info->map.bankwidth = pdata->bankwidth;
-- 
cgit v0.10.2


From 0ffd24fc7f82a0b594250e5f221340be4c322cda Mon Sep 17 00:00:00 2001
From: Russell King <linux@arm.linux.org.uk>
Date: Sat, 18 Jul 2009 14:10:44 +0100
Subject: mtd: afs: fix build warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/mtd/afs.c:244: warning: format ‘%5d’ expects type ‘int’, but argument 4 has type ‘uint64_t’

[dwmw2: fix incorrect 'KB' too]
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/afs.c b/drivers/mtd/afs.c
index d072ca5..cec7ab9 100644
--- a/drivers/mtd/afs.c
+++ b/drivers/mtd/afs.c
@@ -239,7 +239,7 @@ static int parse_afs_partitions(struct mtd_info *mtd,
 		parts[idx].offset	= img_ptr;
 		parts[idx].mask_flags	= 0;
 
-		printk("  mtd%d: at 0x%08x, %5dKB, %8u, %s\n",
+		printk("  mtd%d: at 0x%08x, %5lluKiB, %8u, %s\n",
 			idx, img_ptr, parts[idx].size / 1024,
 			iis.imageNumber, str);
 
-- 
cgit v0.10.2


From 05dd180709fca14fbae617c0dab1bed56be334fc Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Sun, 19 Jul 2009 21:19:08 -0400
Subject: mtd: correct typo "MTD_DATAFLASH_VERIFY_WRITE"

Fix the misspelling to match the actual config variable defined in
drivers/mtd/devi ces/Kconfig:

config MTD_DATAFLASH_WRITE_VERIFY
        bool "Verify DataFlash page writes"
        depends on MTD_DATAFLASH

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 43976aa..70a7369 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -401,7 +401,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		(void) dataflash_waitready(priv->spi);
 
 
-#ifdef CONFIG_MTD_DATAFLASH_VERIFY_WRITE
+#ifdef CONFIG_MTD_DATAFLASH_WRITE_VERIFY
 
 		/* (3) Compare to Buffer1 */
 		addr = pageaddr << priv->page_offset;
@@ -430,7 +430,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		} else
 			status = 0;
 
-#endif	/* CONFIG_MTD_DATAFLASH_VERIFY_WRITE */
+#endif	/* CONFIG_MTD_DATAFLASH_WRITE_VERIFY */
 
 		remaining = remaining - writelen;
 		pageaddr++;
-- 
cgit v0.10.2


From 9a73290d7735c0671d1d2379ed40025db8b773d0 Mon Sep 17 00:00:00 2001
From: "Singh, Vimal" <vimalsingh@ti.com>
Date: Fri, 12 Dec 2008 00:10:57 +0000
Subject: mtd: nand_base: allow drivers to choose ECC block size

This patch allows core driver to choose ECC block size in
sw ecc case.

Signed-off-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 268c9a4..4c5e8a7 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2728,7 +2728,8 @@ int nand_scan_tail(struct mtd_info *mtd)
 		chip->ecc.write_page_raw = nand_write_page_raw;
 		chip->ecc.read_oob = nand_read_oob_std;
 		chip->ecc.write_oob = nand_write_oob_std;
-		chip->ecc.size = 256;
+		if (!chip->ecc.size)
+			chip->ecc.size = 256;
 		chip->ecc.bytes = 3;
 		break;
 
-- 
cgit v0.10.2


From ad4fbc7921bd7cca108ecc1340a014e91ecc8536 Mon Sep 17 00:00:00 2001
From: vimal singh <vimal.newwork@gmail.com>
Date: Thu, 30 Jul 2009 20:54:27 +0530
Subject: mtd: physmap_of: fix incorrect check

This patch fixes a spelling error that has resulted from copy and
pasting. The location of the error was found using a semantic patch
but the semantic patch was not trying to find these errors. After
looking things over it seemed logical that this change was needed.

The patch also makes sure mtd_list is not being freed if it has not
been allocated

Signed-off-by: Stoyan Gaydarov <sgayda2@uiuc.edu>
Signed-off-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index f223f3f..e828d58 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -205,7 +205,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 		dev_err(&dev->dev, "Malformed reg property on %s\n",
 				dev->node->full_name);
 		err = -EINVAL;
-		goto err_out;
+		goto err_flash_remove;
 	}
 	count /= reg_tuple_size;
 
@@ -213,14 +213,14 @@ static int __devinit of_flash_probe(struct of_device *dev,
 	info = kzalloc(sizeof(struct of_flash) +
 		       sizeof(struct of_flash_list) * count, GFP_KERNEL);
 	if (!info)
-		goto err_out;
-
-	mtd_list = kzalloc(sizeof(struct mtd_info) * count, GFP_KERNEL);
-	if (!info)
-		goto err_out;
+		goto err_flash_remove;
 
 	dev_set_drvdata(&dev->dev, info);
 
+	mtd_list = kzalloc(sizeof(struct mtd_info) * count, GFP_KERNEL);
+	if (!mtd_list)
+		goto err_flash_remove;
+
 	for (i = 0; i < count; i++) {
 		err = -ENXIO;
 		if (of_address_to_resource(dp, i, &res)) {
@@ -339,6 +339,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 
 err_out:
 	kfree(mtd_list);
+err_flash_remove:
 	of_flash_remove(dev);
 
 	return err;
-- 
cgit v0.10.2


From 269c0ee66367b11de9758ee64ea039843f0c7cad Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 31 Jul 2009 14:47:58 +0200
Subject: slram: Read buffer overflow

map[count] is checked before count < SLRAM_MAX_DEVICES_PARAMS

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c
index 00248e8..239500b 100644
--- a/drivers/mtd/devices/slram.c
+++ b/drivers/mtd/devices/slram.c
@@ -341,7 +341,7 @@ static int init_slram(void)
 #else
 	int count;
 
-	for (count = 0; (map[count]) && (count < SLRAM_MAX_DEVICES_PARAMS);
+	for (count = 0; count < SLRAM_MAX_DEVICES_PARAMS && map[count];
 			count++) {
 	}
 
-- 
cgit v0.10.2


From c6f7e7beb9e6a64816f534a3a0dd0cfa4937f1fe Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 31 Jul 2009 16:21:01 +0200
Subject: mtd: tests: fix read buffer overflows

Check whether index is within bounds before testing the element.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index a18e8d2..5553cd4 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -512,7 +512,7 @@ static int __init mtd_oobtest_init(void)
 		goto out;
 
 	addr0 = 0;
-	for (i = 0; bbt[i] && i < ebcnt; ++i)
+	for (i = 0; i < ebcnt && bbt[i]; ++i)
 		addr0 += mtd->erasesize;
 
 	/* Attempt to write off end of OOB */
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 9648818..103cac4 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -116,11 +116,11 @@ static int verify_eraseblock(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 
 	addr0 = 0;
-	for (i = 0; bbt[i] && i < ebcnt; ++i)
+	for (i = 0; i < ebcnt && bbt[i]; ++i)
 		addr0 += mtd->erasesize;
 
 	addrn = mtd->size;
-	for (i = 0; bbt[ebcnt - i - 1] && i < ebcnt; ++i)
+	for (i = 0; i < ebcnt && bbt[ebcnt - i - 1]; ++i)
 		addrn -= mtd->erasesize;
 
 	set_random_data(writebuf, mtd->erasesize);
@@ -219,11 +219,11 @@ static int crosstest(void)
 	memset(pp1, 0, pgsize * 4);
 
 	addr0 = 0;
-	for (i = 0; bbt[i] && i < ebcnt; ++i)
+	for (i = 0; i < ebcnt && bbt[i]; ++i)
 		addr0 += mtd->erasesize;
 
 	addrn = mtd->size;
-	for (i = 0; bbt[ebcnt - i - 1] && i < ebcnt; ++i)
+	for (i = 0; i < ebcnt && bbt[ebcnt - i - 1]; ++i)
 		addrn -= mtd->erasesize;
 
 	/* Read 2nd-to-last page to pp1 */
@@ -317,7 +317,7 @@ static int erasecrosstest(void)
 
 	ebnum = 0;
 	addr0 = 0;
-	for (i = 0; bbt[i] && i < ebcnt; ++i) {
+	for (i = 0; i < ebcnt && bbt[i]; ++i) {
 		addr0 += mtd->erasesize;
 		ebnum += 1;
 	}
@@ -413,7 +413,7 @@ static int erasetest(void)
 
 	ebnum = 0;
 	addr0 = 0;
-	for (i = 0; bbt[i] && i < ebcnt; ++i) {
+	for (i = 0; i < ebcnt && bbt[i]; ++i) {
 		addr0 += mtd->erasesize;
 		ebnum += 1;
 	}
-- 
cgit v0.10.2


From b0469ea785d12a6c025fa213293d608fc41405fe Mon Sep 17 00:00:00 2001
From: Siddarth Gore <gores@marvell.com>
Date: Tue, 4 Aug 2009 08:42:08 +0530
Subject: mtd: m25p80: add support for 3 Macronix flash chips

Signed-off-by: Siddarth Gore <gores@marvell.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 10ed195..6d8d265 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -501,7 +501,10 @@ static struct flash_info __devinitdata m25p_data [] = {
 	{ "at26df321",  0x1f4701, 0, 64 * 1024, 64, SECT_4K, },
 
 	/* Macronix */
+	{ "mx25l3205d", 0xc22016, 0, 64 * 1024, 64, },
+	{ "mx25l6405d", 0xc22017, 0, 64 * 1024, 128, },
 	{ "mx25l12805d", 0xc22018, 0, 64 * 1024, 256, },
+	{ "mx25l12855e", 0xc22618, 0, 64 * 1024, 256, },
 
 	/* Spansion -- single (large) sector size only, at least
 	 * for the chips listed here (without boot sectors).
-- 
cgit v0.10.2


From 0acfe530a2be9192861b84566625ba9b95703226 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 7 Aug 2009 12:34:48 +0900
Subject: mtd: onenand: select MTD_PARTITIONS

All of the onenand drivers depend on mtd partition support being compiled
in, so just select it. Fixes up build breakage:

drivers/built-in.o: In function `generic_onenand_remove':
generic.c:(.devexit.text+0x80): undefined reference to `del_mtd_partitions'

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index 79fa79e..3a094d1 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -5,6 +5,7 @@
 menuconfig MTD_ONENAND
 	tristate "OneNAND Device Support"
 	depends on MTD
+	select MTD_PARTITIONS
 	help
 	  This enables support for accessing all type of OneNAND flash
 	  devices. For further information see
@@ -66,7 +67,6 @@ config MTD_ONENAND_2X_PROGRAM
 
 config MTD_ONENAND_SIM
 	tristate "OneNAND simulator support"
-	depends on MTD_PARTITIONS
 	help
 	  The simulator may simulate various OneNAND flash chips for the
 	  OneNAND MTD layer.
-- 
cgit v0.10.2


From fca910883324d437a24d447b08f524fa19261a94 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Thu, 6 Aug 2009 16:05:32 -0700
Subject: mtd: make few symbols static

Make mtd_group and mtd_groups static since they are only used in this
file.

[Amended by Artem Bityutskiy]

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 00ebf7a..3f559c0 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -213,11 +213,11 @@ static struct attribute *mtd_attrs[] = {
 	NULL,
 };
 
-struct attribute_group mtd_group = {
+static struct attribute_group mtd_group = {
 	.attrs		= mtd_attrs,
 };
 
-struct attribute_group *mtd_groups[] = {
+static struct attribute_group *mtd_groups[] = {
 	&mtd_group,
 	NULL,
 };
-- 
cgit v0.10.2


From 2c78c44362bc9b7c715a3c2119b89a407c1cb739 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 10 Aug 2009 10:16:37 +0200
Subject: mtd: pmcmsp-flash: fix error paths in init_msp_flash

Cleanin up after errors in init_msp_flash().

Also cleanup_msp_flash() attempts to determine the size of
msp_flash with `sizeof(msp_flash) / sizeof(struct mtd_info **)'
This will not work since msp_flash is not an array.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c
index 4768bd5..c8fd8da 100644
--- a/drivers/mtd/maps/pmcmsp-flash.c
+++ b/drivers/mtd/maps/pmcmsp-flash.c
@@ -50,7 +50,7 @@ static int fcnt;
 
 static int __init init_msp_flash(void)
 {
-	int i, j;
+	int i, j, ret = -ENOMEM;
 	int offset, coff;
 	char *env;
 	int pcnt;
@@ -75,14 +75,16 @@ static int __init init_msp_flash(void)
 	printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt);
 
 	msp_flash = kmalloc(fcnt * sizeof(struct map_info *), GFP_KERNEL);
+	if (!msp_flash)
+		return -ENOMEM;
+
 	msp_parts = kmalloc(fcnt * sizeof(struct mtd_partition *), GFP_KERNEL);
+	if (!msp_parts)
+		goto free_msp_flash;
+
 	msp_maps = kcalloc(fcnt, sizeof(struct mtd_info), GFP_KERNEL);
-	if (!msp_flash || !msp_parts || !msp_maps) {
-		kfree(msp_maps);
-		kfree(msp_parts);
-		kfree(msp_flash);
-		return -ENOMEM;
-	}
+	if (!msp_maps)
+		goto free_msp_parts;
 
 	/* loop over the flash devices, initializing each */
 	for (i = 0; i < fcnt; i++) {
@@ -100,13 +102,18 @@ static int __init init_msp_flash(void)
 
 		msp_parts[i] = kcalloc(pcnt, sizeof(struct mtd_partition),
 				       GFP_KERNEL);
+		if (!msp_parts[i])
+			goto cleanup_loop;
 
 		/* now initialize the devices proper */
 		flash_name[5] = '0' + i;
 		env = prom_getenv(flash_name);
 
-		if (sscanf(env, "%x:%x", &addr, &size) < 2)
-			return -ENXIO;
+		if (sscanf(env, "%x:%x", &addr, &size) < 2) {
+			ret = -ENXIO;
+			kfree(msp_parts[i]);
+			goto cleanup_loop;
+		}
 		addr = CPHYSADDR(addr);
 
 		printk(KERN_NOTICE
@@ -122,13 +129,23 @@ static int __init init_msp_flash(void)
 		 */
 		if (size > CONFIG_MSP_FLASH_MAP_LIMIT)
 			size = CONFIG_MSP_FLASH_MAP_LIMIT;
+
 		msp_maps[i].virt = ioremap(addr, size);
+		if (msp_maps[i].virt == NULL) {
+			ret = -ENXIO;
+			kfree(msp_parts[i]);
+			goto cleanup_loop;
+		}
+
 		msp_maps[i].bankwidth = 1;
-		msp_maps[i].name = strncpy(kmalloc(7, GFP_KERNEL),
-					flash_name, 7);
+		msp_maps[i].name = kmalloc(7, GFP_KERNEL);
+		if (!msp_maps[i].name) {
+			iounmap(msp_maps[i].virt);
+			kfree(msp_parts[i]);
+			goto cleanup_loop;
+		}
 
-		if (msp_maps[i].virt == NULL)
-			return -ENXIO;
+		msp_maps[i].name = strncpy(msp_maps[i].name, flash_name, 7);
 
 		for (j = 0; j < pcnt; j++) {
 			part_name[5] = '0' + i;
@@ -136,8 +153,14 @@ static int __init init_msp_flash(void)
 
 			env = prom_getenv(part_name);
 
-			if (sscanf(env, "%x:%x:%n", &offset, &size, &coff) < 2)
-				return -ENXIO;
+			if (sscanf(env, "%x:%x:%n", &offset, &size,
+						&coff) < 2) {
+				ret = -ENXIO;
+				kfree(msp_maps[i].name);
+				iounmap(msp_maps[i].virt);
+				kfree(msp_parts[i]);
+				goto cleanup_loop;
+			}
 
 			msp_parts[i][j].size = size;
 			msp_parts[i][j].offset = offset;
@@ -152,18 +175,37 @@ static int __init init_msp_flash(void)
 			add_mtd_partitions(msp_flash[i], msp_parts[i], pcnt);
 		} else {
 			printk(KERN_ERR "map probe failed for flash\n");
-			return -ENXIO;
+			ret = -ENXIO;
+			kfree(msp_maps[i].name);
+			iounmap(msp_maps[i].virt);
+			kfree(msp_parts[i]);
+			goto cleanup_loop;
 		}
 	}
 
 	return 0;
+
+cleanup_loop:
+	while (i--) {
+		del_mtd_partitions(msp_flash[i]);
+		map_destroy(msp_flash[i]);
+		kfree(msp_maps[i].name);
+		iounmap(msp_maps[i].virt);
+		kfree(msp_parts[i]);
+	}
+	kfree(msp_maps);
+free_msp_parts:
+	kfree(msp_parts);
+free_msp_flash:
+	kfree(msp_flash);
+	return ret;
 }
 
 static void __exit cleanup_msp_flash(void)
 {
 	int i;
 
-	for (i = 0; i < sizeof(msp_flash) / sizeof(struct mtd_info **); i++) {
+	for (i = 0; i < fcnt; i++) {
 		del_mtd_partitions(msp_flash[i]);
 		map_destroy(msp_flash[i]);
 		iounmap((void *)msp_maps[i].virt);
-- 
cgit v0.10.2


From 91e0955b57377578f7555b5d0f2a21040691004b Mon Sep 17 00:00:00 2001
From: Gerard Lledo <gerard.lledo@gmail.com>
Date: Wed, 17 Jun 2009 13:08:55 -0700
Subject: jffs2: move jffs2_gcd_mtd threads to the new kthread API

Move the jffs2 garbage collecting thread to the new kthread API.

Signed-off-by: Gerard Lledo <gerard.lledo@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index e958010..3ff50da 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -15,6 +15,7 @@
 #include <linux/completion.h>
 #include <linux/sched.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
 #include "nodelist.h"
 
 
@@ -31,7 +32,7 @@ void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c)
 /* This must only ever be called when no GC thread is currently running */
 int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c)
 {
-	pid_t pid;
+	struct task_struct *tsk;
 	int ret = 0;
 
 	BUG_ON(c->gc_task);
@@ -39,15 +40,16 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c)
 	init_completion(&c->gc_thread_start);
 	init_completion(&c->gc_thread_exit);
 
-	pid = kernel_thread(jffs2_garbage_collect_thread, c, CLONE_FS|CLONE_FILES);
-	if (pid < 0) {
-		printk(KERN_WARNING "fork failed for JFFS2 garbage collect thread: %d\n", -pid);
+	tsk = kthread_run(jffs2_garbage_collect_thread, c, "jffs2_gcd_mtd%d", c->mtd->index);
+	if (IS_ERR(tsk)) {
+		printk(KERN_WARNING "fork failed for JFFS2 garbage collect thread: %ld\n", -PTR_ERR(tsk));
 		complete(&c->gc_thread_exit);
-		ret = pid;
+		ret = PTR_ERR(tsk);
 	} else {
 		/* Wait for it... */
-		D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", pid));
+		D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", tsk->pid));
 		wait_for_completion(&c->gc_thread_start);
+		ret = tsk->pid;
 	}
 
 	return ret;
@@ -71,7 +73,6 @@ static int jffs2_garbage_collect_thread(void *_c)
 {
 	struct jffs2_sb_info *c = _c;
 
-	daemonize("jffs2_gcd_mtd%d", c->mtd->index);
 	allow_signal(SIGKILL);
 	allow_signal(SIGSTOP);
 	allow_signal(SIGCONT);
@@ -107,6 +108,11 @@ static int jffs2_garbage_collect_thread(void *_c)
 		 * the GC thread get there first. */
 		schedule_timeout_interruptible(msecs_to_jiffies(50));
 
+		if (kthread_should_stop()) {
+			D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread():  kthread_stop() called.\n"));
+			goto die;
+		}
+
 		/* Put_super will send a SIGKILL and then wait on the sem.
 		 */
 		while (signal_pending(current) || freezing(current)) {
-- 
cgit v0.10.2


From 1be10a88cac5e589cdd2bcb0cf6a13ed30bcc233 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 4 Sep 2009 11:59:32 -0400
Subject: nfsd4: filehandle leak or error exit from fh_compose()

A number of callers (nfsd4_encode_fattr(), at least) don't bother to
release the filehandle returned to fh_compose() if fh_compose() returns
an error.  So, modify fh_compose() to release the filehandle before
returning an error.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index bce0b2b..01965b2 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -557,8 +557,10 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 
 		if (inode)
 			_fh_update(fhp, exp, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255)
+		if (fhp->fh_handle.fh_fileid_type == 255) {
+			fh_put(fhp);
 			return nfserr_opnotsupp;
+		}
 	}
 
 	return 0;
-- 
cgit v0.10.2


From 32bb0e0c778a4a6cd4534a5b98f08cd45e9ab5b9 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 23 Jul 2009 15:50:43 +0200
Subject: wm97xx-core: Pass platform_data to battery

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c
index 2957d48..cf8cbc6 100644
--- a/drivers/input/touchscreen/wm97xx-core.c
+++ b/drivers/input/touchscreen/wm97xx-core.c
@@ -561,6 +561,7 @@ static void wm97xx_ts_input_close(struct input_dev *idev)
 static int wm97xx_probe(struct device *dev)
 {
 	struct wm97xx *wm;
+	struct wm97xx_pdata *pdata = dev->platform_data;
 	int ret = 0, id = 0;
 
 	wm = kzalloc(sizeof(struct wm97xx), GFP_KERNEL);
@@ -656,6 +657,7 @@ static int wm97xx_probe(struct device *dev)
 	}
 	platform_set_drvdata(wm->battery_dev, wm);
 	wm->battery_dev->dev.parent = dev;
+	wm->battery_dev->dev.platform_data = pdata;
 	ret = platform_device_add(wm->battery_dev);
 	if (ret < 0)
 		goto batt_reg_err;
@@ -669,6 +671,7 @@ static int wm97xx_probe(struct device *dev)
 	}
 	platform_set_drvdata(wm->touch_dev, wm);
 	wm->touch_dev->dev.parent = dev;
+	wm->touch_dev->dev.platform_data = pdata;
 	ret = platform_device_add(wm->touch_dev);
 	if (ret < 0)
 		goto touch_reg_err;
-- 
cgit v0.10.2


From b8bdc1d0cfc488ac0d94724639f9a61b0a5a1d40 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 31 Aug 2009 06:20:12 +0200
Subject: wm97xx_battery: Use platform_data

This patch converts the wm97xx-battery driver to use platform_data
supplied by ac97 bus.

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index 8bde921..14ebd96 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -22,17 +22,19 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
-#include <linux/wm97xx_batt.h>
 
 static DEFINE_MUTEX(bat_lock);
 static struct work_struct bat_work;
 struct mutex work_lock;
 static int bat_status = POWER_SUPPLY_STATUS_UNKNOWN;
-static struct wm97xx_batt_info *pdata;
+static struct wm97xx_batt_info *gpdata;
 static enum power_supply_property *prop;
 
 static unsigned long wm97xx_read_bat(struct power_supply *bat_ps)
 {
+	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
+
 	return wm97xx_read_aux_adc(bat_ps->dev->parent->driver_data,
 					pdata->batt_aux) * pdata->batt_mult /
 					pdata->batt_div;
@@ -40,6 +42,9 @@ static unsigned long wm97xx_read_bat(struct power_supply *bat_ps)
 
 static unsigned long wm97xx_read_temp(struct power_supply *bat_ps)
 {
+	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
+
 	return wm97xx_read_aux_adc(bat_ps->dev->parent->driver_data,
 					pdata->temp_aux) * pdata->temp_mult /
 					pdata->temp_div;
@@ -49,6 +54,9 @@ static int wm97xx_bat_get_property(struct power_supply *bat_ps,
 			    enum power_supply_property psp,
 			    union power_supply_propval *val)
 {
+	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
+
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
 		val->intval = bat_status;
@@ -97,6 +105,8 @@ static void wm97xx_bat_external_power_changed(struct power_supply *bat_ps)
 static void wm97xx_bat_update(struct power_supply *bat_ps)
 {
 	int old_status = bat_status;
+	struct wm97xx_pdata *wmdata = bat_ps->dev->parent->platform_data;
+	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
 
 	mutex_lock(&work_lock);
 
@@ -149,6 +159,15 @@ static int __devinit wm97xx_bat_probe(struct platform_device *dev)
 	int ret = 0;
 	int props = 1;	/* POWER_SUPPLY_PROP_PRESENT */
 	int i = 0;
+	struct wm97xx_pdata *wmdata = dev->dev.platform_data;
+	struct wm97xx_batt_pdata *pdata;
+
+	if (gpdata) {
+		dev_err(&dev->dev, "Do not pass platform_data through "
+			"wm97xx_bat_set_pdata!\n");
+		return -EINVAL;
+	} else
+		pdata = wmdata->batt_pdata;
 
 	if (dev->id != -1)
 		return -EINVAL;
@@ -156,7 +175,7 @@ static int __devinit wm97xx_bat_probe(struct platform_device *dev)
 	mutex_init(&work_lock);
 
 	if (!pdata) {
-		dev_err(&dev->dev, "Please use wm97xx_bat_set_pdata\n");
+		dev_err(&dev->dev, "No platform_data supplied\n");
 		return -EINVAL;
 	}
 
@@ -229,6 +248,9 @@ err:
 
 static int __devexit wm97xx_bat_remove(struct platform_device *dev)
 {
+	struct wm97xx_pdata *wmdata = dev->dev.platform_data;
+	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
+
 	if (pdata && pdata->charge_gpio && pdata->charge_gpio >= 0)
 		gpio_free(pdata->charge_gpio);
 	flush_scheduled_work();
@@ -258,9 +280,9 @@ static void __exit wm97xx_bat_exit(void)
 	platform_driver_unregister(&wm97xx_bat_driver);
 }
 
-void __init wm97xx_bat_set_pdata(struct wm97xx_batt_info *data)
+void wm97xx_bat_set_pdata(struct wm97xx_batt_info *data)
 {
-	pdata = data;
+	gpdata = data;
 }
 EXPORT_SYMBOL_GPL(wm97xx_bat_set_pdata);
 
diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h
index 6f69968..b2c2297 100644
--- a/include/linux/wm97xx.h
+++ b/include/linux/wm97xx.h
@@ -286,6 +286,24 @@ struct wm97xx {
 	u16 suspend_mode;               /* PRP in suspend mode */
 };
 
+struct wm97xx_batt_pdata {
+	int	batt_aux;
+	int	temp_aux;
+	int	charge_gpio;
+	int	min_voltage;
+	int	max_voltage;
+	int	batt_div;
+	int	batt_mult;
+	int	temp_div;
+	int	temp_mult;
+	int	batt_tech;
+	char	*batt_name;
+};
+
+struct wm97xx_pdata {
+	struct wm97xx_batt_pdata	*batt_pdata;	/* battery data */
+};
+
 /*
  * Codec GPIO access (not supported on WM9705)
  * This can be used to set/get codec GPIO and Virtual GPIO status.
diff --git a/include/linux/wm97xx_batt.h b/include/linux/wm97xx_batt.h
index 9681d1a..a1d6419 100644
--- a/include/linux/wm97xx_batt.h
+++ b/include/linux/wm97xx_batt.h
@@ -3,22 +3,12 @@
 
 #include <linux/wm97xx.h>
 
-struct wm97xx_batt_info {
-	int	batt_aux;
-	int	temp_aux;
-	int	charge_gpio;
-	int	min_voltage;
-	int	max_voltage;
-	int	batt_div;
-	int	batt_mult;
-	int	temp_div;
-	int	temp_mult;
-	int	batt_tech;
-	char	*batt_name;
-};
+#warning This file will be removed soon, use wm97xx.h instead!
+
+#define wm97xx_batt_info wm97xx_batt_pdata
 
 #ifdef CONFIG_BATTERY_WM97XX
-void __init wm97xx_bat_set_pdata(struct wm97xx_batt_info *data);
+void wm97xx_bat_set_pdata(struct wm97xx_batt_info *data);
 #else
 static inline void wm97xx_bat_set_pdata(struct wm97xx_batt_info *data) {}
 #endif
-- 
cgit v0.10.2


From 7c87942aef52d2120e95ff1dec739998b9f95a78 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 23 Jul 2009 16:02:08 +0200
Subject: wm97xx_battery: Use irq to detect charger state

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index 14ebd96..cad1ba2 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -22,6 +22,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
+#include <linux/irq.h>
 
 static DEFINE_MUTEX(bat_lock);
 static struct work_struct bat_work;
@@ -137,6 +138,12 @@ static void wm97xx_bat_work(struct work_struct *work)
 	wm97xx_bat_update(&bat_ps);
 }
 
+static irqreturn_t wm97xx_chrg_irq(int irq, void *data)
+{
+	schedule_work(&bat_work);
+	return IRQ_HANDLED;
+}
+
 #ifdef CONFIG_PM
 static int wm97xx_bat_suspend(struct platform_device *dev, pm_message_t state)
 {
@@ -179,13 +186,18 @@ static int __devinit wm97xx_bat_probe(struct platform_device *dev)
 		return -EINVAL;
 	}
 
-	if (pdata->charge_gpio >= 0 && gpio_is_valid(pdata->charge_gpio)) {
+	if (gpio_is_valid(pdata->charge_gpio)) {
 		ret = gpio_request(pdata->charge_gpio, "BATT CHRG");
 		if (ret)
 			goto err;
 		ret = gpio_direction_input(pdata->charge_gpio);
 		if (ret)
 			goto err2;
+		ret = request_irq(gpio_to_irq(pdata->charge_gpio),
+				wm97xx_chrg_irq, IRQF_DISABLED,
+				"AC Detect", 0);
+		if (ret)
+			goto err2;
 		props++;	/* POWER_SUPPLY_PROP_STATUS */
 	}
 
@@ -202,7 +214,7 @@ static int __devinit wm97xx_bat_probe(struct platform_device *dev)
 
 	prop = kzalloc(props * sizeof(*prop), GFP_KERNEL);
 	if (!prop)
-		goto err2;
+		goto err3;
 
 	prop[i++] = POWER_SUPPLY_PROP_PRESENT;
 	if (pdata->charge_gpio >= 0)
@@ -235,13 +247,17 @@ static int __devinit wm97xx_bat_probe(struct platform_device *dev)
 	if (!ret)
 		schedule_work(&bat_work);
 	else
-		goto err3;
+		goto err4;
 
 	return 0;
-err3:
+err4:
 	kfree(prop);
+err3:
+	if (gpio_is_valid(pdata->charge_gpio))
+		free_irq(gpio_to_irq(pdata->charge_gpio), dev);
 err2:
-	gpio_free(pdata->charge_gpio);
+	if (gpio_is_valid(pdata->charge_gpio))
+		gpio_free(pdata->charge_gpio);
 err:
 	return ret;
 }
@@ -251,8 +267,10 @@ static int __devexit wm97xx_bat_remove(struct platform_device *dev)
 	struct wm97xx_pdata *wmdata = dev->dev.platform_data;
 	struct wm97xx_batt_pdata *pdata = wmdata->batt_pdata;
 
-	if (pdata && pdata->charge_gpio && pdata->charge_gpio >= 0)
+	if (pdata && gpio_is_valid(pdata->charge_gpio)) {
+		free_irq(gpio_to_irq(pdata->charge_gpio), dev);
 		gpio_free(pdata->charge_gpio);
+	}
 	flush_scheduled_work();
 	power_supply_unregister(&bat_ps);
 	kfree(prop);
-- 
cgit v0.10.2


From 83a8af0d31cfa6c728a68c00f6b1b518e2dcc03d Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Sat, 22 Aug 2009 00:36:43 +0200
Subject: wm97xx_battery: Convert to dev_pm_ops

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index cad1ba2..c552082c 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -145,20 +145,22 @@ static irqreturn_t wm97xx_chrg_irq(int irq, void *data)
 }
 
 #ifdef CONFIG_PM
-static int wm97xx_bat_suspend(struct platform_device *dev, pm_message_t state)
+static int wm97xx_bat_suspend(struct device *dev)
 {
 	flush_scheduled_work();
 	return 0;
 }
 
-static int wm97xx_bat_resume(struct platform_device *dev)
+static int wm97xx_bat_resume(struct device *dev)
 {
 	schedule_work(&bat_work);
 	return 0;
 }
-#else
-#define wm97xx_bat_suspend NULL
-#define wm97xx_bat_resume NULL
+
+static struct dev_pm_ops wm97xx_bat_pm_ops = {
+	.suspend	= wm97xx_bat_suspend,
+	.resume		= wm97xx_bat_resume,
+};
 #endif
 
 static int __devinit wm97xx_bat_probe(struct platform_device *dev)
@@ -281,11 +283,12 @@ static struct platform_driver wm97xx_bat_driver = {
 	.driver	= {
 		.name	= "wm97xx-battery",
 		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &wm97xx_bat_pm_ops,
+#endif
 	},
 	.probe		= wm97xx_bat_probe,
 	.remove		= __devexit_p(wm97xx_bat_remove),
-	.suspend	= wm97xx_bat_suspend,
-	.resume		= wm97xx_bat_resume,
 };
 
 static int __init wm97xx_bat_init(void)
-- 
cgit v0.10.2


From b0525b48f06714e8d5cf6a3266261b71de8d6dd4 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Fri, 24 Jul 2009 15:08:11 +0200
Subject: ds2760_battery: Fix integer overflow for time_to_empty_now

On the device we're currently developing, battery sizes of ~2.8Ah and
current flow of ~600mA are typical.

With that values, the life_sec computation overflows due to the
multiplication by 3600.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Szabolcs Gyurko <szabolcs.gyurko@tlt.hu>
Cc: Matt Reimer <mreimer@vpop.net>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 1bb8498..6f1dba5 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -211,9 +211,9 @@ static int ds2760_battery_read_status(struct ds2760_device_info *di)
 	if (di->rem_capacity > 100)
 		di->rem_capacity = 100;
 
-	if (di->current_uA)
-		di->life_sec = -((di->accum_current_uAh - di->empty_uAh) *
-				 3600L) / di->current_uA;
+	if (di->current_uA >= 100L)
+		di->life_sec = -((di->accum_current_uAh - di->empty_uAh) * 36L)
+					/ (di->current_uA / 100L);
 	else
 		di->life_sec = 0;
 
-- 
cgit v0.10.2


From 3961f7c3cf247eee5df7fabadc7a40f2deeb98f3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 10 Aug 2009 17:43:53 +0100
Subject: power_supply: Add driver for the PMU on WM831x PMICs

The WM831x PMICs provide power path management from three sources:
a wall supply, USB and a battery with integrated charger. They also
provide an additional backup supply with integrated for maintaining
always on functionality such as the RTC and monitoring of power
switches.

After some initial configuration at startup the device operates
autonomously, the driver simply provides reporting of the current
state.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index bdbc4f7..cea6cef 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -29,6 +29,13 @@ config APM_POWER
 	  Say Y here to enable support APM status emulation using
 	  battery class devices.
 
+config WM831X_POWER
+	tristate "WM831X PMU support"
+	depends on MFD_WM831X
+	help
+	  Say Y here to enable support for the power management unit
+	  provided by Wolfson Microelectronics WM831x PMICs.
+
 config WM8350_POWER
         tristate "WM8350 PMU support"
         depends on MFD_WM8350
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 380d17c..b96f29d 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_POWER_SUPPLY)	+= power_supply.o
 
 obj-$(CONFIG_PDA_POWER)		+= pda_power.o
 obj-$(CONFIG_APM_POWER)		+= apm_power.o
+obj-$(CONFIG_WM831X_POWER)	+= wm831x_power.o
 obj-$(CONFIG_WM8350_POWER)	+= wm8350_power.o
 
 obj-$(CONFIG_BATTERY_DS2760)	+= ds2760_battery.o
diff --git a/drivers/power/wm831x_power.c b/drivers/power/wm831x_power.c
new file mode 100644
index 0000000..2a4c8b0
--- /dev/null
+++ b/drivers/power/wm831x_power.c
@@ -0,0 +1,779 @@
+/*
+ * PMU driver for Wolfson Microelectronics wm831x PMICs
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+
+#include <linux/mfd/wm831x/core.h>
+#include <linux/mfd/wm831x/auxadc.h>
+#include <linux/mfd/wm831x/pmu.h>
+#include <linux/mfd/wm831x/pdata.h>
+
+struct wm831x_power {
+	struct wm831x *wm831x;
+	struct power_supply wall;
+	struct power_supply backup;
+	struct power_supply usb;
+	struct power_supply battery;
+};
+
+static int wm831x_power_check_online(struct wm831x *wm831x, int supply,
+				     union power_supply_propval *val)
+{
+	int ret;
+
+	ret = wm831x_reg_read(wm831x, WM831X_SYSTEM_STATUS);
+	if (ret < 0)
+		return ret;
+
+	if (ret & supply)
+		val->intval = 1;
+	else
+		val->intval = 0;
+
+	return 0;
+}
+
+static int wm831x_power_read_voltage(struct wm831x *wm831x,
+				     enum wm831x_auxadc src,
+				     union power_supply_propval *val)
+{
+	int ret;
+
+	ret = wm831x_auxadc_read_uv(wm831x, src);
+	if (ret >= 0)
+		val->intval = ret;
+
+	return ret;
+}
+
+/*********************************************************************
+ *		WALL Power
+ *********************************************************************/
+static int wm831x_wall_get_prop(struct power_supply *psy,
+				enum power_supply_property psp,
+				union power_supply_propval *val)
+{
+	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev->parent);
+	struct wm831x *wm831x = wm831x_power->wm831x;
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = wm831x_power_check_online(wm831x, WM831X_PWR_WALL, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = wm831x_power_read_voltage(wm831x, WM831X_AUX_WALL, val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static enum power_supply_property wm831x_wall_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		USB Power
+ *********************************************************************/
+static int wm831x_usb_get_prop(struct power_supply *psy,
+			       enum power_supply_property psp,
+			       union power_supply_propval *val)
+{
+	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev->parent);
+	struct wm831x *wm831x = wm831x_power->wm831x;
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = wm831x_power_check_online(wm831x, WM831X_PWR_USB, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = wm831x_power_read_voltage(wm831x, WM831X_AUX_USB, val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static enum power_supply_property wm831x_usb_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		Battery properties
+ *********************************************************************/
+
+struct chg_map {
+	int val;
+	int reg_val;
+};
+
+static struct chg_map trickle_ilims[] = {
+	{  50, 0 << WM831X_CHG_TRKL_ILIM_SHIFT },
+	{ 100, 1 << WM831X_CHG_TRKL_ILIM_SHIFT },
+	{ 150, 2 << WM831X_CHG_TRKL_ILIM_SHIFT },
+	{ 200, 3 << WM831X_CHG_TRKL_ILIM_SHIFT },
+};
+
+static struct chg_map vsels[] = {
+	{ 4050, 0 << WM831X_CHG_VSEL_SHIFT },
+	{ 4100, 1 << WM831X_CHG_VSEL_SHIFT },
+	{ 4150, 2 << WM831X_CHG_VSEL_SHIFT },
+	{ 4200, 3 << WM831X_CHG_VSEL_SHIFT },
+};
+
+static struct chg_map fast_ilims[] = {
+	{    0,  0 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{   50,  1 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  100,  2 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  150,  3 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  200,  4 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  250,  5 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  300,  6 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  350,  7 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  400,  8 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  450,  9 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  500, 10 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  600, 11 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  700, 12 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  800, 13 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{  900, 14 << WM831X_CHG_FAST_ILIM_SHIFT },
+	{ 1000, 15 << WM831X_CHG_FAST_ILIM_SHIFT },
+};
+
+static struct chg_map eoc_iterms[] = {
+	{ 20, 0 << WM831X_CHG_ITERM_SHIFT },
+	{ 30, 1 << WM831X_CHG_ITERM_SHIFT },
+	{ 40, 2 << WM831X_CHG_ITERM_SHIFT },
+	{ 50, 3 << WM831X_CHG_ITERM_SHIFT },
+	{ 60, 4 << WM831X_CHG_ITERM_SHIFT },
+	{ 70, 5 << WM831X_CHG_ITERM_SHIFT },
+	{ 80, 6 << WM831X_CHG_ITERM_SHIFT },
+	{ 90, 7 << WM831X_CHG_ITERM_SHIFT },
+};
+
+static struct chg_map chg_times[] = {
+	{  60,  0 << WM831X_CHG_TIME_SHIFT },
+	{  90,  1 << WM831X_CHG_TIME_SHIFT },
+	{ 120,  2 << WM831X_CHG_TIME_SHIFT },
+	{ 150,  3 << WM831X_CHG_TIME_SHIFT },
+	{ 180,  4 << WM831X_CHG_TIME_SHIFT },
+	{ 210,  5 << WM831X_CHG_TIME_SHIFT },
+	{ 240,  6 << WM831X_CHG_TIME_SHIFT },
+	{ 270,  7 << WM831X_CHG_TIME_SHIFT },
+	{ 300,  8 << WM831X_CHG_TIME_SHIFT },
+	{ 330,  9 << WM831X_CHG_TIME_SHIFT },
+	{ 360, 10 << WM831X_CHG_TIME_SHIFT },
+	{ 390, 11 << WM831X_CHG_TIME_SHIFT },
+	{ 420, 12 << WM831X_CHG_TIME_SHIFT },
+	{ 450, 13 << WM831X_CHG_TIME_SHIFT },
+	{ 480, 14 << WM831X_CHG_TIME_SHIFT },
+	{ 510, 15 << WM831X_CHG_TIME_SHIFT },
+};
+
+static void wm831x_battey_apply_config(struct wm831x *wm831x,
+				       struct chg_map *map, int count, int val,
+				       int *reg, const char *name,
+				       const char *units)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		if (val == map[i].val)
+			break;
+	if (i == count) {
+		dev_err(wm831x->dev, "Invalid %s %d%s\n",
+			name, val, units);
+	} else {
+		*reg |= map[i].reg_val;
+		dev_dbg(wm831x->dev, "Set %s of %d%s\n", name, val, units);
+	}
+}
+
+static void wm831x_config_battery(struct wm831x *wm831x)
+{
+	struct wm831x_pdata *wm831x_pdata = wm831x->dev->platform_data;
+	struct wm831x_battery_pdata *pdata;
+	int ret, reg1, reg2;
+
+	if (!wm831x_pdata || !wm831x_pdata->battery) {
+		dev_warn(wm831x->dev,
+			 "No battery charger configuration\n");
+		return;
+	}
+
+	pdata = wm831x_pdata->battery;
+
+	reg1 = 0;
+	reg2 = 0;
+
+	if (!pdata->enable) {
+		dev_info(wm831x->dev, "Battery charger disabled\n");
+		return;
+	}
+
+	reg1 |= WM831X_CHG_ENA;
+	if (pdata->off_mask)
+		reg2 |= WM831X_CHG_OFF_MSK;
+	if (pdata->fast_enable)
+		reg1 |= WM831X_CHG_FAST;
+
+	wm831x_battey_apply_config(wm831x, trickle_ilims,
+				   ARRAY_SIZE(trickle_ilims),
+				   pdata->trickle_ilim, &reg2,
+				   "trickle charge current limit", "mA");
+
+	wm831x_battey_apply_config(wm831x, vsels, ARRAY_SIZE(vsels),
+				   pdata->vsel, &reg2,
+				   "target voltage", "mV");
+
+	wm831x_battey_apply_config(wm831x, fast_ilims, ARRAY_SIZE(fast_ilims),
+				   pdata->fast_ilim, &reg2,
+				   "fast charge current limit", "mA");
+
+	wm831x_battey_apply_config(wm831x, eoc_iterms, ARRAY_SIZE(eoc_iterms),
+				   pdata->eoc_iterm, &reg1,
+				   "end of charge current threshold", "mA");
+
+	wm831x_battey_apply_config(wm831x, chg_times, ARRAY_SIZE(chg_times),
+				   pdata->timeout, &reg2,
+				   "charger timeout", "min");
+
+	ret = wm831x_reg_unlock(wm831x);
+	if (ret != 0) {
+		dev_err(wm831x->dev, "Failed to unlock registers: %d\n", ret);
+		return;
+	}
+
+	ret = wm831x_set_bits(wm831x, WM831X_CHARGER_CONTROL_1,
+			      WM831X_CHG_ENA_MASK |
+			      WM831X_CHG_FAST_MASK |
+			      WM831X_CHG_ITERM_MASK |
+			      WM831X_CHG_ITERM_MASK,
+			      reg1);
+	if (ret != 0)
+		dev_err(wm831x->dev, "Failed to set charger control 1: %d\n",
+			ret);
+
+	ret = wm831x_set_bits(wm831x, WM831X_CHARGER_CONTROL_2,
+			      WM831X_CHG_OFF_MSK |
+			      WM831X_CHG_TIME_MASK |
+			      WM831X_CHG_FAST_ILIM_MASK |
+			      WM831X_CHG_TRKL_ILIM_MASK |
+			      WM831X_CHG_VSEL_MASK,
+			      reg2);
+	if (ret != 0)
+		dev_err(wm831x->dev, "Failed to set charger control 2: %d\n",
+			ret);
+
+	wm831x_reg_lock(wm831x);
+}
+
+static int wm831x_bat_check_status(struct wm831x *wm831x, int *status)
+{
+	int ret;
+
+	ret = wm831x_reg_read(wm831x, WM831X_SYSTEM_STATUS);
+	if (ret < 0)
+		return ret;
+
+	if (ret & WM831X_PWR_SRC_BATT) {
+		*status = POWER_SUPPLY_STATUS_DISCHARGING;
+		return 0;
+	}
+
+	ret = wm831x_reg_read(wm831x, WM831X_CHARGER_STATUS);
+	if (ret < 0)
+		return ret;
+
+	switch (ret & WM831X_CHG_STATE_MASK) {
+	case WM831X_CHG_STATE_OFF:
+		*status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		break;
+	case WM831X_CHG_STATE_TRICKLE:
+	case WM831X_CHG_STATE_FAST:
+		*status = POWER_SUPPLY_STATUS_CHARGING;
+		break;
+
+	default:
+		*status = POWER_SUPPLY_STATUS_UNKNOWN;
+		break;
+	}
+
+	return 0;
+}
+
+static int wm831x_bat_check_type(struct wm831x *wm831x, int *type)
+{
+	int ret;
+
+	ret = wm831x_reg_read(wm831x, WM831X_CHARGER_STATUS);
+	if (ret < 0)
+		return ret;
+
+	switch (ret & WM831X_CHG_STATE_MASK) {
+	case WM831X_CHG_STATE_TRICKLE:
+	case WM831X_CHG_STATE_TRICKLE_OT:
+		*type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
+		break;
+	case WM831X_CHG_STATE_FAST:
+	case WM831X_CHG_STATE_FAST_OT:
+		*type = POWER_SUPPLY_CHARGE_TYPE_FAST;
+		break;
+	default:
+		*type = POWER_SUPPLY_CHARGE_TYPE_NONE;
+		break;
+	}
+
+	return 0;
+}
+
+static int wm831x_bat_check_health(struct wm831x *wm831x, int *health)
+{
+	int ret;
+
+	ret = wm831x_reg_read(wm831x, WM831X_CHARGER_STATUS);
+	if (ret < 0)
+		return ret;
+
+	if (ret & WM831X_BATT_HOT_STS) {
+		*health = POWER_SUPPLY_HEALTH_OVERHEAT;
+		return 0;
+	}
+
+	if (ret & WM831X_BATT_COLD_STS) {
+		*health = POWER_SUPPLY_HEALTH_COLD;
+		return 0;
+	}
+
+	if (ret & WM831X_BATT_OV_STS) {
+		*health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+		return 0;
+	}
+
+	switch (ret & WM831X_CHG_STATE_MASK) {
+	case WM831X_CHG_STATE_TRICKLE_OT:
+	case WM831X_CHG_STATE_FAST_OT:
+		*health = POWER_SUPPLY_HEALTH_OVERHEAT;
+		break;
+	case WM831X_CHG_STATE_DEFECTIVE:
+		*health = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		break;
+	default:
+		*health = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	}
+
+	return 0;
+}
+
+static int wm831x_bat_get_prop(struct power_supply *psy,
+			       enum power_supply_property psp,
+			       union power_supply_propval *val)
+{
+	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev->parent);
+	struct wm831x *wm831x = wm831x_power->wm831x;
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = wm831x_bat_check_status(wm831x, &val->intval);
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = wm831x_power_check_online(wm831x, WM831X_PWR_SRC_BATT,
+						val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = wm831x_power_read_voltage(wm831x, WM831X_AUX_BATT, val);
+		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		ret = wm831x_bat_check_health(wm831x, &val->intval);
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_TYPE:
+		ret = wm831x_bat_check_type(wm831x, &val->intval);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static enum power_supply_property wm831x_bat_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_CHARGE_TYPE,
+};
+
+static const char *wm831x_bat_irqs[] = {
+	"BATT HOT",
+	"BATT COLD",
+	"BATT FAIL",
+	"OV",
+	"END",
+	"TO",
+	"MODE",
+	"START",
+};
+
+static irqreturn_t wm831x_bat_irq(int irq, void *data)
+{
+	struct wm831x_power *wm831x_power = data;
+	struct wm831x *wm831x = wm831x_power->wm831x;
+
+	dev_dbg(wm831x->dev, "Battery status changed: %d\n", irq);
+
+	/* The battery charger is autonomous so we don't need to do
+	 * anything except kick user space */
+	power_supply_changed(&wm831x_power->battery);
+
+	return IRQ_HANDLED;
+}
+
+
+/*********************************************************************
+ *		Backup supply properties
+ *********************************************************************/
+
+static void wm831x_config_backup(struct wm831x *wm831x)
+{
+	struct wm831x_pdata *wm831x_pdata = wm831x->dev->platform_data;
+	struct wm831x_backup_pdata *pdata;
+	int ret, reg;
+
+	if (!wm831x_pdata || !wm831x_pdata->backup) {
+		dev_warn(wm831x->dev,
+			 "No backup battery charger configuration\n");
+		return;
+	}
+
+	pdata = wm831x_pdata->backup;
+
+	reg = 0;
+
+	if (pdata->charger_enable)
+		reg |= WM831X_BKUP_CHG_ENA | WM831X_BKUP_BATT_DET_ENA;
+	if (pdata->no_constant_voltage)
+		reg |= WM831X_BKUP_CHG_MODE;
+
+	switch (pdata->vlim) {
+	case 2500:
+		break;
+	case 3100:
+		reg |= WM831X_BKUP_CHG_VLIM;
+		break;
+	default:
+		dev_err(wm831x->dev, "Invalid backup voltage limit %dmV\n",
+			pdata->vlim);
+	}
+
+	switch (pdata->ilim) {
+	case 100:
+		break;
+	case 200:
+		reg |= 1;
+		break;
+	case 300:
+		reg |= 2;
+		break;
+	case 400:
+		reg |= 3;
+		break;
+	default:
+		dev_err(wm831x->dev, "Invalid backup current limit %duA\n",
+			pdata->ilim);
+	}
+
+	ret = wm831x_reg_unlock(wm831x);
+	if (ret != 0) {
+		dev_err(wm831x->dev, "Failed to unlock registers: %d\n", ret);
+		return;
+	}
+
+	ret = wm831x_set_bits(wm831x, WM831X_BACKUP_CHARGER_CONTROL,
+			      WM831X_BKUP_CHG_ENA_MASK |
+			      WM831X_BKUP_CHG_MODE_MASK |
+			      WM831X_BKUP_BATT_DET_ENA_MASK |
+			      WM831X_BKUP_CHG_VLIM_MASK |
+			      WM831X_BKUP_CHG_ILIM_MASK,
+			      reg);
+	if (ret != 0)
+		dev_err(wm831x->dev,
+			"Failed to set backup charger config: %d\n", ret);
+
+	wm831x_reg_lock(wm831x);
+}
+
+static int wm831x_backup_get_prop(struct power_supply *psy,
+				  enum power_supply_property psp,
+				  union power_supply_propval *val)
+{
+	struct wm831x_power *wm831x_power = dev_get_drvdata(psy->dev->parent);
+	struct wm831x *wm831x = wm831x_power->wm831x;
+	int ret = 0;
+
+	ret = wm831x_reg_read(wm831x, WM831X_BACKUP_CHARGER_CONTROL);
+	if (ret < 0)
+		return ret;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		if (ret & WM831X_BKUP_CHG_STS)
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		break;
+
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = wm831x_power_read_voltage(wm831x, WM831X_AUX_BKUP_BATT,
+						val);
+		break;
+
+	case POWER_SUPPLY_PROP_PRESENT:
+		if (ret & WM831X_BKUP_CHG_STS)
+			val->intval = 1;
+		else
+			val->intval = 0;
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static enum power_supply_property wm831x_backup_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_PRESENT,
+};
+
+/*********************************************************************
+ *		Initialisation
+ *********************************************************************/
+
+static irqreturn_t wm831x_syslo_irq(int irq, void *data)
+{
+	struct wm831x_power *wm831x_power = data;
+	struct wm831x *wm831x = wm831x_power->wm831x;
+
+	/* Not much we can actually *do* but tell people for
+	 * posterity, we're probably about to run out of power. */
+	dev_crit(wm831x->dev, "SYSVDD under voltage\n");
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t wm831x_pwr_src_irq(int irq, void *data)
+{
+	struct wm831x_power *wm831x_power = data;
+	struct wm831x *wm831x = wm831x_power->wm831x;
+
+	dev_dbg(wm831x->dev, "Power source changed\n");
+
+	/* Just notify for everything - little harm in overnotifying.
+	 * The backup battery is not a power source while the system
+	 * is running so skip that.
+	 */
+	power_supply_changed(&wm831x_power->battery);
+	power_supply_changed(&wm831x_power->usb);
+	power_supply_changed(&wm831x_power->wall);
+
+	return IRQ_HANDLED;
+}
+
+static __devinit int wm831x_power_probe(struct platform_device *pdev)
+{
+	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
+	struct wm831x_power *power;
+	struct power_supply *usb;
+	struct power_supply *battery;
+	struct power_supply *wall;
+	struct power_supply *backup;
+	int ret, irq, i;
+
+	power = kzalloc(sizeof(struct wm831x_power), GFP_KERNEL);
+	if (power == NULL)
+		return -ENOMEM;
+
+	power->wm831x = wm831x;
+	platform_set_drvdata(pdev, power);
+
+	usb = &power->usb;
+	battery = &power->battery;
+	wall = &power->wall;
+	backup = &power->backup;
+
+	/* We ignore configuration failures since we can still read back
+	 * the status without enabling either of the chargers.
+	 */
+	wm831x_config_battery(wm831x);
+	wm831x_config_backup(wm831x);
+
+	wall->name = "wm831x-wall";
+	wall->type = POWER_SUPPLY_TYPE_MAINS;
+	wall->properties = wm831x_wall_props;
+	wall->num_properties = ARRAY_SIZE(wm831x_wall_props);
+	wall->get_property = wm831x_wall_get_prop;
+	ret = power_supply_register(&pdev->dev, wall);
+	if (ret)
+		goto err_kmalloc;
+
+	battery->name = "wm831x-battery";
+	battery->properties = wm831x_bat_props;
+	battery->num_properties = ARRAY_SIZE(wm831x_bat_props);
+	battery->get_property = wm831x_bat_get_prop;
+	battery->use_for_apm = 1;
+	ret = power_supply_register(&pdev->dev, battery);
+	if (ret)
+		goto err_wall;
+
+	usb->name = "wm831x-usb",
+	usb->type = POWER_SUPPLY_TYPE_USB;
+	usb->properties = wm831x_usb_props;
+	usb->num_properties = ARRAY_SIZE(wm831x_usb_props);
+	usb->get_property = wm831x_usb_get_prop;
+	ret = power_supply_register(&pdev->dev, usb);
+	if (ret)
+		goto err_battery;
+
+	backup->name = "wm831x-backup";
+	backup->type = POWER_SUPPLY_TYPE_BATTERY;
+	backup->properties = wm831x_backup_props;
+	backup->num_properties = ARRAY_SIZE(wm831x_backup_props);
+	backup->get_property = wm831x_backup_get_prop;
+	ret = power_supply_register(&pdev->dev, backup);
+	if (ret)
+		goto err_usb;
+
+	irq = platform_get_irq_byname(pdev, "SYSLO");
+	ret = wm831x_request_irq(wm831x, irq, wm831x_syslo_irq,
+				 IRQF_TRIGGER_RISING, "SYSLO",
+				 power);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to request SYSLO IRQ %d: %d\n",
+			irq, ret);
+		goto err_backup;
+	}
+
+	irq = platform_get_irq_byname(pdev, "PWR SRC");
+	ret = wm831x_request_irq(wm831x, irq, wm831x_pwr_src_irq,
+				 IRQF_TRIGGER_RISING, "Power source",
+				 power);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to request PWR SRC IRQ %d: %d\n",
+			irq, ret);
+		goto err_syslo;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) {
+		irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]);
+		ret = wm831x_request_irq(wm831x, irq, wm831x_bat_irq,
+					 IRQF_TRIGGER_RISING,
+					 wm831x_bat_irqs[i],
+					 power);
+		if (ret != 0) {
+			dev_err(&pdev->dev,
+				"Failed to request %s IRQ %d: %d\n",
+				wm831x_bat_irqs[i], irq, ret);
+			goto err_bat_irq;
+		}
+	}
+
+	return ret;
+
+err_bat_irq:
+	for (; i >= 0; i--) {
+		irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]);
+		wm831x_free_irq(wm831x, irq, power);
+	}
+	irq = platform_get_irq_byname(pdev, "PWR SRC");
+	wm831x_free_irq(wm831x, irq, power);
+err_syslo:
+	irq = platform_get_irq_byname(pdev, "SYSLO");
+	wm831x_free_irq(wm831x, irq, power);
+err_backup:
+	power_supply_unregister(backup);
+err_usb:
+	power_supply_unregister(usb);
+err_battery:
+	power_supply_unregister(battery);
+err_wall:
+	power_supply_unregister(wall);
+err_kmalloc:
+	kfree(power);
+	return ret;
+}
+
+static __devexit int wm831x_power_remove(struct platform_device *pdev)
+{
+	struct wm831x_power *wm831x_power = platform_get_drvdata(pdev);
+	struct wm831x *wm831x = wm831x_power->wm831x;
+	int irq, i;
+
+	for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) {
+		irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]);
+		wm831x_free_irq(wm831x, irq, wm831x_power);
+	}
+
+	irq = platform_get_irq_byname(pdev, "PWR SRC");
+	wm831x_free_irq(wm831x, irq, wm831x_power);
+
+	irq = platform_get_irq_byname(pdev, "SYSLO");
+	wm831x_free_irq(wm831x, irq, wm831x_power);
+
+	power_supply_unregister(&wm831x_power->backup);
+	power_supply_unregister(&wm831x_power->battery);
+	power_supply_unregister(&wm831x_power->wall);
+	power_supply_unregister(&wm831x_power->usb);
+	return 0;
+}
+
+static struct platform_driver wm831x_power_driver = {
+	.probe = wm831x_power_probe,
+	.remove = __devexit_p(wm831x_power_remove),
+	.driver = {
+		.name = "wm831x-power",
+	},
+};
+
+static int __init wm831x_power_init(void)
+{
+	return platform_driver_register(&wm831x_power_driver);
+}
+module_init(wm831x_power_init);
+
+static void __exit wm831x_power_exit(void)
+{
+	platform_driver_unregister(&wm831x_power_driver);
+}
+module_exit(wm831x_power_exit);
+
+MODULE_DESCRIPTION("Power supply driver for WM831x PMICs");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:wm831x-power");
diff --git a/include/linux/mfd/wm831x/pmu.h b/include/linux/mfd/wm831x/pmu.h
new file mode 100644
index 0000000..b18cbb0
--- /dev/null
+++ b/include/linux/mfd/wm831x/pmu.h
@@ -0,0 +1,189 @@
+/*
+ * include/linux/mfd/wm831x/pmu.h -- PMU for WM831x
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_WM831X_PMU_H__
+#define __MFD_WM831X_PMU_H__
+
+/*
+ * R16387 (0x4003) - Power State
+ */
+#define WM831X_CHIP_ON                          0x8000  /* CHIP_ON */
+#define WM831X_CHIP_ON_MASK                     0x8000  /* CHIP_ON */
+#define WM831X_CHIP_ON_SHIFT                        15  /* CHIP_ON */
+#define WM831X_CHIP_ON_WIDTH                         1  /* CHIP_ON */
+#define WM831X_CHIP_SLP                         0x4000  /* CHIP_SLP */
+#define WM831X_CHIP_SLP_MASK                    0x4000  /* CHIP_SLP */
+#define WM831X_CHIP_SLP_SHIFT                       14  /* CHIP_SLP */
+#define WM831X_CHIP_SLP_WIDTH                        1  /* CHIP_SLP */
+#define WM831X_REF_LP                           0x1000  /* REF_LP */
+#define WM831X_REF_LP_MASK                      0x1000  /* REF_LP */
+#define WM831X_REF_LP_SHIFT                         12  /* REF_LP */
+#define WM831X_REF_LP_WIDTH                          1  /* REF_LP */
+#define WM831X_PWRSTATE_DLY_MASK                0x0C00  /* PWRSTATE_DLY - [11:10] */
+#define WM831X_PWRSTATE_DLY_SHIFT                   10  /* PWRSTATE_DLY - [11:10] */
+#define WM831X_PWRSTATE_DLY_WIDTH                    2  /* PWRSTATE_DLY - [11:10] */
+#define WM831X_SWRST_DLY                        0x0200  /* SWRST_DLY */
+#define WM831X_SWRST_DLY_MASK                   0x0200  /* SWRST_DLY */
+#define WM831X_SWRST_DLY_SHIFT                       9  /* SWRST_DLY */
+#define WM831X_SWRST_DLY_WIDTH                       1  /* SWRST_DLY */
+#define WM831X_USB100MA_STARTUP_MASK            0x0030  /* USB100MA_STARTUP - [5:4] */
+#define WM831X_USB100MA_STARTUP_SHIFT                4  /* USB100MA_STARTUP - [5:4] */
+#define WM831X_USB100MA_STARTUP_WIDTH                2  /* USB100MA_STARTUP - [5:4] */
+#define WM831X_USB_CURR_STS                     0x0008  /* USB_CURR_STS */
+#define WM831X_USB_CURR_STS_MASK                0x0008  /* USB_CURR_STS */
+#define WM831X_USB_CURR_STS_SHIFT                    3  /* USB_CURR_STS */
+#define WM831X_USB_CURR_STS_WIDTH                    1  /* USB_CURR_STS */
+#define WM831X_USB_ILIM_MASK                    0x0007  /* USB_ILIM - [2:0] */
+#define WM831X_USB_ILIM_SHIFT                        0  /* USB_ILIM - [2:0] */
+#define WM831X_USB_ILIM_WIDTH                        3  /* USB_ILIM - [2:0] */
+
+/*
+ * R16397 (0x400D) - System Status
+ */
+#define WM831X_THW_STS                          0x8000  /* THW_STS */
+#define WM831X_THW_STS_MASK                     0x8000  /* THW_STS */
+#define WM831X_THW_STS_SHIFT                        15  /* THW_STS */
+#define WM831X_THW_STS_WIDTH                         1  /* THW_STS */
+#define WM831X_PWR_SRC_BATT                     0x0400  /* PWR_SRC_BATT */
+#define WM831X_PWR_SRC_BATT_MASK                0x0400  /* PWR_SRC_BATT */
+#define WM831X_PWR_SRC_BATT_SHIFT                   10  /* PWR_SRC_BATT */
+#define WM831X_PWR_SRC_BATT_WIDTH                    1  /* PWR_SRC_BATT */
+#define WM831X_PWR_WALL                         0x0200  /* PWR_WALL */
+#define WM831X_PWR_WALL_MASK                    0x0200  /* PWR_WALL */
+#define WM831X_PWR_WALL_SHIFT                        9  /* PWR_WALL */
+#define WM831X_PWR_WALL_WIDTH                        1  /* PWR_WALL */
+#define WM831X_PWR_USB                          0x0100  /* PWR_USB */
+#define WM831X_PWR_USB_MASK                     0x0100  /* PWR_USB */
+#define WM831X_PWR_USB_SHIFT                         8  /* PWR_USB */
+#define WM831X_PWR_USB_WIDTH                         1  /* PWR_USB */
+#define WM831X_MAIN_STATE_MASK                  0x001F  /* MAIN_STATE - [4:0] */
+#define WM831X_MAIN_STATE_SHIFT                      0  /* MAIN_STATE - [4:0] */
+#define WM831X_MAIN_STATE_WIDTH                      5  /* MAIN_STATE - [4:0] */
+
+/*
+ * R16456 (0x4048) - Charger Control 1
+ */
+#define WM831X_CHG_ENA                          0x8000  /* CHG_ENA */
+#define WM831X_CHG_ENA_MASK                     0x8000  /* CHG_ENA */
+#define WM831X_CHG_ENA_SHIFT                        15  /* CHG_ENA */
+#define WM831X_CHG_ENA_WIDTH                         1  /* CHG_ENA */
+#define WM831X_CHG_FRC                          0x4000  /* CHG_FRC */
+#define WM831X_CHG_FRC_MASK                     0x4000  /* CHG_FRC */
+#define WM831X_CHG_FRC_SHIFT                        14  /* CHG_FRC */
+#define WM831X_CHG_FRC_WIDTH                         1  /* CHG_FRC */
+#define WM831X_CHG_ITERM_MASK                   0x1C00  /* CHG_ITERM - [12:10] */
+#define WM831X_CHG_ITERM_SHIFT                      10  /* CHG_ITERM - [12:10] */
+#define WM831X_CHG_ITERM_WIDTH                       3  /* CHG_ITERM - [12:10] */
+#define WM831X_CHG_FAST                         0x0020  /* CHG_FAST */
+#define WM831X_CHG_FAST_MASK                    0x0020  /* CHG_FAST */
+#define WM831X_CHG_FAST_SHIFT                        5  /* CHG_FAST */
+#define WM831X_CHG_FAST_WIDTH                        1  /* CHG_FAST */
+#define WM831X_CHG_IMON_ENA                     0x0002  /* CHG_IMON_ENA */
+#define WM831X_CHG_IMON_ENA_MASK                0x0002  /* CHG_IMON_ENA */
+#define WM831X_CHG_IMON_ENA_SHIFT                    1  /* CHG_IMON_ENA */
+#define WM831X_CHG_IMON_ENA_WIDTH                    1  /* CHG_IMON_ENA */
+#define WM831X_CHG_CHIP_TEMP_MON                0x0001  /* CHG_CHIP_TEMP_MON */
+#define WM831X_CHG_CHIP_TEMP_MON_MASK           0x0001  /* CHG_CHIP_TEMP_MON */
+#define WM831X_CHG_CHIP_TEMP_MON_SHIFT               0  /* CHG_CHIP_TEMP_MON */
+#define WM831X_CHG_CHIP_TEMP_MON_WIDTH               1  /* CHG_CHIP_TEMP_MON */
+
+/*
+ * R16457 (0x4049) - Charger Control 2
+ */
+#define WM831X_CHG_OFF_MSK                      0x4000  /* CHG_OFF_MSK */
+#define WM831X_CHG_OFF_MSK_MASK                 0x4000  /* CHG_OFF_MSK */
+#define WM831X_CHG_OFF_MSK_SHIFT                    14  /* CHG_OFF_MSK */
+#define WM831X_CHG_OFF_MSK_WIDTH                     1  /* CHG_OFF_MSK */
+#define WM831X_CHG_TIME_MASK                    0x0F00  /* CHG_TIME - [11:8] */
+#define WM831X_CHG_TIME_SHIFT                        8  /* CHG_TIME - [11:8] */
+#define WM831X_CHG_TIME_WIDTH                        4  /* CHG_TIME - [11:8] */
+#define WM831X_CHG_TRKL_ILIM_MASK               0x00C0  /* CHG_TRKL_ILIM - [7:6] */
+#define WM831X_CHG_TRKL_ILIM_SHIFT                   6  /* CHG_TRKL_ILIM - [7:6] */
+#define WM831X_CHG_TRKL_ILIM_WIDTH                   2  /* CHG_TRKL_ILIM - [7:6] */
+#define WM831X_CHG_VSEL_MASK                    0x0030  /* CHG_VSEL - [5:4] */
+#define WM831X_CHG_VSEL_SHIFT                        4  /* CHG_VSEL - [5:4] */
+#define WM831X_CHG_VSEL_WIDTH                        2  /* CHG_VSEL - [5:4] */
+#define WM831X_CHG_FAST_ILIM_MASK               0x000F  /* CHG_FAST_ILIM - [3:0] */
+#define WM831X_CHG_FAST_ILIM_SHIFT                   0  /* CHG_FAST_ILIM - [3:0] */
+#define WM831X_CHG_FAST_ILIM_WIDTH                   4  /* CHG_FAST_ILIM - [3:0] */
+
+/*
+ * R16458 (0x404A) - Charger Status
+ */
+#define WM831X_BATT_OV_STS                      0x8000  /* BATT_OV_STS */
+#define WM831X_BATT_OV_STS_MASK                 0x8000  /* BATT_OV_STS */
+#define WM831X_BATT_OV_STS_SHIFT                    15  /* BATT_OV_STS */
+#define WM831X_BATT_OV_STS_WIDTH                     1  /* BATT_OV_STS */
+#define WM831X_CHG_STATE_MASK                   0x7000  /* CHG_STATE - [14:12] */
+#define WM831X_CHG_STATE_SHIFT                      12  /* CHG_STATE - [14:12] */
+#define WM831X_CHG_STATE_WIDTH                       3  /* CHG_STATE - [14:12] */
+#define WM831X_BATT_HOT_STS                     0x0800  /* BATT_HOT_STS */
+#define WM831X_BATT_HOT_STS_MASK                0x0800  /* BATT_HOT_STS */
+#define WM831X_BATT_HOT_STS_SHIFT                   11  /* BATT_HOT_STS */
+#define WM831X_BATT_HOT_STS_WIDTH                    1  /* BATT_HOT_STS */
+#define WM831X_BATT_COLD_STS                    0x0400  /* BATT_COLD_STS */
+#define WM831X_BATT_COLD_STS_MASK               0x0400  /* BATT_COLD_STS */
+#define WM831X_BATT_COLD_STS_SHIFT                  10  /* BATT_COLD_STS */
+#define WM831X_BATT_COLD_STS_WIDTH                   1  /* BATT_COLD_STS */
+#define WM831X_CHG_TOPOFF                       0x0200  /* CHG_TOPOFF */
+#define WM831X_CHG_TOPOFF_MASK                  0x0200  /* CHG_TOPOFF */
+#define WM831X_CHG_TOPOFF_SHIFT                      9  /* CHG_TOPOFF */
+#define WM831X_CHG_TOPOFF_WIDTH                      1  /* CHG_TOPOFF */
+#define WM831X_CHG_ACTIVE                       0x0100  /* CHG_ACTIVE */
+#define WM831X_CHG_ACTIVE_MASK                  0x0100  /* CHG_ACTIVE */
+#define WM831X_CHG_ACTIVE_SHIFT                      8  /* CHG_ACTIVE */
+#define WM831X_CHG_ACTIVE_WIDTH                      1  /* CHG_ACTIVE */
+#define WM831X_CHG_TIME_ELAPSED_MASK            0x00FF  /* CHG_TIME_ELAPSED - [7:0] */
+#define WM831X_CHG_TIME_ELAPSED_SHIFT                0  /* CHG_TIME_ELAPSED - [7:0] */
+#define WM831X_CHG_TIME_ELAPSED_WIDTH                8  /* CHG_TIME_ELAPSED - [7:0] */
+
+#define WM831X_CHG_STATE_OFF         (0 << WM831X_CHG_STATE_SHIFT)
+#define WM831X_CHG_STATE_TRICKLE     (1 << WM831X_CHG_STATE_SHIFT)
+#define WM831X_CHG_STATE_FAST        (2 << WM831X_CHG_STATE_SHIFT)
+#define WM831X_CHG_STATE_TRICKLE_OT  (3 << WM831X_CHG_STATE_SHIFT)
+#define WM831X_CHG_STATE_FAST_OT     (4 << WM831X_CHG_STATE_SHIFT)
+#define WM831X_CHG_STATE_DEFECTIVE   (5 << WM831X_CHG_STATE_SHIFT)
+
+/*
+ * R16459 (0x404B) - Backup Charger Control
+ */
+#define WM831X_BKUP_CHG_ENA                     0x8000  /* BKUP_CHG_ENA */
+#define WM831X_BKUP_CHG_ENA_MASK                0x8000  /* BKUP_CHG_ENA */
+#define WM831X_BKUP_CHG_ENA_SHIFT                   15  /* BKUP_CHG_ENA */
+#define WM831X_BKUP_CHG_ENA_WIDTH                    1  /* BKUP_CHG_ENA */
+#define WM831X_BKUP_CHG_STS                     0x4000  /* BKUP_CHG_STS */
+#define WM831X_BKUP_CHG_STS_MASK                0x4000  /* BKUP_CHG_STS */
+#define WM831X_BKUP_CHG_STS_SHIFT                   14  /* BKUP_CHG_STS */
+#define WM831X_BKUP_CHG_STS_WIDTH                    1  /* BKUP_CHG_STS */
+#define WM831X_BKUP_CHG_MODE                    0x1000  /* BKUP_CHG_MODE */
+#define WM831X_BKUP_CHG_MODE_MASK               0x1000  /* BKUP_CHG_MODE */
+#define WM831X_BKUP_CHG_MODE_SHIFT                  12  /* BKUP_CHG_MODE */
+#define WM831X_BKUP_CHG_MODE_WIDTH                   1  /* BKUP_CHG_MODE */
+#define WM831X_BKUP_BATT_DET_ENA                0x0800  /* BKUP_BATT_DET_ENA */
+#define WM831X_BKUP_BATT_DET_ENA_MASK           0x0800  /* BKUP_BATT_DET_ENA */
+#define WM831X_BKUP_BATT_DET_ENA_SHIFT              11  /* BKUP_BATT_DET_ENA */
+#define WM831X_BKUP_BATT_DET_ENA_WIDTH               1  /* BKUP_BATT_DET_ENA */
+#define WM831X_BKUP_BATT_STS                    0x0400  /* BKUP_BATT_STS */
+#define WM831X_BKUP_BATT_STS_MASK               0x0400  /* BKUP_BATT_STS */
+#define WM831X_BKUP_BATT_STS_SHIFT                  10  /* BKUP_BATT_STS */
+#define WM831X_BKUP_BATT_STS_WIDTH                   1  /* BKUP_BATT_STS */
+#define WM831X_BKUP_CHG_VLIM                    0x0010  /* BKUP_CHG_VLIM */
+#define WM831X_BKUP_CHG_VLIM_MASK               0x0010  /* BKUP_CHG_VLIM */
+#define WM831X_BKUP_CHG_VLIM_SHIFT                   4  /* BKUP_CHG_VLIM */
+#define WM831X_BKUP_CHG_VLIM_WIDTH                   1  /* BKUP_CHG_VLIM */
+#define WM831X_BKUP_CHG_ILIM_MASK               0x0003  /* BKUP_CHG_ILIM - [1:0] */
+#define WM831X_BKUP_CHG_ILIM_SHIFT                   0  /* BKUP_CHG_ILIM - [1:0] */
+#define WM831X_BKUP_CHG_ILIM_WIDTH                   2  /* BKUP_CHG_ILIM - [1:0] */
+
+#endif
-- 
cgit v0.10.2


From 8177e6d6dfb9cd03d9bdeb647c32161f8f58f686 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 4 Sep 2009 14:13:09 -0400
Subject: nfsd: clean up readdirplus encoding

Make the return from compose_entry_fh() zero or an error, even though
the returned error isn't used, just to make the meaning of the return
immediately obvious.

Move some repeated code out of main function into helper.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 01d4ec1..f16184a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -814,17 +814,6 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 	return p;
 }
 
-static __be32 *
-encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
-		struct svc_fh *fhp)
-{
-	p = encode_post_op_attr(cd->rqstp, p, fhp);
-	*p++ = xdr_one;			/* yes, a file handle follows */
-	p = encode_fh(p, fhp);
-	fh_put(fhp);
-	return p;
-}
-
 static int
 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
 		const char *name, int namlen)
@@ -843,22 +832,46 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
 			if (dchild == dparent) {
 				/* filesystem root - cannot return filehandle for ".." */
 				dput(dchild);
-				return 1;
+				return -ENOENT;
 			}
 		} else
 			dchild = dget(dparent);
 	} else
 		dchild = lookup_one_len(name, dparent, namlen);
 	if (IS_ERR(dchild))
-		return 1;
-	if (d_mountpoint(dchild) ||
-	    fh_compose(fhp, exp, dchild, &cd->fh) != 0 ||
-	    !dchild->d_inode)
-		rv = 1;
+		return -ENOENT;
+	rv = -ENOENT;
+	if (d_mountpoint(dchild))
+		goto out;
+	rv = fh_compose(fhp, exp, dchild, &cd->fh);
+	if (rv)
+		goto out;
+	if (!dchild->d_inode)
+		goto out;
+	rv = 0;
+out:
 	dput(dchild);
 	return rv;
 }
 
+__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
+{
+	struct svc_fh	fh;
+	int err;
+
+	err = compose_entry_fh(cd, &fh, name, namlen);
+	if (err) {
+		*p++ = 0;
+		*p++ = 0;
+		return p;
+	}
+	p = encode_post_op_attr(cd->rqstp, p, &fh);
+	*p++ = xdr_one;			/* yes, a file handle follows */
+	p = encode_fh(p, &fh);
+	fh_put(&fh);
+	return p;
+}
+
 /*
  * Encode a directory entry. This one works for both normal readdir
  * and readdirplus.
@@ -929,16 +942,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 
 		p = encode_entry_baggage(cd, p, name, namlen, ino);
 
-		/* throw in readdirplus baggage */
-		if (plus) {
-			struct svc_fh	fh;
-
-			if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
-				*p++ = 0;
-				*p++ = 0;
-			} else
-				p = encode_entryplus_baggage(cd, p, &fh);
-		}
+		if (plus)
+			p = encode_entryplus_baggage(cd, p, name, namlen);
 		num_entry_words = p - cd->buffer;
 	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
 		/* temporarily encode entry into next page, then move back to
@@ -951,17 +956,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 
 		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
 
-		/* throw in readdirplus baggage */
-		if (plus) {
-			struct svc_fh	fh;
-
-			if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
-				/* zero out the filehandle */
-				*p1++ = 0;
-				*p1++ = 0;
-			} else
-				p1 = encode_entryplus_baggage(cd, p1, &fh);
-		}
+		if (plus)
+			p = encode_entryplus_baggage(cd, p1, name, namlen);
 
 		/* determine entry word length and lengths to go in pages */
 		num_entry_words = p1 - tmp;
-- 
cgit v0.10.2


From aed100fafb90aaabe8fb31e58af9dc7e68696507 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 4 Sep 2009 14:40:36 -0400
Subject: nfsd: fix leak on error in nfsv3 readdir

Note the !dchild->d_inode case can leak the filehandle.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f16184a..edf926e 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -825,7 +825,6 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
 	dparent = cd->fh.fh_dentry;
 	exp  = cd->fh.fh_export;
 
-	fh_init(fhp, NFS3_FHSIZE);
 	if (isdotent(name, namlen)) {
 		if (namlen == 2) {
 			dchild = dget_parent(dparent);
@@ -859,15 +858,17 @@ __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const c
 	struct svc_fh	fh;
 	int err;
 
+	fh_init(&fh, NFS3_FHSIZE);
 	err = compose_entry_fh(cd, &fh, name, namlen);
 	if (err) {
 		*p++ = 0;
 		*p++ = 0;
-		return p;
+		goto out;
 	}
 	p = encode_post_op_attr(cd->rqstp, p, &fh);
 	*p++ = xdr_one;			/* yes, a file handle follows */
 	p = encode_fh(p, &fh);
+out:
 	fh_put(&fh);
 	return p;
 }
-- 
cgit v0.10.2


From 47460d65a483529b3bc2bf6ccf461ad45f94df83 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 10 Feb 2009 16:05:07 -0800
Subject: ocfs2: Make the ocfs2_caching_info structure self-contained.

We want to use the ocfs2_caching_info structure in places that are not
inodes.  To do that, it can no longer rely on referencing the inode
directly.

This patch moves the flags to ocfs2_caching_info->ci_flags, stores
pointers to the parent's locks on the ocfs2_caching_info, and renames
the constants and flags to reflect its independant state.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4dc8890..8ec8044 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1118,7 +1118,8 @@ void ocfs2_clear_inode(struct inode *inode)
 			"Clear inode of %llu, inode has %u cache items\n",
 			(unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
 
-	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
+	mlog_bug_on_msg(!(oi->ip_metadata_cache.ci_flags &
+			  OCFS2_CACHE_FL_INLINE),
 			"Clear inode of %llu, inode has a bad flag\n",
 			(unsigned long long)oi->ip_blkno);
 
@@ -1145,7 +1146,7 @@ void ocfs2_clear_inode(struct inode *inode)
 			(unsigned long long)oi->ip_blkno, oi->ip_open_count);
 
 	/* Clear all other flags. */
-	oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
+	oi->ip_flags = 0;
 	oi->ip_created_trans = 0;
 	oi->ip_last_trans = 0;
 	oi->ip_dir_start_lookup = 0;
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ea71525..2f5e1aa 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -106,8 +106,6 @@ struct ocfs2_inode_info
 #define OCFS2_INODE_MAYBE_ORPHANED	0x00000020
 /* Does someone have the file open O_DIRECT */
 #define OCFS2_INODE_OPEN_DIRECT		0x00000040
-/* Indicates that the metadata cache should be used as an array. */
-#define OCFS2_INODE_CACHE_INLINE	0x00000080
 
 static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 39e1d5a..eef3bd0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -51,17 +51,36 @@
 /* For struct ocfs2_blockcheck_stats */
 #include "blockcheck.h"
 
+
+/* Caching of metadata buffers */
+
 /* Most user visible OCFS2 inodes will have very few pieces of
  * metadata, but larger files (including bitmaps, etc) must be taken
  * into account when designing an access scheme. We allow a small
  * amount of inlined blocks to be stored on an array and grow the
  * structure into a rb tree when necessary. */
-#define OCFS2_INODE_MAX_CACHE_ARRAY 2
+#define OCFS2_CACHE_INFO_MAX_ARRAY 2
+
+/* Flags for ocfs2_caching_info */
+
+enum ocfs2_caching_info_flags {
+	/* Indicates that the metadata cache is using the inline array */
+	OCFS2_CACHE_FL_INLINE	= 1<<1,
+};
 
 struct ocfs2_caching_info {
+	/*
+	 * The parent structure provides the locks, but because the
+	 * parent structure can differ, struct ocfs2_caching_info needs
+	 * its own pointers to them.
+	 */
+	spinlock_t		*ci_lock;
+	struct mutex		*ci_io_mutex;
+
+	unsigned int		ci_flags;
 	unsigned int		ci_num_cached;
 	union {
-		sector_t	ci_array[OCFS2_INODE_MAX_CACHE_ARRAY];
+	sector_t	ci_array[OCFS2_CACHE_INFO_MAX_ARRAY];
 		struct rb_root	ci_tree;
 	} ci_cache;
 };
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a3f8871..8f217f6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1683,7 +1683,8 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_init(&oi->vfs_inode);
+	ocfs2_metadata_cache_init(&oi->ip_metadata_cache, &oi->ip_lock,
+				  &oi->ip_io_mutex);
 
 	inode_init_once(&oi->vfs_inode);
 }
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 187b99f..8dbc457 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,12 +75,13 @@ struct ocfs2_meta_cache_item {
 
 static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
-void ocfs2_metadata_cache_init(struct inode *inode)
+void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
+			       spinlock_t *cache_lock,
+			       struct mutex *io_mutex)
 {
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
-
-	oi->ip_flags |= OCFS2_INODE_CACHE_INLINE;
+	ci->ci_lock = cache_lock;
+	ci->ci_io_mutex = io_mutex;
+	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
 	ci->ci_num_cached = 0;
 }
 
@@ -119,8 +120,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct rb_root root = RB_ROOT;
 
-	spin_lock(&oi->ip_lock);
-	tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
+	spin_lock(ci->ci_lock);
+	tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 	to_purge = ci->ci_num_cached;
 
 	mlog(0, "Purge %u %s items from Inode %llu\n", to_purge,
@@ -132,8 +133,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	if (tree)
 		root = ci->ci_cache.ci_tree;
 
-	ocfs2_metadata_cache_init(inode);
-	spin_unlock(&oi->ip_lock);
+	ocfs2_metadata_cache_init(ci, ci->ci_lock, ci->ci_io_mutex);
+	spin_unlock(ci->ci_lock);
 
 	purged = ocfs2_purge_copied_metadata_tree(&root);
 	/* If possible, track the number wiped so that we can more
@@ -187,22 +188,23 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 {
 	int index = -1;
 	struct ocfs2_meta_cache_item *item = NULL;
+	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(&oi->ip_lock);
+	spin_lock(ci->ci_lock);
 
 	mlog(0, "Inode %llu, query block %llu (inline = %u)\n",
 	     (unsigned long long)oi->ip_blkno,
 	     (unsigned long long) bh->b_blocknr,
-	     !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE));
+	     !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
 
-	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE)
+	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
 		index = ocfs2_search_cache_array(&oi->ip_metadata_cache,
 						 bh->b_blocknr);
 	else
 		item = ocfs2_search_cache_tree(&oi->ip_metadata_cache,
 					       bh->b_blocknr);
 
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	mlog(0, "index = %d, item = %p\n", index, item);
 
@@ -235,7 +237,7 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 
 /* 
  * Determine whether a buffer is currently out on a read-ahead request.
- * ip_io_sem should be held to serialize submitters with the logic here.
+ * ci_io_sem should be held to serialize submitters with the logic here.
  */
 int ocfs2_buffer_read_ahead(struct inode *inode,
 			    struct buffer_head *bh)
@@ -247,7 +249,7 @@ int ocfs2_buffer_read_ahead(struct inode *inode,
 static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
 				     sector_t block)
 {
-	BUG_ON(ci->ci_num_cached >= OCFS2_INODE_MAX_CACHE_ARRAY);
+	BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY);
 
 	mlog(0, "block %llu takes position %u\n", (unsigned long long) block,
 	     ci->ci_num_cached);
@@ -295,13 +297,13 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
 static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
 					     struct ocfs2_caching_info *ci)
 {
-	assert_spin_locked(&oi->ip_lock);
+	assert_spin_locked(ci->ci_lock);
 
-	return (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) &&
-		(ci->ci_num_cached < OCFS2_INODE_MAX_CACHE_ARRAY);
+	return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
+		(ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
 }
 
-/* tree should be exactly OCFS2_INODE_MAX_CACHE_ARRAY wide. NULL the
+/* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the
  * pointers in tree after we use them - this allows caller to detect
  * when to free in case of error. */
 static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
@@ -310,32 +312,32 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	int i;
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY,
+	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
 			"Inode %llu, num cached = %u, should be %u\n",
 			(unsigned long long)oi->ip_blkno, ci->ci_num_cached,
-			OCFS2_INODE_MAX_CACHE_ARRAY);
-	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
+			OCFS2_CACHE_INFO_MAX_ARRAY);
+	mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE),
 			"Inode %llu not marked as inline anymore!\n",
 			(unsigned long long)oi->ip_blkno);
-	assert_spin_locked(&oi->ip_lock);
+	assert_spin_locked(ci->ci_lock);
 
 	/* Be careful to initialize the tree members *first* because
 	 * once the ci_tree is used, the array is junk... */
-	for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++)
+	for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
 		tree[i]->c_block = ci->ci_cache.ci_array[i];
 
-	oi->ip_flags &= ~OCFS2_INODE_CACHE_INLINE;
+	ci->ci_flags &= ~OCFS2_CACHE_FL_INLINE;
 	ci->ci_cache.ci_tree = RB_ROOT;
 	/* this will be set again by __ocfs2_insert_cache_tree */
 	ci->ci_num_cached = 0;
 
-	for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
+	for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
 		__ocfs2_insert_cache_tree(ci, tree[i]);
 		tree[i] = NULL;
 	}
 
 	mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
-	     (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached);
+	     (unsigned long long)oi->ip_blkno, ci->ci_flags, ci->ci_num_cached);
 }
 
 /* Slow path function - memory allocation is necessary. See the
@@ -347,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	int i;
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct ocfs2_meta_cache_item *new = NULL;
-	struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] =
+	struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
 		{ NULL, };
 
 	mlog(0, "Inode %llu, block %llu, expand = %d\n",
@@ -364,7 +366,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	if (expand_tree) {
 		/* Do *not* allocate an array here - the removal code
 		 * has no way of tracking that. */
-		for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
+		for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
 			tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep,
 						   GFP_NOFS);
 			if (!tree[i]) {
@@ -376,13 +378,13 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		}
 	}
 
-	spin_lock(&oi->ip_lock);
+	spin_lock(ci->ci_lock);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		mlog(0, "Someone cleared the tree underneath us\n");
 		/* Ok, items were removed from the cache in between
 		 * locks. Detect this and revert back to the fast path */
 		ocfs2_append_cache_array(ci, block);
-		spin_unlock(&oi->ip_lock);
+		spin_unlock(ci->ci_lock);
 		goto out_free;
 	}
 
@@ -390,7 +392,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		ocfs2_expand_cache(oi, tree);
 
 	__ocfs2_insert_cache_tree(ci, new);
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	new = NULL;
 out_free:
@@ -400,14 +402,14 @@ out_free:
 	/* If these were used, then ocfs2_expand_cache re-set them to
 	 * NULL for us. */
 	if (tree[0]) {
-		for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++)
+		for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
 			if (tree[i])
 				kmem_cache_free(ocfs2_uptodate_cachep,
 						tree[i]);
 	}
 }
 
-/* Item insertion is guarded by ip_io_mutex, so the insertion path takes
+/* Item insertion is guarded by ci_io_mutex, so the insertion path takes
  * advantage of this by not rechecking for a duplicate insert during
  * the slow case. Additionally, if the cache needs to be bumped up to
  * a tree, the code will not recheck after acquiring the lock --
@@ -442,42 +444,43 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	     (unsigned long long)bh->b_blocknr);
 
 	/* No need to recheck under spinlock - insertion is guarded by
-	 * ip_io_mutex */
-	spin_lock(&oi->ip_lock);
+	 * ci_io_mutex */
+	spin_lock(ci->ci_lock);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		/* Fast case - it's an array and there's a free
 		 * spot. */
 		ocfs2_append_cache_array(ci, bh->b_blocknr);
-		spin_unlock(&oi->ip_lock);
+		spin_unlock(ci->ci_lock);
 		return;
 	}
 
 	expand = 0;
-	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
+	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
 		/* We need to bump things up to a tree. */
 		expand = 1;
 	}
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	__ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand);
 }
 
 /* Called against a newly allocated buffer. Most likely nobody should
  * be able to read this sort of metadata while it's still being
- * allocated, but this is careful to take ip_io_mutex anyway. */
+ * allocated, but this is careful to take ci_io_mutex anyway. */
 void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 				   struct buffer_head *bh)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	/* This should definitely *not* exist in our cache */
 	BUG_ON(ocfs2_buffer_cached(oi, bh));
 
 	set_buffer_uptodate(bh);
 
-	mutex_lock(&oi->ip_io_mutex);
+	mutex_lock(ci->ci_io_mutex);
 	ocfs2_set_buffer_uptodate(inode, bh);
-	mutex_unlock(&oi->ip_io_mutex);
+	mutex_unlock(ci->ci_io_mutex);
 }
 
 /* Requires ip_lock. */
@@ -487,7 +490,7 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
 	sector_t *array = ci->ci_cache.ci_array;
 	int bytes;
 
-	BUG_ON(index < 0 || index >= OCFS2_INODE_MAX_CACHE_ARRAY);
+	BUG_ON(index < 0 || index >= OCFS2_CACHE_INFO_MAX_ARRAY);
 	BUG_ON(index >= ci->ci_num_cached);
 	BUG_ON(!ci->ci_num_cached);
 
@@ -523,13 +526,13 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(&oi->ip_lock);
+	spin_lock(ci->ci_lock);
 	mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n",
 	     (unsigned long long)oi->ip_blkno,
 	     (unsigned long long) block, ci->ci_num_cached,
-	     oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
+	     ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 
-	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
+	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
 		index = ocfs2_search_cache_array(ci, block);
 		if (index != -1)
 			ocfs2_remove_metadata_array(ci, index);
@@ -538,7 +541,7 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 		if (item)
 			ocfs2_remove_metadata_tree(ci, item);
 	}
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	if (item)
 		kmem_cache_free(ocfs2_uptodate_cachep, item);
@@ -577,7 +580,7 @@ int __init init_ocfs2_uptodate_cache(void)
 		return -ENOMEM;
 
 	mlog(0, "%u inlined cache items per inode.\n",
-	     OCFS2_INODE_MAX_CACHE_ARRAY);
+	     OCFS2_CACHE_INFO_MAX_ARRAY);
 
 	return 0;
 }
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 531b4b3..bd749e1 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -29,7 +29,9 @@
 int __init init_ocfs2_uptodate_cache(void);
 void exit_ocfs2_uptodate_cache(void);
 
-void ocfs2_metadata_cache_init(struct inode *inode);
+void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
+			       spinlock_t *cache_lock,
+			       struct mutex *io_mutex);
 void ocfs2_metadata_cache_purge(struct inode *inode);
 
 int ocfs2_buffer_uptodate(struct inode *inode,
-- 
cgit v0.10.2


From 6e5a3d7538ad4e46a976862f593faf65750e37cc Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 10 Feb 2009 19:00:37 -0800
Subject: ocfs2: Change metadata caching locks to an operations structure.

We don't really want to cart around too many new fields on the
ocfs2_caching_info structure.  So let's wrap all our access of the
parent object in a set of operations.  One pointer on caching_info, and
more flexibility to boot.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 8ec8044..36bb588 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1395,3 +1395,52 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
 {
 	return ocfs2_read_inode_block_full(inode, bh, 0);
 }
+
+static struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
+{
+	return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
+}
+
+static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	return oi->ip_blkno;
+}
+
+static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	spin_lock(&oi->ip_lock);
+}
+
+static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	spin_unlock(&oi->ip_lock);
+}
+
+static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	mutex_lock(&oi->ip_io_mutex);
+}
+
+static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	mutex_unlock(&oi->ip_io_mutex);
+}
+
+const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
+	.co_owner		= ocfs2_inode_cache_owner,
+	.co_cache_lock		= ocfs2_inode_cache_lock,
+	.co_cache_unlock	= ocfs2_inode_cache_unlock,
+	.co_io_lock		= ocfs2_inode_cache_io_lock,
+	.co_io_unlock		= ocfs2_inode_cache_io_unlock,
+};
+
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2f5e1aa..cd1caca 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -118,6 +118,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 extern struct kmem_cache *ocfs2_inode_cache;
 
 extern const struct address_space_operations ocfs2_aops;
+extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops;
 
 void ocfs2_clear_inode(struct inode *inode);
 void ocfs2_delete_inode(struct inode *inode);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index eef3bd0..6e54a49 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -68,14 +68,14 @@ enum ocfs2_caching_info_flags {
 	OCFS2_CACHE_FL_INLINE	= 1<<1,
 };
 
+struct ocfs2_caching_operations;
 struct ocfs2_caching_info {
 	/*
 	 * The parent structure provides the locks, but because the
-	 * parent structure can differ, struct ocfs2_caching_info needs
-	 * its own pointers to them.
+	 * parent structure can differ, it provides locking operations
+	 * to struct ocfs2_caching_info.
 	 */
-	spinlock_t		*ci_lock;
-	struct mutex		*ci_io_mutex;
+	const struct ocfs2_caching_operations *ci_ops;
 
 	unsigned int		ci_flags;
 	unsigned int		ci_num_cached;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8f217f6..746ed5d 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1683,8 +1683,8 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_init(&oi->ip_metadata_cache, &oi->ip_lock,
-				  &oi->ip_io_mutex);
+	ocfs2_metadata_cache_init(&oi->ip_metadata_cache,
+				  &ocfs2_inode_caching_ops);
 
 	inode_init_once(&oi->vfs_inode);
 }
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 8dbc457..226d042 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,12 +75,48 @@ struct ocfs2_meta_cache_item {
 
 static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
+static u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	return ci->ci_ops->co_owner(ci);
+}
+
+static void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_cache_lock(ci);
+}
+
+static void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_cache_unlock(ci);
+}
+
+static void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_io_lock(ci);
+}
+
+static void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_io_unlock(ci);
+}
+
+
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
-			       spinlock_t *cache_lock,
-			       struct mutex *io_mutex)
+			       const struct ocfs2_caching_operations *ops)
 {
-	ci->ci_lock = cache_lock;
-	ci->ci_io_mutex = io_mutex;
+	BUG_ON(!ops);
+
+	ci->ci_ops = ops;
 	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
 	ci->ci_num_cached = 0;
 }
@@ -120,12 +156,15 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct rb_root root = RB_ROOT;
 
-	spin_lock(ci->ci_lock);
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ocfs2_metadata_cache_lock(ci);
 	tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 	to_purge = ci->ci_num_cached;
 
-	mlog(0, "Purge %u %s items from Inode %llu\n", to_purge,
-	     tree ? "array" : "tree", (unsigned long long)oi->ip_blkno);
+	mlog(0, "Purge %u %s items from Owner %llu\n", to_purge,
+	     tree ? "array" : "tree",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci));
 
 	/* If we're a tree, save off the root so that we can safely
 	 * initialize the cache. We do the work to free tree members
@@ -133,16 +172,17 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	if (tree)
 		root = ci->ci_cache.ci_tree;
 
-	ocfs2_metadata_cache_init(ci, ci->ci_lock, ci->ci_io_mutex);
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_init(ci, ci->ci_ops);
+	ocfs2_metadata_cache_unlock(ci);
 
 	purged = ocfs2_purge_copied_metadata_tree(&root);
 	/* If possible, track the number wiped so that we can more
 	 * easily detect counting errors. Unfortunately, this is only
 	 * meaningful for trees. */
 	if (tree && purged != to_purge)
-		mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n",
-		     (unsigned long long)oi->ip_blkno, to_purge, purged);
+		mlog(ML_ERROR, "Owner %llu, count = %u, purged = %u\n",
+		     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+		     to_purge, purged);
 }
 
 /* Returns the index in the cache array, -1 if not found.
@@ -190,10 +230,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 	struct ocfs2_meta_cache_item *item = NULL;
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(ci->ci_lock);
+	ocfs2_metadata_cache_lock(ci);
 
-	mlog(0, "Inode %llu, query block %llu (inline = %u)\n",
-	     (unsigned long long)oi->ip_blkno,
+	mlog(0, "Owner %llu, query block %llu (inline = %u)\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long) bh->b_blocknr,
 	     !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
 
@@ -204,7 +244,7 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 		item = ocfs2_search_cache_tree(&oi->ip_metadata_cache,
 					       bh->b_blocknr);
 
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	mlog(0, "index = %d, item = %p\n", index, item);
 
@@ -294,18 +334,19 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
 	ci->ci_num_cached++;
 }
 
+/* co_cache_lock() must be held */
 static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
 					     struct ocfs2_caching_info *ci)
 {
-	assert_spin_locked(ci->ci_lock);
-
 	return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
 		(ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
 }
 
 /* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the
  * pointers in tree after we use them - this allows caller to detect
- * when to free in case of error. */
+ * when to free in case of error.
+ *
+ * The co_cache_lock() must be held. */
 static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 			       struct ocfs2_meta_cache_item **tree)
 {
@@ -313,13 +354,12 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
-			"Inode %llu, num cached = %u, should be %u\n",
-			(unsigned long long)oi->ip_blkno, ci->ci_num_cached,
-			OCFS2_CACHE_INFO_MAX_ARRAY);
+			"Owner %llu, num cached = %u, should be %u\n",
+			(unsigned long long)ocfs2_metadata_cache_owner(ci),
+			ci->ci_num_cached, OCFS2_CACHE_INFO_MAX_ARRAY);
 	mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE),
-			"Inode %llu not marked as inline anymore!\n",
-			(unsigned long long)oi->ip_blkno);
-	assert_spin_locked(ci->ci_lock);
+			"Owner %llu not marked as inline anymore!\n",
+			(unsigned long long)ocfs2_metadata_cache_owner(ci));
 
 	/* Be careful to initialize the tree members *first* because
 	 * once the ci_tree is used, the array is junk... */
@@ -337,7 +377,8 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	}
 
 	mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
-	     (unsigned long long)oi->ip_blkno, ci->ci_flags, ci->ci_num_cached);
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+	     ci->ci_flags, ci->ci_num_cached);
 }
 
 /* Slow path function - memory allocation is necessary. See the
@@ -352,8 +393,8 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
 		{ NULL, };
 
-	mlog(0, "Inode %llu, block %llu, expand = %d\n",
-	     (unsigned long long)oi->ip_blkno,
+	mlog(0, "Owner %llu, block %llu, expand = %d\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long)block, expand_tree);
 
 	new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
@@ -378,13 +419,13 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		}
 	}
 
-	spin_lock(ci->ci_lock);
+	ocfs2_metadata_cache_lock(ci);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		mlog(0, "Someone cleared the tree underneath us\n");
 		/* Ok, items were removed from the cache in between
 		 * locks. Detect this and revert back to the fast path */
 		ocfs2_append_cache_array(ci, block);
-		spin_unlock(ci->ci_lock);
+		ocfs2_metadata_cache_unlock(ci);
 		goto out_free;
 	}
 
@@ -392,7 +433,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		ocfs2_expand_cache(oi, tree);
 
 	__ocfs2_insert_cache_tree(ci, new);
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	new = NULL;
 out_free:
@@ -409,7 +450,7 @@ out_free:
 	}
 }
 
-/* Item insertion is guarded by ci_io_mutex, so the insertion path takes
+/* Item insertion is guarded by co_io_lock(), so the insertion path takes
  * advantage of this by not rechecking for a duplicate insert during
  * the slow case. Additionally, if the cache needs to be bumped up to
  * a tree, the code will not recheck after acquiring the lock --
@@ -439,18 +480,18 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	if (ocfs2_buffer_cached(oi, bh))
 		return;
 
-	mlog(0, "Inode %llu, inserting block %llu\n",
-	     (unsigned long long)oi->ip_blkno,
+	mlog(0, "Owner %llu, inserting block %llu\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long)bh->b_blocknr);
 
 	/* No need to recheck under spinlock - insertion is guarded by
-	 * ci_io_mutex */
-	spin_lock(ci->ci_lock);
+	 * co_io_lock() */
+	ocfs2_metadata_cache_lock(ci);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		/* Fast case - it's an array and there's a free
 		 * spot. */
 		ocfs2_append_cache_array(ci, bh->b_blocknr);
-		spin_unlock(ci->ci_lock);
+		ocfs2_metadata_cache_unlock(ci);
 		return;
 	}
 
@@ -459,14 +500,14 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 		/* We need to bump things up to a tree. */
 		expand = 1;
 	}
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	__ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand);
 }
 
 /* Called against a newly allocated buffer. Most likely nobody should
  * be able to read this sort of metadata while it's still being
- * allocated, but this is careful to take ci_io_mutex anyway. */
+ * allocated, but this is careful to take co_io_lock() anyway. */
 void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 				   struct buffer_head *bh)
 {
@@ -478,9 +519,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 
 	set_buffer_uptodate(bh);
 
-	mutex_lock(ci->ci_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 	ocfs2_set_buffer_uptodate(inode, bh);
-	mutex_unlock(ci->ci_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 }
 
 /* Requires ip_lock. */
@@ -526,9 +567,9 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(ci->ci_lock);
-	mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n",
-	     (unsigned long long)oi->ip_blkno,
+	ocfs2_metadata_cache_lock(ci);
+	mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long) block, ci->ci_num_cached,
 	     ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 
@@ -541,7 +582,7 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 		if (item)
 			ocfs2_remove_metadata_tree(ci, item);
 	}
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	if (item)
 		kmem_cache_free(ocfs2_uptodate_cachep, item);
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index bd749e1..3b33eb8 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -26,12 +26,38 @@
 #ifndef OCFS2_UPTODATE_H
 #define OCFS2_UPTODATE_H
 
+/*
+ * The caching code relies on locking provided by the user of
+ * struct ocfs2_caching_info.  These operations connect that up.
+ */
+struct ocfs2_caching_operations {
+	/*
+	 * A u64 representing the owning structure.  Usually this
+	 * is the block number (i_blkno or whatnot).  This is used so
+	 * that caching log messages can identify the owning structure.
+	 */
+	u64	(*co_owner)(struct ocfs2_caching_info *ci);
+
+	/*
+	 * Lock and unlock the caching data.  These will not sleep, and
+	 * should probably be spinlocks.
+	 */
+	void	(*co_cache_lock)(struct ocfs2_caching_info *ci);
+	void	(*co_cache_unlock)(struct ocfs2_caching_info *ci);
+
+	/*
+	 * Lock and unlock for disk I/O.  These will sleep, and should
+	 * be mutexes.
+	 */
+	void	(*co_io_lock)(struct ocfs2_caching_info *ci);
+	void	(*co_io_unlock)(struct ocfs2_caching_info *ci);
+};
+
 int __init init_ocfs2_uptodate_cache(void);
 void exit_ocfs2_uptodate_cache(void);
 
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
-			       spinlock_t *cache_lock,
-			       struct mutex *io_mutex);
+			       const struct ocfs2_caching_operations *ops);
 void ocfs2_metadata_cache_purge(struct inode *inode);
 
 int ocfs2_buffer_uptodate(struct inode *inode,
-- 
cgit v0.10.2


From 8cb471e8f82506937fe5e2e9fb0bf90f6b1f1170 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 10 Feb 2009 20:00:41 -0800
Subject: ocfs2: Take the inode out of the metadata read/write paths.

We are really passing the inode into the ocfs2_read/write_blocks()
functions to get at the metadata cache.  This commit passes the cache
directly into the metadata block functions, divorcing them from the
inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ab513dd..d5dffcf 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -859,7 +859,7 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, eb_blkno, &tmp,
+	rc = ocfs2_read_block(INODE_CACHE(inode), eb_blkno, &tmp,
 			      ocfs2_validate_extent_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -949,7 +949,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 				mlog_errno(status);
 				goto bail;
 			}
-			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
+			ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
+						      bhs[i]);
 
 			status = ocfs2_journal_access_eb(handle, inode, bhs[i],
 							 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2559,7 +2560,7 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 			     le16_to_cpu(el->l_next_free_rec));
 
 			ocfs2_journal_dirty(handle, bh);
-			ocfs2_remove_from_cache(inode, bh);
+			ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
 			continue;
 		}
 
@@ -2572,7 +2573,7 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 		if (ret)
 			mlog_errno(ret);
 
-		ocfs2_remove_from_cache(inode, bh);
+		ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
 	}
 }
 
@@ -6010,7 +6011,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
 		tl->tl_used = 0;
 
 		ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
-		status = ocfs2_write_block(osb, tl_bh, tl_inode);
+		status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode));
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -6719,7 +6720,7 @@ delete:
 
 			mlog(0, "deleting this extent block.\n");
 
-			ocfs2_remove_from_cache(inode, bh);
+			ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
 
 			BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
 			BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 15c8e6d..d43d34a 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -52,12 +52,12 @@ enum ocfs2_state_bits {
 BUFFER_FNS(NeedsValidate, needs_validate);
 
 int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
-		      struct inode *inode)
+		      struct ocfs2_caching_info *ci)
 {
 	int ret = 0;
 
-	mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n",
-		   (unsigned long long)bh->b_blocknr, inode);
+	mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n",
+		   (unsigned long long)bh->b_blocknr, ci);
 
 	BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
 	BUG_ON(buffer_jbd(bh));
@@ -70,7 +70,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 		goto out;
 	}
 
-	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 
 	lock_buffer(bh);
 	set_buffer_uptodate(bh);
@@ -85,7 +85,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 	wait_on_buffer(bh);
 
 	if (buffer_uptodate(bh)) {
-		ocfs2_set_buffer_uptodate(inode, bh);
+		ocfs2_set_buffer_uptodate(ci, bh);
 	} else {
 		/* We don't need to remove the clustered uptodate
 		 * information for this bh as it's not marked locally
@@ -94,7 +94,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 		put_bh(bh);
 	}
 
-	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 out:
 	mlog_exit(ret);
 	return ret;
@@ -177,7 +177,7 @@ bail:
 	return status;
 }
 
-int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		      struct buffer_head *bhs[], int flags,
 		      int (*validate)(struct super_block *sb,
 				      struct buffer_head *bh))
@@ -185,11 +185,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 	int status = 0;
 	int i, ignore_cache = 0;
 	struct buffer_head *bh;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 
-	mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
-		   inode, (unsigned long long)block, nr, flags);
+	mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n",
+		   ci, (unsigned long long)block, nr, flags);
 
-	BUG_ON(!inode);
+	BUG_ON(!ci);
 	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
 	       (flags & OCFS2_BH_IGNORE_CACHE));
 
@@ -212,12 +213,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 		goto bail;
 	}
 
-	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 	for (i = 0 ; i < nr ; i++) {
 		if (bhs[i] == NULL) {
-			bhs[i] = sb_getblk(inode->i_sb, block++);
+			bhs[i] = sb_getblk(sb, block++);
 			if (bhs[i] == NULL) {
-				mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+				ocfs2_metadata_cache_io_unlock(ci);
 				status = -EIO;
 				mlog_errno(status);
 				goto bail;
@@ -250,11 +251,11 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 		 *    before our is-it-in-flight check.
 		 */
 
-		if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
+		if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
 			mlog(ML_UPTODATE,
-			     "bh (%llu), inode %llu not uptodate\n",
+			     "bh (%llu), owner %llu not uptodate\n",
 			     (unsigned long long)bh->b_blocknr,
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
+			     (unsigned long long)ocfs2_metadata_cache_owner(ci));
 			/* We're using ignore_cache here to say
 			 * "go to disk" */
 			ignore_cache = 1;
@@ -283,7 +284,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 			 * previously submitted request than we are
 			 * done here. */
 			if ((flags & OCFS2_BH_READAHEAD)
-			    && ocfs2_buffer_read_ahead(inode, bh))
+			    && ocfs2_buffer_read_ahead(ci, bh))
 				continue;
 
 			lock_buffer(bh);
@@ -305,7 +306,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 			 * buffer lock. */
 			if (!(flags & OCFS2_BH_IGNORE_CACHE)
 			    && !(flags & OCFS2_BH_READAHEAD)
-			    && ocfs2_buffer_uptodate(inode, bh)) {
+			    && ocfs2_buffer_uptodate(ci, bh)) {
 				unlock_buffer(bh);
 				continue;
 			}
@@ -327,7 +328,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 
 		if (!(flags & OCFS2_BH_READAHEAD)) {
 			/* We know this can't have changed as we hold the
-			 * inode sem. Avoid doing any work on the bh if the
+			 * owner sem. Avoid doing any work on the bh if the
 			 * journal has it. */
 			if (!buffer_jbd(bh))
 				wait_on_buffer(bh);
@@ -351,7 +352,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 				 * that better not have changed */
 				BUG_ON(buffer_jbd(bh));
 				clear_buffer_needs_validate(bh);
-				status = validate(inode->i_sb, bh);
+				status = validate(sb, bh);
 				if (status) {
 					put_bh(bh);
 					bhs[i] = NULL;
@@ -363,9 +364,9 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 		/* Always set the buffer in the cache, even if it was
 		 * a forced read, or read-ahead which hasn't yet
 		 * completed. */
-		ocfs2_set_buffer_uptodate(inode, bh);
+		ocfs2_set_buffer_uptodate(ci, bh);
 	}
-	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 
 	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
 	     (unsigned long long)block, nr,
@@ -399,7 +400,7 @@ static void ocfs2_check_super_or_backup(struct super_block *sb,
 
 /*
  * Write super block and backups doesn't need to collaborate with journal,
- * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed
+ * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed
  * into this function.
  */
 int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index c75d682..b97bcc6 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -33,7 +33,7 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
 
 int ocfs2_write_block(struct ocfs2_super          *osb,
 		      struct buffer_head  *bh,
-		      struct inode        *inode);
+		      struct ocfs2_caching_info   *ci);
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 			   unsigned int nr, struct buffer_head *bhs[]);
 
@@ -44,7 +44,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
  * be set even for a READAHEAD call, as it marks the buffer for later
  * validation.
  */
-int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		      struct buffer_head *bhs[], int flags,
 		      int (*validate)(struct super_block *sb,
 				      struct buffer_head *bh));
@@ -55,7 +55,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 #define OCFS2_BH_IGNORE_CACHE      1
 #define OCFS2_BH_READAHEAD         8
 
-static inline int ocfs2_read_block(struct inode *inode, u64 off,
+static inline int ocfs2_read_block(struct ocfs2_caching_info *ci, u64 off,
 				   struct buffer_head **bh,
 				   int (*validate)(struct super_block *sb,
 						   struct buffer_head *bh))
@@ -68,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
 		goto bail;
 	}
 
-	status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
+	status = ocfs2_read_blocks(ci, off, 1, bh, 0, validate);
 
 bail:
 	return status;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b358f3b..273fb76 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -564,7 +564,8 @@ static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
 	int ret;
 	struct buffer_head *tmp = *bh;
 
-	ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block);
+	ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp,
+			       ocfs2_validate_dir_block);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -622,7 +623,8 @@ static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
 	u64 blkno = le64_to_cpu(di->i_dx_root);
 	struct buffer_head *tmp = *dx_root_bh;
 
-	ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root);
+	ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
+			       ocfs2_validate_dx_root);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
 	if (!ret && !*dx_root_bh)
@@ -662,7 +664,8 @@ static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
 	int ret;
 	struct buffer_head *tmp = *dx_leaf_bh;
 
-	ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf);
+	ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
+			       ocfs2_validate_dx_leaf);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
 	if (!ret && !*dx_leaf_bh)
@@ -680,7 +683,7 @@ static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
 {
 	int ret;
 
-	ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0,
+	ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0,
 				ocfs2_validate_dx_leaf);
 	if (ret)
 		mlog_errno(ret);
@@ -2332,7 +2335,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	ocfs2_set_new_buffer_uptodate(inode, new_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
 	status = ocfs2_journal_access_db(handle, inode, new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2418,7 +2421,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 		ret = -EIO;
 		goto out;
 	}
-	ocfs2_set_new_buffer_uptodate(dir, dx_root_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
 
 	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2495,7 +2498,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
 		}
 		dx_leaves[i] = bh;
 
-		ocfs2_set_new_buffer_uptodate(dir, bh);
+		ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh);
 
 		ret = ocfs2_journal_access_dl(handle, dir, bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -3005,7 +3008,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		goto out_commit;
 	}
 
-	ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
 
 	ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -3387,7 +3390,7 @@ do_extend:
 		goto bail;
 	}
 
-	ocfs2_set_new_buffer_uptodate(dir, new_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
 
 	status = ocfs2_journal_access_db(handle, dir, new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -4565,7 +4568,7 @@ remove_index:
 		goto out;
 	}
 
-	ocfs2_remove_from_cache(dir, dx_root_bh);
+	ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh);
 out:
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &dealloc);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 110bb57..fe15cee 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2127,7 +2127,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
 
 	/* This will discard any caching information we might have had
 	 * for the inode metadata. */
-	ocfs2_metadata_cache_purge(inode);
+	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
 
 	ocfs2_extent_map_trunc(inode, 0);
 
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f2bb1a0..dbd8a16 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -862,8 +862,8 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
 		}
 
-		rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
-				       flags, validate);
+		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
+				       bhs + done, flags, validate);
 		if (rc) {
 			mlog_errno(rc);
 			break;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 36bb588..1c9713c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -662,7 +662,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 		goto bail_commit;
 	}
 
-	ocfs2_remove_from_cache(inode, di_bh);
+	ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
 	vfs_dq_free_inode(inode);
 
 	status = ocfs2_free_dinode(handle, inode_alloc_inode,
@@ -1112,14 +1112,14 @@ void ocfs2_clear_inode(struct inode *inode)
 	ocfs2_lock_res_free(&oi->ip_inode_lockres);
 	ocfs2_lock_res_free(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_purge(inode);
+	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
 
-	mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached,
+	mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
 			"Clear inode of %llu, inode has %u cache items\n",
-			(unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
+			(unsigned long long)oi->ip_blkno,
+			INODE_CACHE(inode)->ci_num_cached);
 
-	mlog_bug_on_msg(!(oi->ip_metadata_cache.ci_flags &
-			  OCFS2_CACHE_FL_INLINE),
+	mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE),
 			"Clear inode of %llu, inode has a bad flag\n",
 			(unsigned long long)oi->ip_blkno);
 
@@ -1381,8 +1381,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
-			       flags, ocfs2_validate_inode_block);
+	rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
+			       1, &tmp, flags, ocfs2_validate_inode_block);
 
 	/* If ocfs2_read_blocks() got us a new bh, pass it up. */
 	if (!rc && !*bh)
@@ -1408,6 +1408,13 @@ static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
 	return oi->ip_blkno;
 }
 
+static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	return oi->vfs_inode.i_sb;
+}
+
 static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
 {
 	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
@@ -1438,6 +1445,7 @@ static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
 
 const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
 	.co_owner		= ocfs2_inode_cache_owner,
+	.co_get_super		= ocfs2_inode_cache_get_super,
 	.co_cache_lock		= ocfs2_inode_cache_lock,
 	.co_cache_unlock	= ocfs2_inode_cache_unlock,
 	.co_io_lock		= ocfs2_inode_cache_io_lock,
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index cd1caca..b0a71b2 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -120,6 +120,11 @@ extern struct kmem_cache *ocfs2_inode_cache;
 extern const struct address_space_operations ocfs2_aops;
 extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops;
 
+static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
+{
+	return &OCFS2_I(inode)->ip_metadata_cache;
+}
+
 void ocfs2_clear_inode(struct inode *inode);
 void ocfs2_delete_inode(struct inode *inode);
 void ocfs2_drop_inode(struct inode *inode);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c48b93a..ddf08d3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -898,7 +898,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 		ocfs2_bump_recovery_generation(fe);
 
 	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
-	status = ocfs2_write_block(osb, bh, journal->j_inode);
+	status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
 	if (status < 0)
 		mlog_errno(status);
 
@@ -1642,7 +1642,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 					ocfs2_get_recovery_generation(fe);
 
 	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
-	status = ocfs2_write_block(osb, bh, inode);
+	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
 	if (status < 0)
 		mlog_errno(status);
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index bac7e6a..da5dd6a 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -392,7 +392,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
 	ocfs2_clear_local_alloc(alloc);
 
 	ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
-	status = ocfs2_write_block(osb, alloc_bh, inode);
+	status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
 	if (status < 0)
 		mlog_errno(status);
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8601f93..689761b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -507,7 +507,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 		mlog_errno(status);
 		goto leave;
 	}
-	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh);
 
 	status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -1527,7 +1527,8 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 			mlog_errno(status);
 			goto bail;
 		}
-		ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
+		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
+					      bhs[virtual]);
 
 		status = ocfs2_journal_access(handle, inode, bhs[virtual],
 					      OCFS2_JOURNAL_ACCESS_CREATE);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 44f2a5e..0d7125b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -253,7 +253,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	flush_dcache_page(bh->b_page);
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
-	ocfs2_set_buffer_uptodate(gqinode, bh);
+	ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
 	err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type);
 	if (err < 0) {
 		brelse(bh);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index bdb09cb..3df2954 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -993,7 +993,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		goto out_trans;
 	}
 	dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
-	ocfs2_set_new_buffer_uptodate(lqinode, bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
 	status = ocfs2_journal_access_dq(handle, lqinode, bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -1027,7 +1027,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		mlog_errno(status);
 		goto out_trans;
 	}
-	ocfs2_set_new_buffer_uptodate(lqinode, dbh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), dbh);
 	status = ocfs2_journal_access_dq(handle, lqinode, dbh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -1131,7 +1131,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 		mlog_errno(status);
 		goto out;
 	}
-	ocfs2_set_new_buffer_uptodate(lqinode, bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
 
 	/* Local quota info, chunk header and the new block we initialize */
 	handle = ocfs2_start_trans(OCFS2_SB(sb),
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 424adaa..7465f0f 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -514,7 +514,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		goto out_unlock;
 	}
 
-	ocfs2_set_new_buffer_uptodate(inode, group_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh);
 
 	ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
 	if (ret) {
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 40661e7..bfbd7e9 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 	 * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
 	 * this is not true, the read of -1 (UINT64_MAX) will fail.
 	 */
-	ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
-				OCFS2_BH_IGNORE_CACHE, NULL);
+	ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
+				si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
 	if (ret == 0) {
 		spin_lock(&osb->osb_lock);
 		ocfs2_update_slot_info(si);
@@ -213,7 +213,7 @@ static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
 		ocfs2_update_disk_slot_old(si, slot_num, &bh);
 	spin_unlock(&osb->osb_lock);
 
-	status = ocfs2_write_block(osb, bh, si->si_inode);
+	status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
 	if (status < 0)
 		mlog_errno(status);
 
@@ -404,8 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 		     (unsigned long long)blkno);
 
 		bh = NULL;  /* Acquire a fresh bh */
-		status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
-					   OCFS2_BH_IGNORE_CACHE, NULL);
+		status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
+					   1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 73a16d4..21aaaaa 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -310,7 +310,7 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, gd_blkno, &tmp,
+	rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
 			      ocfs2_validate_group_descriptor);
 	if (rc)
 		goto out;
@@ -476,7 +476,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 		mlog_errno(status);
 		goto bail;
 	}
-	ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
 
 	status = ocfs2_block_group_fill(handle,
 					alloc_inode,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 746ed5d..af118ad 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1683,7 +1683,7 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_init(&oi->ip_metadata_cache,
+	ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
 				  &ocfs2_inode_caching_ops);
 
 	inode_init_once(&oi->vfs_inode);
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 226d042..1c829e4 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,13 +75,20 @@ struct ocfs2_meta_cache_item {
 
 static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
-static u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
+u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
 
 	return ci->ci_ops->co_owner(ci);
 }
 
+struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	return ci->ci_ops->co_get_super(ci);
+}
+
 static void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
@@ -96,14 +103,14 @@ static void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci)
 	ci->ci_ops->co_cache_unlock(ci);
 }
 
-static void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
+void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
 
 	ci->ci_ops->co_io_lock(ci);
 }
 
-static void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
+void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
 
@@ -149,11 +156,9 @@ static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
  * This function is a few more lines longer than necessary due to some
  * accounting done here, but I think it's worth tracking down those
  * bugs sooner -- Mark */
-void ocfs2_metadata_cache_purge(struct inode *inode)
+void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
 {
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	unsigned int tree, to_purge, purged;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct rb_root root = RB_ROOT;
 
 	BUG_ON(!ci || !ci->ci_ops);
@@ -223,12 +228,11 @@ ocfs2_search_cache_tree(struct ocfs2_caching_info *ci,
 	return NULL;
 }
 
-static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
+static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
 			       struct buffer_head *bh)
 {
 	int index = -1;
 	struct ocfs2_meta_cache_item *item = NULL;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	ocfs2_metadata_cache_lock(ci);
 
@@ -238,11 +242,9 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 	     !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
 
 	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
-		index = ocfs2_search_cache_array(&oi->ip_metadata_cache,
-						 bh->b_blocknr);
+		index = ocfs2_search_cache_array(ci, bh->b_blocknr);
 	else
-		item = ocfs2_search_cache_tree(&oi->ip_metadata_cache,
-					       bh->b_blocknr);
+		item = ocfs2_search_cache_tree(ci, bh->b_blocknr);
 
 	ocfs2_metadata_cache_unlock(ci);
 
@@ -256,7 +258,7 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
  * 
  * This can be called under lock_buffer()
  */
-int ocfs2_buffer_uptodate(struct inode *inode,
+int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
 			  struct buffer_head *bh)
 {
 	/* Doesn't matter if the bh is in our cache or not -- if it's
@@ -272,17 +274,17 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 
 	/* Ok, locally the buffer is marked as up to date, now search
 	 * our cache to see if we can trust that. */
-	return ocfs2_buffer_cached(OCFS2_I(inode), bh);
+	return ocfs2_buffer_cached(ci, bh);
 }
 
-/* 
+/*
  * Determine whether a buffer is currently out on a read-ahead request.
  * ci_io_sem should be held to serialize submitters with the logic here.
  */
-int ocfs2_buffer_read_ahead(struct inode *inode,
+int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh)
 {
-	return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
+	return buffer_locked(bh) && ocfs2_buffer_cached(ci, bh);
 }
 
 /* Requires ip_lock */
@@ -335,8 +337,7 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
 }
 
 /* co_cache_lock() must be held */
-static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
-					     struct ocfs2_caching_info *ci)
+static inline int ocfs2_insert_can_use_array(struct ocfs2_caching_info *ci)
 {
 	return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
 		(ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
@@ -347,11 +348,10 @@ static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
  * when to free in case of error.
  *
  * The co_cache_lock() must be held. */
-static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
+static void ocfs2_expand_cache(struct ocfs2_caching_info *ci,
 			       struct ocfs2_meta_cache_item **tree)
 {
 	int i;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
 			"Owner %llu, num cached = %u, should be %u\n",
@@ -383,12 +383,11 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 
 /* Slow path function - memory allocation is necessary. See the
  * comment above ocfs2_set_buffer_uptodate for more information. */
-static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
+static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
 					sector_t block,
 					int expand_tree)
 {
 	int i;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct ocfs2_meta_cache_item *new = NULL;
 	struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
 		{ NULL, };
@@ -420,7 +419,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	}
 
 	ocfs2_metadata_cache_lock(ci);
-	if (ocfs2_insert_can_use_array(oi, ci)) {
+	if (ocfs2_insert_can_use_array(ci)) {
 		mlog(0, "Someone cleared the tree underneath us\n");
 		/* Ok, items were removed from the cache in between
 		 * locks. Detect this and revert back to the fast path */
@@ -430,7 +429,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	}
 
 	if (expand_tree)
-		ocfs2_expand_cache(oi, tree);
+		ocfs2_expand_cache(ci, tree);
 
 	__ocfs2_insert_cache_tree(ci, new);
 	ocfs2_metadata_cache_unlock(ci);
@@ -468,16 +467,14 @@ out_free:
  * Readahead buffers can be passed in here before the I/O request is
  * completed.
  */
-void ocfs2_set_buffer_uptodate(struct inode *inode,
+void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
 			       struct buffer_head *bh)
 {
 	int expand;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	/* The block may very well exist in our cache already, so avoid
 	 * doing any more work in that case. */
-	if (ocfs2_buffer_cached(oi, bh))
+	if (ocfs2_buffer_cached(ci, bh))
 		return;
 
 	mlog(0, "Owner %llu, inserting block %llu\n",
@@ -487,7 +484,7 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	/* No need to recheck under spinlock - insertion is guarded by
 	 * co_io_lock() */
 	ocfs2_metadata_cache_lock(ci);
-	if (ocfs2_insert_can_use_array(oi, ci)) {
+	if (ocfs2_insert_can_use_array(ci)) {
 		/* Fast case - it's an array and there's a free
 		 * spot. */
 		ocfs2_append_cache_array(ci, bh->b_blocknr);
@@ -502,25 +499,22 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	}
 	ocfs2_metadata_cache_unlock(ci);
 
-	__ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand);
+	__ocfs2_set_buffer_uptodate(ci, bh->b_blocknr, expand);
 }
 
 /* Called against a newly allocated buffer. Most likely nobody should
  * be able to read this sort of metadata while it's still being
  * allocated, but this is careful to take co_io_lock() anyway. */
-void ocfs2_set_new_buffer_uptodate(struct inode *inode,
+void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh)
 {
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
-
 	/* This should definitely *not* exist in our cache */
-	BUG_ON(ocfs2_buffer_cached(oi, bh));
+	BUG_ON(ocfs2_buffer_cached(ci, bh));
 
 	set_buffer_uptodate(bh);
 
 	ocfs2_metadata_cache_io_lock(ci);
-	ocfs2_set_buffer_uptodate(inode, bh);
+	ocfs2_set_buffer_uptodate(ci, bh);
 	ocfs2_metadata_cache_io_unlock(ci);
 }
 
@@ -559,13 +553,11 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
 	ci->ci_num_cached--;
 }
 
-static void ocfs2_remove_block_from_cache(struct inode *inode,
+static void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci,
 					  sector_t block)
 {
 	int index;
 	struct ocfs2_meta_cache_item *item = NULL;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	ocfs2_metadata_cache_lock(ci);
 	mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n",
@@ -593,23 +585,24 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
  * bother reverting things to an inlined array in the case of a remove
  * which moves us back under the limit.
  */
-void ocfs2_remove_from_cache(struct inode *inode,
+void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
 			     struct buffer_head *bh)
 {
 	sector_t block = bh->b_blocknr;
 
-	ocfs2_remove_block_from_cache(inode, block);
+	ocfs2_remove_block_from_cache(ci, block);
 }
 
 /* Called when we remove xattr clusters from an inode. */
-void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
+void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
 					    sector_t block,
 					    u32 c_len)
 {
-	unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	unsigned int i, b_len = ocfs2_clusters_to_blocks(sb, 1) * c_len;
 
 	for (i = 0; i < b_len; i++, block++)
-		ocfs2_remove_block_from_cache(inode, block);
+		ocfs2_remove_block_from_cache(ci, block);
 }
 
 int __init init_ocfs2_uptodate_cache(void)
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 3b33eb8..f268273 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -38,6 +38,8 @@ struct ocfs2_caching_operations {
 	 */
 	u64	(*co_owner)(struct ocfs2_caching_info *ci);
 
+	/* The superblock is needed during I/O. */
+	struct super_block *(*co_get_super)(struct ocfs2_caching_info *ci);
 	/*
 	 * Lock and unlock the caching data.  These will not sleep, and
 	 * should probably be spinlocks.
@@ -58,20 +60,25 @@ void exit_ocfs2_uptodate_cache(void);
 
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
 			       const struct ocfs2_caching_operations *ops);
-void ocfs2_metadata_cache_purge(struct inode *inode);
+void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
 
-int ocfs2_buffer_uptodate(struct inode *inode,
+u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
+struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
+void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci);
+void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci);
+
+int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
 			  struct buffer_head *bh);
-void ocfs2_set_buffer_uptodate(struct inode *inode,
+void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
 			       struct buffer_head *bh);
-void ocfs2_set_new_buffer_uptodate(struct inode *inode,
+void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh);
-void ocfs2_remove_from_cache(struct inode *inode,
+void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
 			     struct buffer_head *bh);
-void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
+void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
 					    sector_t block,
 					    u32 c_len);
-int ocfs2_buffer_read_ahead(struct inode *inode,
+int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh);
 
 #endif /* OCFS2_UPTODATE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d1a27cd..19de5c4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -254,9 +254,9 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 			break;
 		}
 
-		if (!ocfs2_buffer_uptodate(bucket->bu_inode,
+		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 					   bucket->bu_bhs[i]))
-			ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
+			ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 						      bucket->bu_bhs[i]);
 	}
 
@@ -271,7 +271,7 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 {
 	int rc;
 
-	rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
+	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
 			       bucket->bu_blocks, bucket->bu_bhs, 0,
 			       NULL);
 	if (!rc) {
@@ -399,7 +399,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, xb_blkno, &tmp,
+	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
 			      ocfs2_validate_xattr_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -724,8 +724,8 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 		}
 
 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
-		ocfs2_remove_xattr_clusters_from_cache(inode, block,
-						       alloc_size);
+		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
+						       block, alloc_size);
 		cpos += alloc_size;
 		trunc_len -= alloc_size;
 	}
@@ -970,7 +970,8 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 		/* Copy ocfs2_xattr_value */
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-			ret = ocfs2_read_block(inode, blkno, &bh, NULL);
+			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
+					       &bh, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -1208,7 +1209,8 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-			ret = ocfs2_read_block(inode, blkno, &bh, NULL);
+			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
+					       &bh, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -2121,7 +2123,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 		}
 
 		new_bh = sb_getblk(inode->i_sb, first_blkno);
-		ocfs2_set_new_buffer_uptodate(inode, new_bh);
+		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
 		ret = ocfs2_journal_access_xb(handle, inode, new_bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -4845,7 +4847,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
 	     cpos, len, (unsigned long long)blkno);
 
-	ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
+	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
+					       len);
 
 	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
 	if (ret) {
-- 
cgit v0.10.2


From 66fb345ddd2d343e36692da0ff66126d7a99dc1b Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 15:24:40 -0800
Subject: ocfs2: move ip_last_trans to struct ocfs2_caching_info

We have the read side of metadata caching isolated to struct
ocfs2_caching_info, now we need the write side.  This means the journal
functions.  The journal only does a couple of things with struct inode.

This change moves the ip_last_trans field onto struct
ocfs2_caching_info as ci_last_trans.  This field tells the journal
whether a pending journal flush is required.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 1c9713c..a47750d 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1112,7 +1112,7 @@ void ocfs2_clear_inode(struct inode *inode)
 	ocfs2_lock_res_free(&oi->ip_inode_lockres);
 	ocfs2_lock_res_free(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
+	ocfs2_metadata_cache_exit(INODE_CACHE(inode));
 
 	mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
 			"Clear inode of %llu, inode has %u cache items\n",
@@ -1148,7 +1148,6 @@ void ocfs2_clear_inode(struct inode *inode)
 	/* Clear all other flags. */
 	oi->ip_flags = 0;
 	oi->ip_created_trans = 0;
-	oi->ip_last_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 	oi->ip_blkno = 0ULL;
 
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index b0a71b2..2cae251 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -63,8 +63,6 @@ struct ocfs2_inode_info
 	/* next two are protected by trans_inc_lock */
 	/* which transaction were we created on? Zero if none. */
 	unsigned long			ip_created_trans;
-	/* last transaction we were a part of. */
-	unsigned long			ip_last_trans;
 
 	struct ocfs2_caching_info	ip_metadata_cache;
 
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2c3222a..d4ac197 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -94,7 +94,7 @@ static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal,
 					      struct inode *inode)
 {
 	spin_lock(&trans_inc_lock);
-	OCFS2_I(inode)->ip_last_trans = journal->j_trans_id;
+	INODE_CACHE(inode)->ci_last_trans = journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
@@ -109,7 +109,8 @@ static inline int ocfs2_inode_fully_checkpointed(struct inode *inode)
 	struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal;
 
 	spin_lock(&trans_inc_lock);
-	ret = time_after(journal->j_trans_id, OCFS2_I(inode)->ip_last_trans);
+	ret = time_after(journal->j_trans_id,
+			 INODE_CACHE(inode)->ci_last_trans);
 	spin_unlock(&trans_inc_lock);
 	return ret;
 }
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6e54a49..c9bd7ce 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -77,6 +77,10 @@ struct ocfs2_caching_info {
 	 */
 	const struct ocfs2_caching_operations *ci_ops;
 
+	/* last transaction we were a part of. */
+	unsigned long		ci_last_trans;
+
+	/* Cache structures */
 	unsigned int		ci_flags;
 	unsigned int		ci_num_cached;
 	union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index af118ad..4212547 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1669,7 +1669,6 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_extent_map_init(&oi->vfs_inode);
 	INIT_LIST_HEAD(&oi->ip_io_markers);
 	oi->ip_created_trans = 0;
-	oi->ip_last_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 
 	init_rwsem(&oi->ip_alloc_sem);
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 1c829e4..81c8220 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -118,16 +118,32 @@ void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
 }
 
 
+static void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci,
+				       int clear)
+{
+	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
+	ci->ci_num_cached = 0;
+
+	if (clear)
+		ci->ci_last_trans = 0;
+}
+
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
 			       const struct ocfs2_caching_operations *ops)
 {
 	BUG_ON(!ops);
 
 	ci->ci_ops = ops;
-	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
-	ci->ci_num_cached = 0;
+	ocfs2_metadata_cache_reset(ci, 1);
+}
+
+void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci)
+{
+	ocfs2_metadata_cache_purge(ci);
+	ocfs2_metadata_cache_reset(ci, 1);
 }
 
+
 /* No lock taken here as 'root' is not expected to be visible to other
  * processes. */
 static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
@@ -177,7 +193,7 @@ void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
 	if (tree)
 		root = ci->ci_cache.ci_tree;
 
-	ocfs2_metadata_cache_init(ci, ci->ci_ops);
+	ocfs2_metadata_cache_reset(ci, 0);
 	ocfs2_metadata_cache_unlock(ci);
 
 	purged = ocfs2_purge_copied_metadata_tree(&root);
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index f268273..80dbb1d 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -61,6 +61,7 @@ void exit_ocfs2_uptodate_cache(void);
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
 			       const struct ocfs2_caching_operations *ops);
 void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
+void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci);
 
 u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
 struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
-- 
cgit v0.10.2


From 292dd27ec76b96cebcef576f330ab121f59ccf05 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 15:41:59 -0800
Subject: ocfs2: move ip_created_trans to struct ocfs2_caching_info

Similar ip_last_trans, ip_created_trans tracks the creation of a journal
managed inode.  This specifically tracks what transaction created the
inode.  This is so the code can know if the inode has ever been written
to disk.

This behavior is desirable for any journal managed object.  We move it
to struct ocfs2_caching_info as ci_created_trans so that any object
using ocfs2_caching_info can rely on this behavior.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index a47750d..8a9e708 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1147,7 +1147,6 @@ void ocfs2_clear_inode(struct inode *inode)
 
 	/* Clear all other flags. */
 	oi->ip_flags = 0;
-	oi->ip_created_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 	oi->ip_blkno = 0ULL;
 
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2cae251..67392f6 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -60,10 +60,6 @@ struct ocfs2_inode_info
 
 	u32				ip_dir_start_lookup;
 
-	/* next two are protected by trans_inc_lock */
-	/* which transaction were we created on? Zero if none. */
-	unsigned long			ip_created_trans;
-
 	struct ocfs2_caching_info	ip_metadata_cache;
 
 	struct ocfs2_extent_map		ip_extent_map;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d4ac197..0bb6754 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -129,9 +129,9 @@ static inline int ocfs2_inode_is_new(struct inode *inode)
 		return 0;
 	spin_lock(&trans_inc_lock);
 	ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id,
-			   OCFS2_I(inode)->ip_created_trans));
+			   INODE_CACHE(inode)->ci_created_trans));
 	if (!ret)
-		OCFS2_I(inode)->ip_created_trans = 0;
+		INODE_CACHE(inode)->ci_created_trans = 0;
 	spin_unlock(&trans_inc_lock);
 	return ret;
 }
@@ -140,7 +140,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 				       struct inode *inode)
 {
 	spin_lock(&trans_inc_lock);
-	OCFS2_I(inode)->ip_created_trans = osb->journal->j_trans_id;
+	INODE_CACHE(inode)->ci_created_trans = osb->journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9bd7ce..18b5fea 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -77,6 +77,9 @@ struct ocfs2_caching_info {
 	 */
 	const struct ocfs2_caching_operations *ci_ops;
 
+	/* next two are protected by trans_inc_lock */
+	/* which transaction were we created on? Zero if none. */
+	unsigned long		ci_created_trans;
 	/* last transaction we were a part of. */
 	unsigned long		ci_last_trans;
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4212547..e35a505 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1668,7 +1668,6 @@ static void ocfs2_inode_init_once(void *data)
 	spin_lock_init(&oi->ip_lock);
 	ocfs2_extent_map_init(&oi->vfs_inode);
 	INIT_LIST_HEAD(&oi->ip_io_markers);
-	oi->ip_created_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 
 	init_rwsem(&oi->ip_alloc_sem);
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 81c8220..b6284f2 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -124,8 +124,10 @@ static void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci,
 	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
 	ci->ci_num_cached = 0;
 
-	if (clear)
+	if (clear) {
+		ci->ci_created_trans = 0;
 		ci->ci_last_trans = 0;
+	}
 }
 
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
-- 
cgit v0.10.2


From 0cf2f7632b1789b811ab20b611c4156e6de2b055 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 16:41:25 -0800
Subject: ocfs2: Pass struct ocfs2_caching_info to the journal functions.

The next step in divorcing metadata I/O management from struct inode is
to pass struct ocfs2_caching_info to the journal functions.  Thus the
journal locks a metadata cache with the cache io_lock function.  It also
can compare ci_last_trans and ci_created_trans directly.

This is a large patch because of all the places we change
ocfs2_journal_access..(handle, inode, ...) to
ocfs2_journal_access..(handle, INODE_CACHE(inode), ...).

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d5dffcf..616afa9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -415,11 +415,11 @@ static inline void ocfs2_et_update_clusters(struct inode *inode,
 }
 
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
-					       struct inode *inode,
+					       struct ocfs2_caching_info *ci,
 					       struct ocfs2_extent_tree *et,
 					       int type)
 {
-	return et->et_root_journal_access(handle, inode, et->et_root_bh,
+	return et->et_root_journal_access(handle, ci, et->et_root_bh,
 					  type);
 }
 
@@ -633,7 +633,7 @@ static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
  * ocfs2_journal_access_path(), but I don't have a better one.
  */
 static int ocfs2_path_bh_journal_access(handle_t *handle,
-					struct inode *inode,
+					struct ocfs2_caching_info *ci,
 					struct ocfs2_path *path,
 					int idx)
 {
@@ -645,14 +645,15 @@ static int ocfs2_path_bh_journal_access(handle_t *handle,
 	if (idx)
 		access = ocfs2_journal_access_eb;
 
-	return access(handle, inode, path->p_node[idx].bh,
+	return access(handle, ci, path->p_node[idx].bh,
 		      OCFS2_JOURNAL_ACCESS_WRITE);
 }
 
 /*
  * Convenience function to journal all components in a path.
  */
-static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
+static int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+				     handle_t *handle,
 				     struct ocfs2_path *path)
 {
 	int i, ret = 0;
@@ -661,7 +662,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
 		goto out;
 
 	for(i = 0; i < path_num_items(path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, inode, path, i);
+		ret = ocfs2_path_bh_journal_access(handle, ci, path, i);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -952,7 +953,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 			ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
 						      bhs[i]);
 
-			status = ocfs2_journal_access_eb(handle, inode, bhs[i],
+			status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bhs[i],
 							 OCFS2_JOURNAL_ACCESS_CREATE);
 			if (status < 0) {
 				mlog_errno(status);
@@ -1051,7 +1052,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 		goto out;
 	}
 
-	status = ocfs2_journal_access_path(inode, handle, path);
+	status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -1162,7 +1163,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 		eb_el = &eb->h_list;
 
-		status = ocfs2_journal_access_eb(handle, inode, bh,
+		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bh,
 						 OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1202,20 +1203,20 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	 * journal_dirty erroring as it won't unless we've aborted the
 	 * handle (in which case we would never be here) so reserving
 	 * the write with journal_access is all we need to do. */
-	status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh,
+	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), *last_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
-	status = ocfs2_et_root_journal_access(handle, inode, et,
+	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 	if (eb_bh) {
-		status = ocfs2_journal_access_eb(handle, inode, eb_bh,
+		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), eb_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1305,7 +1306,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 	eb_el = &eb->h_list;
 	root_el = et->et_root_el;
 
-	status = ocfs2_journal_access_eb(handle, inode, new_eb_bh,
+	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), new_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1324,7 +1325,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_et_root_journal_access(handle, inode, et,
+	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2095,7 +2096,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	root_bh = left_path->p_node[subtree_index].bh;
 	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2103,14 +2104,14 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2503,7 +2504,7 @@ static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2654,7 +2655,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 			return -EAGAIN;
 
 		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
-			ret = ocfs2_journal_access_eb(handle, inode,
+			ret = ocfs2_journal_access_eb(handle, INODE_CACHE(inode),
 						      path_leaf_bh(right_path),
 						      OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret) {
@@ -2673,7 +2674,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 		 * We have to update i_last_eb_blk during the meta
 		 * data delete.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, inode, et,
+		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -2689,7 +2690,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	 */
 	BUG_ON(right_has_empty && !del_right_subtree);
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2697,14 +2698,14 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2864,7 +2865,7 @@ static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
 	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
 		return 0;
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -2947,7 +2948,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 		 * Caller might still want to make changes to the
 		 * tree root, so re-add it to the journal here.
 		 */
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   left_path, 0);
 		if (ret) {
 			mlog_errno(ret);
@@ -3025,7 +3026,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3055,7 +3056,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(inode, handle, left_path);
+		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3370,7 +3371,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3379,14 +3380,14 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3399,7 +3400,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		right_rec = &el->l_recs[index + 1];
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, left_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), left_path,
 					   path_num_items(left_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -3539,7 +3540,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3548,14 +3549,14 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3568,7 +3569,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			has_empty_extent = 1;
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 					   path_num_items(right_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -4006,7 +4007,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 		}
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, right_path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4135,7 +4136,7 @@ static int ocfs2_insert_path(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(inode, handle, left_path);
+		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -4146,7 +4147,7 @@ static int ocfs2_insert_path(struct inode *inode,
 	 * Pass both paths to the journal. The majority of inserts
 	 * will be touching all components anyway.
 	 */
-	ret = ocfs2_journal_access_path(inode, handle, right_path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -4211,7 +4212,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 
 	el = et->et_root_el;
 
-	ret = ocfs2_et_root_journal_access(handle, inode, et,
+	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4273,7 +4274,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		 * ocfs2_rotate_tree_right() might have extended the
 		 * transaction without re-journaling our tree root.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, inode, et,
+		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -4796,7 +4797,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	BUG_ON(num_bits > clusters_to_add);
 
 	/* reserve our write early -- insert_extent may update the tree root */
-	status = ocfs2_et_root_journal_access(handle, inode, et,
+	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -4971,7 +4972,7 @@ static int ocfs2_replace_extent_rec(struct inode *inode,
 {
 	int ret;
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -5333,13 +5334,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, left_path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5574,7 +5575,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_et_root_journal_access(handle, inode, et,
+	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -5691,7 +5692,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -5753,7 +5754,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 	while (i >= 0) {
 		/* Caller has given us at least enough credits to
 		 * update the truncate log dinode */
-		status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -6770,14 +6771,14 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	 * Each component will be touched, so we might as well journal
 	 * here to avoid having to handle errors later.
 	 */
-	status = ocfs2_journal_access_path(inode, handle, path);
+	status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
 	if (last_eb_bh) {
-		status = ocfs2_journal_access_eb(handle, inode, last_eb_bh,
+		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -7139,7 +7140,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		goto out_unlock;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -7508,7 +7509,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8a1e615..49eef2c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1528,7 +1528,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		ocfs2_commit_trans(osb, handle);
@@ -1773,7 +1773,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 	 * We don't want this to fail in ocfs2_write_end(), so do it
 	 * here.
 	 */
-	ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 273fb76..073ab34 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -176,7 +176,7 @@ static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
 	struct ocfs2_dx_root_block *dx_root;
 	struct ocfs2_dir_block_trailer *trailer;
 
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1136,7 +1136,8 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
 		access = ocfs2_journal_access_di;
 
-	ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = access(handle, INODE_CACHE(dir), de_bh,
+		     OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -1179,7 +1180,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
 			goto bail;
 		}
 		if (de == de_del)  {
-			status = access(handle, dir, bh,
+			status = access(handle, INODE_CACHE(dir), bh,
 					OCFS2_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				status = -EIO;
@@ -1329,7 +1330,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
 	 * the entry count needs to be updated. Also, we might be
 	 * adding to the start of the free list.
 	 */
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1337,7 +1338,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
 	}
 
 	if (!ocfs2_dx_root_inline(dx_root)) {
-		ret = ocfs2_journal_access_dl(handle, dir,
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
 					      lookup->dl_dx_leaf_bh,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
@@ -1496,7 +1497,7 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
 	int ret;
 	struct ocfs2_dx_leaf *dx_leaf;
 
-	ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
+	ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1526,7 +1527,7 @@ static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
 	struct ocfs2_dx_root_block *dx_root;
 	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
 
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1648,11 +1649,13 @@ int __ocfs2_add_entry(handle_t *handle,
 		 */
 		if (ocfs2_free_list_at_root(lookup)) {
 			bh = lookup->dl_dx_root_bh;
-			retval = ocfs2_journal_access_dr(handle, dir, bh,
+			retval = ocfs2_journal_access_dr(handle,
+						 INODE_CACHE(dir), bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		} else {
 			bh = lookup->dl_prev_leaf_bh;
-			retval = ocfs2_journal_access_db(handle, dir, bh,
+			retval = ocfs2_journal_access_db(handle,
+						 INODE_CACHE(dir), bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		}
 		if (retval) {
@@ -1703,11 +1706,13 @@ int __ocfs2_add_entry(handle_t *handle,
 			}
 
 			if (insert_bh == parent_fe_bh)
-				status = ocfs2_journal_access_di(handle, dir,
+				status = ocfs2_journal_access_di(handle,
+								 INODE_CACHE(dir),
 								 insert_bh,
 								 OCFS2_JOURNAL_ACCESS_WRITE);
 			else {
-				status = ocfs2_journal_access_db(handle, dir,
+				status = ocfs2_journal_access_db(handle,
+								 INODE_CACHE(dir),
 								 insert_bh,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 
@@ -2283,7 +2288,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
 	struct ocfs2_inline_data *data = &di->id2.i_data;
 	unsigned int size = le16_to_cpu(data->id_count);
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -2337,7 +2342,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
-	status = ocfs2_journal_access_db(handle, inode, new_bh,
+	status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2423,7 +2428,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 	}
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
 
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2457,7 +2462,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 	if (ret)
 		mlog_errno(ret);
 
-	ret = ocfs2_journal_access_di(handle, dir, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
@@ -2500,7 +2505,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
 
 		ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh);
 
-		ret = ocfs2_journal_access_dl(handle, dir, bh,
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -3010,7 +3015,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
 
-	ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
+	ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
@@ -3063,7 +3068,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	 * We let the later dirent insert modify c/mtime - to the user
 	 * the data hasn't changed.
 	 */
-	ret = ocfs2_journal_access_di(handle, dir, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
@@ -3392,7 +3397,7 @@ do_extend:
 
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
 
-	status = ocfs2_journal_access_db(handle, dir, new_bh,
+	status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -3888,7 +3893,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 	}
 	did_quota = 1;
 
-	ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
+	ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -3952,7 +3957,8 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 	}
 
 	for (i = 0; i < num_dx_leaves; i++) {
-		ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i],
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
+					      orig_dx_leaves[i],
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -4168,7 +4174,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 	 * failure to add the dx_root_bh to the journal won't result
 	 * us losing clusters.
 	 */
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4472,7 +4478,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
 		goto out_unlock;
 	}
 
-	ret = ocfs2_journal_access_di(handle, dir, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index fe15cee..f518d1b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3499,7 +3499,7 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 					int new_level)
 {
 	struct inode *inode = ocfs2_lock_res_inode(lockres);
-	int checkpointed = ocfs2_inode_fully_checkpointed(inode);
+	int checkpointed = ocfs2_ci_fully_checkpointed(INODE_CACHE(inode));
 
 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3..3ddbc5e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -259,7 +259,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -356,7 +356,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 		goto out;
 	}
 
-	status = ocfs2_journal_access_di(handle, inode, fe_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -593,7 +593,7 @@ restarted_transaction:
 	/* reserve a write to the file entry early on - that we if we
 	 * run out of credits in the allocation path, we can still
 	 * update i_size. */
-	status = ocfs2_journal_access_di(handle, inode, bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1131,7 +1131,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 8a9e708..179c819 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -562,7 +562,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 			goto out;
 		}
 
-		status = ocfs2_journal_access_di(handle, inode, fe_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+						 fe_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -646,7 +647,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 	}
 
 	/* set the inodes dtime */
-	status = ocfs2_journal_access_di(handle, inode, di_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1238,7 +1239,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
 	mlog_entry("(inode %llu)\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	status = ocfs2_journal_access_di(handle, inode, bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index ddf08d3..5b6c0e4 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -48,6 +48,7 @@
 #include "slot_map.h"
 #include "super.h"
 #include "sysfile.h"
+#include "uptodate.h"
 #include "quota.h"
 
 #include "buffer_head_io.h"
@@ -601,14 +602,16 @@ static struct ocfs2_triggers dl_triggers = {
 };
 
 static int __ocfs2_journal_access(handle_t *handle,
-				  struct inode *inode,
+				  struct ocfs2_caching_info *ci,
 				  struct buffer_head *bh,
 				  struct ocfs2_triggers *triggers,
 				  int type)
 {
 	int status;
+	struct ocfs2_super *osb =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
 
-	BUG_ON(!inode);
+	BUG_ON(!ci || !ci->ci_ops);
 	BUG_ON(!handle);
 	BUG_ON(!bh);
 
@@ -627,15 +630,15 @@ static int __ocfs2_journal_access(handle_t *handle,
 		BUG();
 	}
 
-	/* Set the current transaction information on the inode so
+	/* Set the current transaction information on the ci so
 	 * that the locking code knows whether it can drop it's locks
-	 * on this inode or not. We're protected from the commit
+	 * on this ci or not. We're protected from the commit
 	 * thread updating the current transaction id until
 	 * ocfs2_commit_trans() because ocfs2_start_trans() took
 	 * j_trans_barrier for us. */
-	ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode);
+	ocfs2_set_ci_lock_trans(osb->journal, ci);
 
-	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 	switch (type) {
 	case OCFS2_JOURNAL_ACCESS_CREATE:
 	case OCFS2_JOURNAL_ACCESS_WRITE:
@@ -650,9 +653,9 @@ static int __ocfs2_journal_access(handle_t *handle,
 		status = -EINVAL;
 		mlog(ML_ERROR, "Uknown access type!\n");
 	}
-	if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
+	if (!status && ocfs2_meta_ecc(osb) && triggers)
 		jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
-	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 
 	if (status < 0)
 		mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
@@ -662,66 +665,58 @@ static int __ocfs2_journal_access(handle_t *handle,
 	return status;
 }
 
-int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
-			       struct buffer_head *bh, int type)
+int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
+			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
 }
 
-int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
 }
 
-int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
 }
 
-int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
 }
 
-int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
 }
 
-int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
 }
 
-int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
 }
 
-int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
 }
 
-int ocfs2_journal_access(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
 			 struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, NULL, type);
+	return __ocfs2_journal_access(handle, ci, bh, NULL, type);
 }
 
 int ocfs2_journal_dirty(handle_t *handle,
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 0bb6754..6163f28 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -90,57 +90,66 @@ static inline unsigned long ocfs2_inc_trans_id(struct ocfs2_journal *j)
 	return old_id;
 }
 
-static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal,
-					      struct inode *inode)
+static inline void ocfs2_set_ci_lock_trans(struct ocfs2_journal *journal,
+					   struct ocfs2_caching_info *ci)
 {
 	spin_lock(&trans_inc_lock);
-	INODE_CACHE(inode)->ci_last_trans = journal->j_trans_id;
+	ci->ci_last_trans = journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
 /* Used to figure out whether it's safe to drop a metadata lock on an
- * inode. Returns true if all the inodes changes have been
+ * cached object. Returns true if all the object's changes have been
  * checkpointed to disk. You should be holding the spinlock on the
  * metadata lock while calling this to be sure that nobody can take
  * the lock and put it on another transaction. */
-static inline int ocfs2_inode_fully_checkpointed(struct inode *inode)
+static inline int ocfs2_ci_fully_checkpointed(struct ocfs2_caching_info *ci)
 {
 	int ret;
-	struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal;
+	struct ocfs2_journal *journal =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
 
 	spin_lock(&trans_inc_lock);
-	ret = time_after(journal->j_trans_id,
-			 INODE_CACHE(inode)->ci_last_trans);
+	ret = time_after(journal->j_trans_id, ci->ci_last_trans);
 	spin_unlock(&trans_inc_lock);
 	return ret;
 }
 
-/* convenience function to check if an inode is still new (has never
- * hit disk) Will do you a favor and set created_trans = 0 when you've
- * been checkpointed.  returns '1' if the inode is still new. */
-static inline int ocfs2_inode_is_new(struct inode *inode)
+/* convenience function to check if an object backed by struct
+ * ocfs2_caching_info  is still new (has never hit disk) Will do you a
+ * favor and set created_trans = 0 when you've
+ * been checkpointed.  returns '1' if the ci is still new. */
+static inline int ocfs2_ci_is_new(struct ocfs2_caching_info *ci)
 {
 	int ret;
+	struct ocfs2_journal *journal =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
+
+	spin_lock(&trans_inc_lock);
+	ret = !(time_after(journal->j_trans_id, ci->ci_created_trans));
+	if (!ret)
+		ci->ci_created_trans = 0;
+	spin_unlock(&trans_inc_lock);
+	return ret;
+}
 
+/* Wrapper for inodes so we can check system files */
+static inline int ocfs2_inode_is_new(struct inode *inode)
+{
 	/* System files are never "new" as they're written out by
 	 * mkfs. This helps us early during mount, before we have the
 	 * journal open and j_trans_id could be junk. */
 	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
 		return 0;
-	spin_lock(&trans_inc_lock);
-	ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id,
-			   INODE_CACHE(inode)->ci_created_trans));
-	if (!ret)
-		INODE_CACHE(inode)->ci_created_trans = 0;
-	spin_unlock(&trans_inc_lock);
-	return ret;
+
+	return ocfs2_ci_is_new(INODE_CACHE(inode));
 }
 
-static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
-				       struct inode *inode)
+static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
+				    struct ocfs2_caching_info *ci)
 {
 	spin_lock(&trans_inc_lock);
-	INODE_CACHE(inode)->ci_created_trans = osb->journal->j_trans_id;
+	ci->ci_created_trans = osb->journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
@@ -201,7 +210,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 	if (ocfs2_mount_local(osb))
 		return;
 
-	if (!ocfs2_inode_fully_checkpointed(inode)) {
+	if (!ocfs2_ci_fully_checkpointed(INODE_CACHE(inode))) {
 		/* WARNING: This only kicks off a single
 		 * checkpoint. If someone races you and adds more
 		 * metadata to the journal, you won't know, and will
@@ -211,7 +220,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 		ocfs2_start_checkpoint(osb);
 
 		wait_event(osb->journal->j_checkpointed,
-			   ocfs2_inode_fully_checkpointed(inode));
+			   ocfs2_ci_fully_checkpointed(INODE_CACHE(inode)));
 	}
 }
 
@@ -267,31 +276,31 @@ int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 
 
 /* ocfs2_inode */
-int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_extent_block */
-int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_group_desc */
-int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_xattr_block */
-int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* quota blocks */
-int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* dirblock */
-int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_dx_root_block */
-int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_dx_leaf */
-int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* Anything that has no ecc */
-int ocfs2_journal_access(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
 			 struct buffer_head *bh, int type);
 
 /*
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index da5dd6a..ac10f83 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -297,8 +297,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	}
 	memcpy(alloc_copy, alloc, bh->b_size);
 
-	status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
+					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_commit;
@@ -678,7 +678,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 	 * delete bits from it! */
 	*num_bits = bits_wanted;
 
-	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(local_alloc_inode),
 					 osb->local_alloc_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
@@ -1156,7 +1157,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 	}
 	memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
 
-	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(local_alloc_inode),
 					 osb->local_alloc_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 689761b..c07217a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -375,7 +375,8 @@ static int ocfs2_mknod(struct inode *dir,
 			goto leave;
 		}
 
-		status = ocfs2_journal_access_di(handle, dir, parent_fe_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(dir),
+						 parent_fe_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -509,7 +510,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	}
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh);
 
-	status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+					 *new_fe_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -565,7 +567,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	}
 
 	ocfs2_populate_inode(inode, fe, 1);
-	ocfs2_inode_set_new(osb, inode);
+	ocfs2_ci_set_new(osb, INODE_CACHE(inode));
 	if (!ocfs2_mount_local(osb)) {
 		status = ocfs2_create_new_inode_locks(inode);
 		if (status < 0)
@@ -682,7 +684,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_unlock_inode;
 	}
 
-	err = ocfs2_journal_access_di(handle, inode, fe_bh,
+	err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (err < 0) {
 		mlog_errno(err);
@@ -866,7 +868,7 @@ static int ocfs2_unlink(struct inode *dir,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access_di(handle, inode, fe_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1284,7 +1286,8 @@ static int ocfs2_rename(struct inode *old_dir,
 				goto bail;
 			}
 		}
-		status = ocfs2_journal_access_di(handle, new_inode, newfe_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(new_inode),
+						 newfe_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1331,7 +1334,8 @@ static int ocfs2_rename(struct inode *old_dir,
 	old_inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(old_inode);
 
-	status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
+					 old_inode_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status >= 0) {
 		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
@@ -1407,9 +1411,10 @@ static int ocfs2_rename(struct inode *old_dir,
 			     (int)old_dir_nlink, old_dir->i_nlink);
 		} else {
 			struct ocfs2_dinode *fe;
-			status = ocfs2_journal_access_di(handle, old_dir,
-						      old_dir_bh,
-						      OCFS2_JOURNAL_ACCESS_WRITE);
+			status = ocfs2_journal_access_di(handle,
+							 INODE_CACHE(old_dir),
+							 old_dir_bh,
+							 OCFS2_JOURNAL_ACCESS_WRITE);
 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
 			ocfs2_set_links_count(fe, old_dir->i_nlink);
 			status = ocfs2_journal_dirty(handle, old_dir_bh);
@@ -1530,7 +1535,8 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
 					      bhs[virtual]);
 
-		status = ocfs2_journal_access(handle, inode, bhs[virtual],
+		status = ocfs2_journal_access(handle, INODE_CACHE(inode),
+					      bhs[virtual],
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1918,7 +1924,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(orphan_dir_inode),
+					 orphan_dir_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2003,7 +2011,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access_di(handle,orphan_dir_inode,  orphan_dir_bh,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(orphan_dir_inode),
+					 orphan_dir_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 18b5fea..d370262 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -91,6 +91,11 @@ struct ocfs2_caching_info {
 		struct rb_root	ci_tree;
 	} ci_cache;
 };
+/*
+ * Need this prototype here instead of in uptodate.h because journal.h
+ * uses it.
+ */
+struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
 
 /* this limits us to 256 nodes
  * if we need more, we can do a kmalloc for the map */
@@ -408,7 +413,8 @@ struct ocfs2_super
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
 
 /* Useful typedef for passing around journal access functions */
-typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode,
+typedef int (*ocfs2_journal_access_func)(handle_t *handle,
+					 struct ocfs2_caching_info *ci,
 					 struct buffer_head *bh, int type);
 
 static inline int ocfs2_should_order_data(struct inode *inode)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 0d7125b..7eadf8b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -254,7 +254,8 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 	ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
-	err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type);
+	err = ocfs2_journal_access_dq(handle, INODE_CACHE(gqinode), bh,
+				      ja_type);
 	if (err < 0) {
 		brelse(bh);
 		goto out;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 3df2954..1a2c50a 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -108,7 +108,7 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
 		mlog_errno(status);
 		return status;
 	}
-	status = ocfs2_journal_access_dq(handle, inode, bh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(inode), bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -510,7 +510,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 				goto out_commit;
 			}
 			/* Release local quota file entry */
-			status = ocfs2_journal_access_dq(handle, lqinode,
+			status = ocfs2_journal_access_dq(handle,
+					INODE_CACHE(lqinode),
 					qbh, OCFS2_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				mlog_errno(status);
@@ -619,7 +620,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 			mlog_errno(status);
 			goto out_bh;
 		}
-		status = ocfs2_journal_access_dq(handle, lqinode, bh,
+		status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
+						 bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -994,7 +996,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 	}
 	dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
-	status = ocfs2_journal_access_dq(handle, lqinode, bh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1028,7 +1030,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		goto out_trans;
 	}
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), dbh);
-	status = ocfs2_journal_access_dq(handle, lqinode, dbh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), dbh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1143,7 +1145,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 		goto out;
 	}
 	/* Zero created block */
-	status = ocfs2_journal_access_dq(handle, lqinode, bh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
 				 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1158,7 +1160,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 		goto out_trans;
 	}
 	/* Update chunk header */
-	status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
+					 chunk->qc_headerbh,
 				 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1292,7 +1295,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
 		goto out;
 	}
 
-	status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type],
+	status = ocfs2_journal_access_dq(handle,
+			INODE_CACHE(sb_dqopt(sb)->files[type]),
 			od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 7465f0f..3c3d673 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
 		   new_clusters, first_new_cluster);
 
-	ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
+				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -141,7 +141,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	}
 
 	/* update the inode accordingly. */
-	ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 	cl = &fe->id2.i_chain;
 	cr = &cl->cl_recs[input->chain];
 
-	ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode),
+				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		goto out_commit;
 	}
 
-	ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
+				      main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 21aaaaa..a6c442c 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -352,7 +352,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
 	}
 
 	status = ocfs2_journal_access_gd(handle,
-					 alloc_inode,
+					 INODE_CACHE(alloc_inode),
 					 bg_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -491,7 +491,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 
-	status = ocfs2_journal_access_di(handle, alloc_inode,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
 					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1033,7 +1033,7 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
 
 	status = ocfs2_journal_access_gd(handle,
-					 alloc_inode,
+					 INODE_CACHE(alloc_inode),
 					 group_bh,
 					 journal_type);
 	if (status < 0) {
@@ -1106,7 +1106,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 	bg_ptr = le64_to_cpu(bg->bg_next_group);
 	prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
 
-	status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh,
+	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
+					 prev_bg_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1121,8 +1122,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
+					 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1136,8 +1137,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
+					 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1288,7 +1289,7 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -1461,7 +1462,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	/* Ok, claim our bits now: set the info on dinode, chainlist
 	 * and then the group */
 	status = ocfs2_journal_access_di(handle,
-					 alloc_inode,
+					 INODE_CACHE(alloc_inode),
 					 ac->ac_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
@@ -1907,8 +1908,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	if (ocfs2_is_cluster_bitmap(alloc_inode))
 		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
 
-	status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh,
-					 journal_type);
+	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
+					 group_bh, journal_type);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1993,8 +1994,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
+					 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 80dbb1d..0d826fe 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -64,7 +64,6 @@ void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
 void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci);
 
 u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
-struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
 void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci);
 void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci);
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 19de5c4..93aae79 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -297,7 +297,8 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 	int i, rc = 0;
 
 	for (i = 0; i < bucket->bu_blocks; i++) {
-		rc = ocfs2_journal_access(handle, bucket->bu_inode,
+		rc = ocfs2_journal_access(handle,
+					  INODE_CACHE(bucket->bu_inode),
 					  bucket->bu_bhs[i], type);
 		if (rc) {
 			mlog_errno(rc);
@@ -604,7 +605,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
 	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
 
-	status = vb->vb_access(handle, inode, vb->vb_bh,
+	status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -658,7 +659,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 
 	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
 
-	ret = vb->vb_access(handle, inode, vb->vb_bh,
+	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1217,7 +1218,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 			}
 
 			ret = ocfs2_journal_access(handle,
-						   inode,
+						   INODE_CACHE(inode),
 						   bh,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret < 0) {
@@ -1268,7 +1269,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
 	void *val = xs->base + offs;
 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 
-	ret = vb->vb_access(handle, inode, vb->vb_bh,
+	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1296,7 +1297,7 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
 {
 	int ret;
 
-	ret = vb->vb_access(handle, inode, vb->vb_bh,
+	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1617,7 +1618,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1625,7 +1626,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 	}
 
 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
-		ret = vb.vb_access(handle, inode, vb.vb_bh,
+		ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -1898,7 +1899,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 		mlog_errno(ret);
 		goto out;
 	}
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -2107,7 +2108,8 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 	int ret;
 
 	if (!xs->xattr_bh) {
-		ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
+		ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+					      xs->inode_bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -2125,7 +2127,8 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 		new_bh = sb_getblk(inode->i_sb, first_blkno);
 		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
-		ret = ocfs2_journal_access_xb(handle, inode, new_bh,
+		ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
+					      new_bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -2600,7 +2603,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 
 	if (!ret) {
 		/* Update inode ctime. */
-		ret = ocfs2_journal_access_di(ctxt->handle, inode,
+		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
 					      xis->inode_bh,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
@@ -3428,7 +3431,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	 */
 	down_write(&oi->ip_alloc_sem);
 
-	ret = ocfs2_journal_access_xb(handle, inode, xb_bh,
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4267,7 +4270,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
 
-	ret = ocfs2_journal_access_xb(handle, inode, root_bh,
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -4873,7 +4876,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_xb(handle, inode, root_bh,
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-- 
cgit v0.10.2


From d9a0a1f83bf083b55b3c1f16efddecc31abace61 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 17:32:34 -0800
Subject: ocfs2: Store the ocfs2_caching_info on ocfs2_extent_tree.

What do we cache?  Metadata blocks.  What are most of our non-inode metadata
blocks?  Extent blocks for our btrees.  struct ocfs2_extent_tree is the
main structure for managing those.  So let's store the associated
ocfs2_caching_info there.

This means that ocfs2_et_root_journal_access() doesn't need struct inode
anymore, and any place that has an et can refer to et->et_ci instead of
INODE_CACHE(inode).

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 616afa9..a26294c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -352,6 +352,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 {
 	et->et_ops = ops;
 	et->et_root_bh = bh;
+	et->et_ci = INODE_CACHE(inode);
 	et->et_root_journal_access = access;
 	if (!obj)
 		obj = (void *)bh->b_data;
@@ -415,11 +416,10 @@ static inline void ocfs2_et_update_clusters(struct inode *inode,
 }
 
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
-					       struct ocfs2_caching_info *ci,
 					       struct ocfs2_extent_tree *et,
 					       int type)
 {
-	return et->et_root_journal_access(handle, ci, et->et_root_bh,
+	return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh,
 					  type);
 }
 
@@ -1209,7 +1209,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		mlog_errno(status);
 		goto bail;
 	}
-	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	status = ocfs2_et_root_journal_access(handle, et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1325,7 +1325,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	status = ocfs2_et_root_journal_access(handle, et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2674,7 +2674,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 		 * We have to update i_last_eb_blk during the meta
 		 * data delete.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+		ret = ocfs2_et_root_journal_access(handle, et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -3026,7 +3026,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3056,7 +3056,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
+		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4212,7 +4212,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 
 	el = et->et_root_el;
 
-	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	ret = ocfs2_et_root_journal_access(handle, et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4274,7 +4274,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		 * ocfs2_rotate_tree_right() might have extended the
 		 * transaction without re-journaling our tree root.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+		ret = ocfs2_et_root_journal_access(handle, et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -4797,7 +4797,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	BUG_ON(num_bits > clusters_to_add);
 
 	/* reserve our write early -- insert_extent may update the tree root */
-	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	status = ocfs2_et_root_journal_access(handle, et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -5334,13 +5334,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5575,7 +5575,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	ret = ocfs2_et_root_journal_access(handle, et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 353254b..285d40b 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -45,7 +45,8 @@
  *
  * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
  * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
- * functions.  With metadata ecc, we now call different journal_access
+ * functions.  It needs the ocfs2_caching_info structure associated with
+ * I/O on the tree.  With metadata ecc, we now call different journal_access
  * functions for each type of metadata, so it must have the
  * root_journal_access function.
  * ocfs2_extent_tree_operations abstract the normal operations we do for
@@ -56,6 +57,7 @@ struct ocfs2_extent_tree {
 	struct ocfs2_extent_tree_operations	*et_ops;
 	struct buffer_head			*et_root_bh;
 	struct ocfs2_extent_list		*et_root_el;
+	struct ocfs2_caching_info		*et_ci;
 	ocfs2_journal_access_func		et_root_journal_access;
 	void					*et_object;
 	unsigned int				et_max_leaf_clusters;
-- 
cgit v0.10.2


From 3d03a305ded8057155bd3c801e64ffef9f534827 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 17:49:26 -0800
Subject: ocfs2: Pass ocfs2_caching_info to ocfs2_read_extent_block().

extent blocks belong to btrees on more than just inodes, so we want to
pass the ocfs2_caching_info structure directly to
ocfs2_read_extent_block().  A number of places in alloc.c can now drop
struct inode from their argument list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a26294c..1ff13d3 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -854,13 +854,13 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
 	return 0;
 }
 
-int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
 			    struct buffer_head **bh)
 {
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(INODE_CACHE(inode), eb_blkno, &tmp,
+	rc = ocfs2_read_block(ci, eb_blkno, &tmp,
 			      ocfs2_validate_extent_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -875,7 +875,6 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
  * How many free extents have we got before we need more meta data?
  */
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
-			   struct inode *inode,
 			   struct ocfs2_extent_tree *et)
 {
 	int retval;
@@ -890,7 +889,8 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
 	last_eb_blk = ocfs2_et_get_last_eb_blk(et);
 
 	if (last_eb_blk) {
-		retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
+		retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk,
+						 &eb_bh);
 		if (retval < 0) {
 			mlog_errno(retval);
 			goto bail;
@@ -1382,7 +1382,6 @@ bail:
  * return status < 0 indicates an error.
  */
 static int ocfs2_find_branch_target(struct ocfs2_super *osb,
-				    struct inode *inode,
 				    struct ocfs2_extent_tree *et,
 				    struct buffer_head **target_bh)
 {
@@ -1401,19 +1400,21 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
 
 	while(le16_to_cpu(el->l_tree_depth) > 1) {
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
-			ocfs2_error(inode->i_sb, "Dinode %llu has empty "
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has empty "
 				    "extent list (next_free_rec == 0)",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 			status = -EIO;
 			goto bail;
 		}
 		i = le16_to_cpu(el->l_next_free_rec) - 1;
 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 		if (!blkno) {
-			ocfs2_error(inode->i_sb, "Dinode %llu has extent "
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has extent "
 				    "list where extent # %d has no physical "
 				    "block start",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
 			status = -EIO;
 			goto bail;
 		}
@@ -1421,7 +1422,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
 		brelse(bh);
 		bh = NULL;
 
-		status = ocfs2_read_extent_block(inode, blkno, &bh);
+		status = ocfs2_read_extent_block(et->et_ci, blkno, &bh);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -1475,7 +1476,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
 
 	BUG_ON(meta_ac == NULL);
 
-	shift = ocfs2_find_branch_target(osb, inode, et, &bh);
+	shift = ocfs2_find_branch_target(osb, et, &bh);
 	if (shift < 0) {
 		ret = shift;
 		mlog_errno(ret);
@@ -1780,7 +1781,7 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		brelse(bh);
 		bh = NULL;
-		ret = ocfs2_read_extent_block(inode, blkno, &bh);
+		ret = ocfs2_read_extent_block(INODE_CACHE(inode), blkno, &bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3032,7 +3033,8 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
+	ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+					    path, &cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4557,7 +4559,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 		 * ocfs2_figure_insert_type() and ocfs2_add_branch()
 		 * may want it later.
 		 */
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      &bh);
 		if (ret) {
@@ -4760,7 +4762,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	if (mark_unwritten)
 		flags = OCFS2_EXT_UNWRITTEN;
 
-	free_extents = ocfs2_num_free_extents(osb, inode, et);
+	free_extents = ocfs2_num_free_extents(osb, et);
 	if (free_extents < 0) {
 		status = free_extents;
 		mlog_errno(status);
@@ -5048,7 +5050,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 	if (path->p_tree_depth) {
 		struct ocfs2_extent_block *eb;
 
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      &last_eb_bh);
 		if (ret) {
@@ -5203,7 +5205,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 
 	depth = path->p_tree_depth;
 	if (depth > 0) {
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      &last_eb_bh);
 		if (ret < 0) {
@@ -7447,7 +7449,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 	ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
 
 	if (fe->id2.i_list.l_tree_depth) {
-		status = ocfs2_read_extent_block(inode,
+		status = ocfs2_read_extent_block(INODE_CACHE(inode),
 						 le64_to_cpu(fe->i_last_eb_blk),
 						 &last_eb_bh);
 		if (status < 0) {
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 285d40b..ed78ee5 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -86,7 +86,7 @@ void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
  * allocated.  This is a cached read.  The extent block will be validated
  * with ocfs2_validate_extent_block().
  */
-int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
 			    struct buffer_head **bh);
 
 struct ocfs2_alloc_context;
@@ -132,7 +132,6 @@ int ocfs2_remove_btree_range(struct inode *inode,
 			     struct ocfs2_cached_dealloc_ctxt *dealloc);
 
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
-			   struct inode *inode,
 			   struct ocfs2_extent_tree *et);
 
 /*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 073ab34..00e4328 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3346,7 +3346,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
 		spin_unlock(&OCFS2_I(dir)->ip_lock);
 		ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
-		num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
+		num_free_extents = ocfs2_num_free_extents(osb, &et);
 		if (num_free_extents < 0) {
 			status = num_free_extents;
 			mlog_errno(status);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index dbd8a16..a5dc13e 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
 	struct ocfs2_extent_block *eb;
 	struct ocfs2_extent_list *el;
 
-	ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
+	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -375,7 +375,7 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 			goto no_more_extents;
 
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(INODE_CACHE(inode),
 					      le64_to_cpu(eb->h_next_leaf_blk),
 					      &next_eb_bh);
 		if (ret) {
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a6c442c..c30b644 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2152,7 +2152,7 @@ int ocfs2_lock_allocators(struct inode *inode,
 
 	BUG_ON(clusters_to_add != 0 && data_ac == NULL);
 
-	num_free_extents = ocfs2_num_free_extents(osb, inode, et);
+	num_free_extents = ocfs2_num_free_extents(osb, et);
 	if (num_free_extents < 0) {
 		ret = num_free_extents;
 		mlog_errno(ret);
-- 
cgit v0.10.2


From facdb77f54f09a33baf6b649496f5dd1d7922a7e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 18:08:48 -0800
Subject: ocfs2: ocfs2_find_path() only needs the caching info

ocfs2_find_path and ocfs2_find_leaf() walk our btrees, reading extent
blocks.  They need struct ocfs2_caching_info for that, but not struct
inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 1ff13d3..ecd9730 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -475,8 +475,8 @@ struct ocfs2_path {
 #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
 #define path_num_items(_path) ((_path)->p_tree_depth + 1)
 
-static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
-			   u32 cpos);
+static int ocfs2_find_path(struct ocfs2_caching_info *ci,
+			   struct ocfs2_path *path, u32 cpos);
 static void ocfs2_adjust_rightmost_records(struct inode *inode,
 					   handle_t *handle,
 					   struct ocfs2_path *path,
@@ -1039,7 +1039,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 		return status;
 	}
 
-	status = ocfs2_find_path(inode, path, UINT_MAX);
+	status = ocfs2_find_path(et->et_ci, path, UINT_MAX);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -1728,7 +1728,7 @@ typedef void (path_insert_t)(void *, struct buffer_head *);
  * This code can be called with a cpos larger than the tree, in which
  * case it will return the rightmost path.
  */
-static int __ocfs2_find_path(struct inode *inode,
+static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
 			     struct ocfs2_extent_list *root_el, u32 cpos,
 			     path_insert_t *func, void *data)
 {
@@ -1739,15 +1739,14 @@ static int __ocfs2_find_path(struct inode *inode,
 	struct ocfs2_extent_block *eb;
 	struct ocfs2_extent_list *el;
 	struct ocfs2_extent_rec *rec;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	el = root_el;
 	while (el->l_tree_depth) {
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has empty extent list at "
+			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+				    "Owner %llu has empty extent list at "
 				    "depth %u\n",
-				    (unsigned long long)oi->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
 				    le16_to_cpu(el->l_tree_depth));
 			ret = -EROFS;
 			goto out;
@@ -1770,10 +1769,10 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 		if (blkno == 0) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has bad blkno in extent list "
+			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+				    "Owner %llu has bad blkno in extent list "
 				    "at depth %u (index %d)\n",
-				    (unsigned long long)oi->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
 				    le16_to_cpu(el->l_tree_depth), i);
 			ret = -EROFS;
 			goto out;
@@ -1781,7 +1780,7 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		brelse(bh);
 		bh = NULL;
-		ret = ocfs2_read_extent_block(INODE_CACHE(inode), blkno, &bh);
+		ret = ocfs2_read_extent_block(ci, blkno, &bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1792,10 +1791,10 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		if (le16_to_cpu(el->l_next_free_rec) >
 		    le16_to_cpu(el->l_count)) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has bad count in extent list "
+			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+				    "Owner %llu has bad count in extent list "
 				    "at block %llu (next free=%u, count=%u)\n",
-				    (unsigned long long)oi->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
 				    (unsigned long long)bh->b_blocknr,
 				    le16_to_cpu(el->l_next_free_rec),
 				    le16_to_cpu(el->l_count));
@@ -1839,14 +1838,14 @@ static void find_path_ins(void *data, struct buffer_head *bh)
 	ocfs2_path_insert_eb(fp->path, fp->index, bh);
 	fp->index++;
 }
-static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
-			   u32 cpos)
+static int ocfs2_find_path(struct ocfs2_caching_info *ci,
+			   struct ocfs2_path *path, u32 cpos)
 {
 	struct find_path_data data;
 
 	data.index = 1;
 	data.path = path;
-	return __ocfs2_find_path(inode, path_root_el(path), cpos,
+	return __ocfs2_find_path(ci, path_root_el(path), cpos,
 				 find_path_ins, &data);
 }
 
@@ -1871,13 +1870,14 @@ static void find_leaf_ins(void *data, struct buffer_head *bh)
  *
  * This function doesn't handle non btree extent lists.
  */
-int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
-		    u32 cpos, struct buffer_head **leaf_bh)
+int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
+		    struct ocfs2_extent_list *root_el, u32 cpos,
+		    struct buffer_head **leaf_bh)
 {
 	int ret;
 	struct buffer_head *bh = NULL;
 
-	ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh);
+	ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2382,7 +2382,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
 		     insert_cpos, cpos);
 
-		ret = ocfs2_find_path(inode, left_path, cpos);
+		ret = ocfs2_find_path(INODE_CACHE(inode), left_path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2923,7 +2923,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 	}
 
 	while (right_cpos) {
-		ret = ocfs2_find_path(inode, right_path, right_cpos);
+		ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3052,7 +3052,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		ret = ocfs2_find_path(inode, left_path, cpos);
+		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3297,7 +3297,7 @@ static int ocfs2_get_right_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, right_path, right_cpos);
+	ret = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3466,7 +3466,7 @@ static int ocfs2_get_left_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, left_path, left_cpos);
+	ret = ocfs2_find_path(INODE_CACHE(inode), left_path, left_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3996,7 +3996,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 				goto out;
 			}
 
-			ret = ocfs2_find_path(inode, left_path, left_cpos);
+			ret = ocfs2_find_path(INODE_CACHE(inode), left_path,
+					      left_cpos);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -4245,7 +4246,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		cpos = UINT_MAX;
 	}
 
-	ret = ocfs2_find_path(inode, right_path, cpos);
+	ret = ocfs2_find_path(et->et_ci, right_path, cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4342,7 +4343,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (!left_path)
 				goto out;
 
-			status = ocfs2_find_path(inode, left_path, left_cpos);
+			status = ocfs2_find_path(INODE_CACHE(inode),
+						 left_path, left_cpos);
 			if (status)
 				goto out;
 
@@ -4398,7 +4400,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 		if (!right_path)
 			goto out;
 
-		status = ocfs2_find_path(inode, right_path, right_cpos);
+		status = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
 		if (status)
 			goto out;
 
@@ -4600,7 +4602,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 	 * us the rightmost tree path. This is accounted for below in
 	 * the appending code.
 	 */
-	ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos));
+	ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos));
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4950,7 +4952,7 @@ leftright:
 		ocfs2_reinit_path(path, 1);
 
 		cpos = le32_to_cpu(split_rec.e_cpos);
-		ret = ocfs2_find_path(inode, path, cpos);
+		ret = ocfs2_find_path(et->et_ci, path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5148,7 +5150,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, left_path, cpos);
+	ret = ocfs2_find_path(et->et_ci, left_path, cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5320,7 +5322,8 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 				goto out;
 			}
 
-			ret = ocfs2_find_path(inode, left_path, left_cpos);
+			ret = ocfs2_find_path(et->et_ci, left_path,
+					      left_cpos);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -5429,7 +5432,7 @@ int ocfs2_remove_extent(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, path, cpos);
+	ret = ocfs2_find_path(et->et_ci, path, cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5494,7 +5497,7 @@ int ocfs2_remove_extent(struct inode *inode,
 		 */
 		ocfs2_reinit_path(path, 1);
 
-		ret = ocfs2_find_path(inode, path, cpos);
+		ret = ocfs2_find_path(et->et_ci, path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -6522,7 +6525,7 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh);
+	ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -7299,7 +7302,7 @@ start:
 	/*
 	 * Truncate always works against the rightmost tree branch.
 	 */
-	status = ocfs2_find_path(inode, path, UINT_MAX);
+	status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX);
 	if (status) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ed78ee5..8718e57 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -223,8 +223,9 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
 			  unsigned int start, unsigned int end, int trunc);
 
-int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
-		    u32 cpos, struct buffer_head **leaf_bh);
+int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
+		    struct ocfs2_extent_list *root_el, u32 cpos,
+		    struct buffer_head **leaf_bh);
 int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
 
 /*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 00e4328..088a1b5 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -805,7 +805,8 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 	struct ocfs2_extent_rec *rec = NULL;
 
 	if (el->l_tree_depth) {
-		ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index a5dc13e..dc9482c 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -428,7 +428,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 	tree_height = le16_to_cpu(el->l_tree_depth);
 
 	if (tree_height > 0) {
-		ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -548,7 +549,8 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 	u32 coff;
 
 	if (el->l_tree_depth) {
-		ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 93aae79..61819b2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2854,7 +2854,8 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
 	u64 e_blkno = 0;
 
 	if (el->l_tree_depth) {
-		ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v0.10.2


From 42a5a7a9a5abf9a566b91c51137921957b9a14e4 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 18:49:19 -0800
Subject: ocfs2: ocfs2_create_new_meta_bhs() doesn't need struct inode.

Pass struct ocfs2_extent_tree into ocfs2_create_new_meta_bhs().  It no
longer needs struct inode or ocfs2_super.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ecd9730..ad41eab 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -914,9 +914,8 @@ bail:
  * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
  * l_count for you
  */
-static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
-				     handle_t *handle,
-				     struct inode *inode,
+static int ocfs2_create_new_meta_bhs(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     int wanted,
 				     struct ocfs2_alloc_context *meta_ac,
 				     struct buffer_head *bhs[])
@@ -925,6 +924,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 	u16 suballoc_bit_start;
 	u32 num_got;
 	u64 first_blkno;
+	struct ocfs2_super *osb =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
 	struct ocfs2_extent_block *eb;
 
 	mlog_entry_void();
@@ -950,10 +951,10 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 				mlog_errno(status);
 				goto bail;
 			}
-			ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
-						      bhs[i]);
+			ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]);
 
-			status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bhs[i],
+			status = ocfs2_journal_access_eb(handle, et->et_ci,
+							 bhs[i],
 							 OCFS2_JOURNAL_ACCESS_CREATE);
 			if (status < 0) {
 				mlog_errno(status);
@@ -1141,7 +1142,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
+	status = ocfs2_create_new_meta_bhs(handle, et, new_blocks,
 					   meta_ac, new_eb_bhs);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1292,7 +1293,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
+	status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
 					   &new_eb_bh);
 	if (status < 0) {
 		mlog_errno(status);
-- 
cgit v0.10.2


From 6641b0ce3274d979338cb67b2f562189dcbc1c28 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 18:57:52 -0800
Subject: ocfs2: Pass ocfs2_extent_tree to ocfs2_unlink_path()

ocfs2_unlink_path() doesn't need struct inode, so let's pass it struct
ocfs2_extent_tree.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ad41eab..18762f5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2536,7 +2536,8 @@ out:
 	return ret;
 }
 
-static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
+static void ocfs2_unlink_path(handle_t *handle,
+			      struct ocfs2_extent_tree *et,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc,
 			      struct ocfs2_path *path, int unlink_start)
 {
@@ -2558,12 +2559,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 			mlog(ML_ERROR,
 			     "Inode %llu, attempted to remove extent block "
 			     "%llu with %u records\n",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 			     (unsigned long long)le64_to_cpu(eb->h_blkno),
 			     le16_to_cpu(el->l_next_free_rec));
 
 			ocfs2_journal_dirty(handle, bh);
-			ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
+			ocfs2_remove_from_cache(et->et_ci, bh);
 			continue;
 		}
 
@@ -2576,11 +2577,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 		if (ret)
 			mlog_errno(ret);
 
-		ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
+		ocfs2_remove_from_cache(et->et_ci, bh);
 	}
 }
 
-static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
+static void ocfs2_unlink_subtree(handle_t *handle,
+				 struct ocfs2_extent_tree *et,
 				 struct ocfs2_path *left_path,
 				 struct ocfs2_path *right_path,
 				 int subtree_index,
@@ -2611,7 +2613,7 @@ static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
 	ocfs2_journal_dirty(handle, root_bh);
 	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
 
-	ocfs2_unlink_path(inode, handle, dealloc, right_path,
+	ocfs2_unlink_path(handle, et, dealloc, right_path,
 			  subtree_index + 1);
 }
 
@@ -2744,7 +2746,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 		mlog_errno(ret);
 
 	if (del_right_subtree) {
-		ocfs2_unlink_subtree(inode, handle, left_path, right_path,
+		ocfs2_unlink_subtree(handle, et, left_path, right_path,
 				     subtree_index, dealloc);
 		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
 						left_path);
@@ -3067,7 +3069,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 
 		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
 
-		ocfs2_unlink_subtree(inode, handle, left_path, path,
+		ocfs2_unlink_subtree(handle, et, left_path, path,
 				     subtree_index, dealloc);
 		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
 						left_path);
@@ -3086,7 +3088,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		 * revert the inode back to having extents
 		 * in-line.
 		 */
-		ocfs2_unlink_path(inode, handle, dealloc, path, 1);
+		ocfs2_unlink_path(handle, et, dealloc, path, 1);
 
 		el = et->et_root_el;
 		el->l_tree_depth = 0;
-- 
cgit v0.10.2


From 4619c73e7c9bd10bac6b60925fa28d5a2eeaf6ed Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:02:36 -0800
Subject: ocfs2: ocfs2_complete_edge_insert() doesn't need struct inode at all.

Completely unused argument.  Get rid of it.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 18762f5..4a8e971 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1984,7 +1984,7 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
  *   - When we've adjusted the last extent record in the left path leaf and the
  *     1st extent record in the right path leaf during cross extent block merge.
  */
-static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
+static void ocfs2_complete_edge_insert(handle_t *handle,
 				       struct ocfs2_path *left_path,
 				       struct ocfs2_path *right_path,
 				       int subtree_index)
@@ -2161,8 +2161,8 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 		goto out;
 	}
 
-	ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
-				subtree_index);
+	ocfs2_complete_edge_insert(handle, left_path, right_path,
+				   subtree_index);
 
 out:
 	return ret;
@@ -2772,7 +2772,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 
 		*deleted = 1;
 	} else
-		ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
+		ocfs2_complete_edge_insert(handle, left_path, right_path,
 					   subtree_index);
 
 out:
@@ -3430,8 +3430,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		if (ret)
 			mlog_errno(ret);
 
-		ocfs2_complete_edge_insert(inode, handle, left_path,
-					   right_path, subtree_index);
+		ocfs2_complete_edge_insert(handle, left_path, right_path,
+					   subtree_index);
 	}
 out:
 	if (right_path)
@@ -3629,7 +3629,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			ocfs2_mv_path(right_path, left_path);
 			left_path = NULL;
 		} else
-			ocfs2_complete_edge_insert(inode, handle, left_path,
+			ocfs2_complete_edge_insert(handle, left_path,
 						   right_path, subtree_index);
 	}
 out:
@@ -4195,8 +4195,8 @@ static int ocfs2_insert_path(struct inode *inode,
 		 */
 		subtree_index = ocfs2_find_subtree_root(inode, left_path,
 							right_path);
-		ocfs2_complete_edge_insert(inode, handle, left_path,
-					   right_path, subtree_index);
+		ocfs2_complete_edge_insert(handle, left_path, right_path,
+					   subtree_index);
 	}
 
 	ret = 0;
@@ -5397,7 +5397,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		int subtree_index;
 
 		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
-		ocfs2_complete_edge_insert(inode, handle, left_path, path,
+		ocfs2_complete_edge_insert(handle, left_path, path,
 					   subtree_index);
 	}
 
-- 
cgit v0.10.2


From 5c601aba8c5d9d5f944cf02b59e3288dd72ae6cf Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:10:13 -0800
Subject: ocfs2: Get inode out of ocfs2_rotate_subtree_root_right().

Pass the ocfs2_extent_list down through ocfs2_rotate_tree_right() and
get rid of struct inode in ocfs2_rotate_subtree_root_right().

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4a8e971..7a04e17 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2062,8 +2062,8 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
 		mlog_errno(ret);
 }
 
-static int ocfs2_rotate_subtree_right(struct inode *inode,
-				      handle_t *handle,
+static int ocfs2_rotate_subtree_right(handle_t *handle,
+				      struct ocfs2_extent_tree *et,
 				      struct ocfs2_path *left_path,
 				      struct ocfs2_path *right_path,
 				      int subtree_index)
@@ -2079,10 +2079,10 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	left_el = path_leaf_el(left_path);
 
 	if (left_el->l_next_free_rec != left_el->l_count) {
-		ocfs2_error(inode->i_sb,
+		ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
 			    "Inode %llu has non-full interior leaf node %llu"
 			    "(next free = %u)",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 			    (unsigned long long)left_leaf_bh->b_blocknr,
 			    le16_to_cpu(left_el->l_next_free_rec));
 		return -EROFS;
@@ -2098,7 +2098,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	root_bh = left_path->p_node[subtree_index].bh;
 	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2106,14 +2106,14 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2127,7 +2127,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	/* This is a code error, not a disk corruption. */
 	mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
 			"because rightmost leaf block %llu is empty\n",
-			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 			(unsigned long long)right_leaf_bh->b_blocknr);
 
 	ocfs2_create_empty_extent(right_el);
@@ -2325,8 +2325,8 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
  *   *ret_left_path will contain a valid path which can be passed to
  *   ocfs2_insert_path().
  */
-static int ocfs2_rotate_tree_right(struct inode *inode,
-				   handle_t *handle,
+static int ocfs2_rotate_tree_right(struct inode *inode, handle_t *handle,
+				   struct ocfs2_extent_tree *et,
 				   enum ocfs2_split_type split,
 				   u32 insert_cpos,
 				   struct ocfs2_path *right_path,
@@ -2335,6 +2335,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 	int ret, start, orig_credits = handle->h_buffer_credits;
 	u32 cpos;
 	struct ocfs2_path *left_path = NULL;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 
 	*ret_left_path = NULL;
 
@@ -2345,7 +2346,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos);
+	ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2383,7 +2384,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
 		     insert_cpos, cpos);
 
-		ret = ocfs2_find_path(INODE_CACHE(inode), left_path, cpos);
+		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2391,10 +2392,11 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 
 		mlog_bug_on_msg(path_leaf_bh(left_path) ==
 				path_leaf_bh(right_path),
-				"Inode %lu: error during insert of %u "
+				"Owner %llu: error during insert of %u "
 				"(left path cpos %u) results in two identical "
 				"paths ending at %llu\n",
-				inode->i_ino, insert_cpos, cpos,
+				(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+				insert_cpos, cpos,
 				(unsigned long long)
 				path_leaf_bh(left_path)->b_blocknr);
 
@@ -2434,7 +2436,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_rotate_subtree_right(inode, handle, left_path,
+		ret = ocfs2_rotate_subtree_right(handle, et, left_path,
 						 right_path, start);
 		if (ret) {
 			mlog_errno(ret);
@@ -2466,8 +2468,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		 */
 		ocfs2_mv_path(right_path, left_path);
 
-		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path,
-						    &cpos);
+		ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4268,7 +4269,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 	 * can wind up skipping both of these two special cases...
 	 */
 	if (rotate) {
-		ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split,
+		ret = ocfs2_rotate_tree_right(inode, handle, et, type->ins_split,
 					      le32_to_cpu(insert_rec->e_cpos),
 					      right_path, &left_path);
 		if (ret) {
-- 
cgit v0.10.2


From 7dc028056750328e74ca807041c822068384fe16 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:20:13 -0800
Subject: ocfs2: Pass ocfs2_extent_tree to ocfs2_get_subtree_root()

Get rid of the inode argument.  Use extent_tree instead.  This means a
few more functions have to pass an extent_tree around.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7a04e17..c3edd02 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1691,7 +1691,7 @@ set_and_inc:
  *
  * The array index of the subtree root is passed back.
  */
-static int ocfs2_find_subtree_root(struct inode *inode,
+static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
 				   struct ocfs2_path *left,
 				   struct ocfs2_path *right)
 {
@@ -1709,10 +1709,10 @@ static int ocfs2_find_subtree_root(struct inode *inode,
 		 * The caller didn't pass two adjacent paths.
 		 */
 		mlog_bug_on_msg(i > left->p_tree_depth,
-				"Inode %lu, left depth %u, right depth %u\n"
+				"Owner %llu, left depth %u, right depth %u\n"
 				"left leaf blk %llu, right leaf blk %llu\n",
-				inode->i_ino, left->p_tree_depth,
-				right->p_tree_depth,
+				(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+				left->p_tree_depth, right->p_tree_depth,
 				(unsigned long long)path_leaf_bh(left)->b_blocknr,
 				(unsigned long long)path_leaf_bh(right)->b_blocknr);
 	} while (left->p_node[i].bh->b_blocknr ==
@@ -2422,7 +2422,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, handle_t *handle,
 			goto out_ret_path;
 		}
 
-		start = ocfs2_find_subtree_root(inode, left_path, right_path);
+		start = ocfs2_find_subtree_root(et, left_path, right_path);
 
 		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
 		     start,
@@ -2933,7 +2933,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 			goto out;
 		}
 
-		subtree_root = ocfs2_find_subtree_root(inode, left_path,
+		subtree_root = ocfs2_find_subtree_root(et, left_path,
 						       right_path);
 
 		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
@@ -3068,7 +3068,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path, path);
 
 		ocfs2_unlink_subtree(handle, et, left_path, path,
 				     subtree_index, dealloc);
@@ -3324,6 +3324,7 @@ out:
 static int ocfs2_merge_rec_right(struct inode *inode,
 				 struct ocfs2_path *left_path,
 				 handle_t *handle,
+				 struct ocfs2_extent_tree *et,
 				 struct ocfs2_extent_rec *split_rec,
 				 int index)
 {
@@ -3363,8 +3364,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		       le16_to_cpu(left_rec->e_leaf_clusters) !=
 		       le32_to_cpu(right_rec->e_cpos));
 
-		subtree_index = ocfs2_find_subtree_root(inode,
-							left_path, right_path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path,
+							right_path);
 
 		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
 						      handle->h_buffer_credits,
@@ -3377,7 +3378,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3386,14 +3387,14 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3406,7 +3407,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		right_rec = &el->l_recs[index + 1];
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), left_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path,
 					   path_num_items(left_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -3417,7 +3418,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 
 	le32_add_cpu(&right_rec->e_cpos, -split_clusters);
 	le64_add_cpu(&right_rec->e_blkno,
-		     -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
+		     -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
+					       split_clusters));
 	le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
 
 	ocfs2_cleanup_merge(el, index);
@@ -3532,8 +3534,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		       le16_to_cpu(left_rec->e_leaf_clusters) !=
 		       le32_to_cpu(split_rec->e_cpos));
 
-		subtree_index = ocfs2_find_subtree_root(inode,
-							left_path, right_path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path,
+							right_path);
 
 		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
 						      handle->h_buffer_credits,
@@ -3694,7 +3696,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * if we do merge_right first and merge_left later.
 		 */
 		ret = ocfs2_merge_rec_right(inode, path,
-					    handle, split_rec,
+					    handle, et, split_rec,
 					    split_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3758,9 +3760,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 				goto out;
 			}
 		} else {
-			ret = ocfs2_merge_rec_right(inode,
-						    path,
-						    handle, split_rec,
+			ret = ocfs2_merge_rec_right(inode, path, handle,
+						    et, split_rec,
 						    split_index);
 			if (ret) {
 				mlog_errno(ret);
@@ -4118,6 +4119,7 @@ static void ocfs2_split_record(struct inode *inode,
  */
 static int ocfs2_insert_path(struct inode *inode,
 			     handle_t *handle,
+			     struct ocfs2_extent_tree *et,
 			     struct ocfs2_path *left_path,
 			     struct ocfs2_path *right_path,
 			     struct ocfs2_extent_rec *insert_rec,
@@ -4143,7 +4145,7 @@ static int ocfs2_insert_path(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
+		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -4154,7 +4156,7 @@ static int ocfs2_insert_path(struct inode *inode,
 	 * Pass both paths to the journal. The majority of inserts
 	 * will be touching all components anyway.
 	 */
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -4194,7 +4196,7 @@ static int ocfs2_insert_path(struct inode *inode,
 		 *
 		 * XXX: Should we extend the transaction here?
 		 */
-		subtree_index = ocfs2_find_subtree_root(inode, left_path,
+		subtree_index = ocfs2_find_subtree_root(et, left_path,
 							right_path);
 		ocfs2_complete_edge_insert(handle, left_path, right_path,
 					   subtree_index);
@@ -4297,7 +4299,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_insert_path(inode, handle, left_path, right_path,
+	ret = ocfs2_insert_path(inode, handle, et, left_path, right_path,
 				insert_rec, type);
 	if (ret) {
 		mlog_errno(ret);
@@ -5397,7 +5399,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 	if (left_path) {
 		int subtree_index;
 
-		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path, path);
 		ocfs2_complete_edge_insert(handle, left_path, path,
 					   subtree_index);
 	}
-- 
cgit v0.10.2


From 6136ca5f5f9fd38da399e9ff9380f537c1b3b901 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:32:43 -0800
Subject: ocfs2: Drop struct inode from ocfs2_extent_tree_operations.

We can get to the inode from the caching information.  Other parent
types don't need it.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c3edd02..072f7fe 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -79,18 +79,16 @@ struct ocfs2_extent_tree_operations {
 	 * that value.  new_clusters is the delta, and must be
 	 * added to the total.  Required.
 	 */
-	void (*eo_update_clusters)(struct inode *inode,
-				   struct ocfs2_extent_tree *et,
+	void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
 				   u32 new_clusters);
 
 	/*
 	 * If ->eo_insert_check() exists, it is called before rec is
 	 * inserted into the extent tree.  It is optional.
 	 */
-	int (*eo_insert_check)(struct inode *inode,
-			       struct ocfs2_extent_tree *et,
+	int (*eo_insert_check)(struct ocfs2_extent_tree *et,
 			       struct ocfs2_extent_rec *rec);
-	int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et);
+	int (*eo_sanity_check)(struct ocfs2_extent_tree *et);
 
 	/*
 	 * --------------------------------------------------------------
@@ -109,8 +107,7 @@ struct ocfs2_extent_tree_operations {
 	 * it exists.  If it does not, et->et_max_leaf_clusters is set
 	 * to 0 (unlimited).  Optional.
 	 */
-	void (*eo_fill_max_leaf_clusters)(struct inode *inode,
-					  struct ocfs2_extent_tree *et);
+	void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
 };
 
 
@@ -121,14 +118,11 @@ struct ocfs2_extent_tree_operations {
 static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
 static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					 u64 blkno);
-static void ocfs2_dinode_update_clusters(struct inode *inode,
-					 struct ocfs2_extent_tree *et,
+static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters);
-static int ocfs2_dinode_insert_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et,
+static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec);
-static int ocfs2_dinode_sanity_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et);
+static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
 static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
 static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
 	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
@@ -156,40 +150,37 @@ static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(di->i_last_eb_blk);
 }
 
-static void ocfs2_dinode_update_clusters(struct inode *inode,
-					 struct ocfs2_extent_tree *et,
+static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters)
 {
+	struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
 	struct ocfs2_dinode *di = et->et_object;
 
 	le32_add_cpu(&di->i_clusters, clusters);
-	spin_lock(&OCFS2_I(inode)->ip_lock);
-	OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
-	spin_unlock(&OCFS2_I(inode)->ip_lock);
+	spin_lock(&oi->ip_lock);
+	oi->ip_clusters = le32_to_cpu(di->i_clusters);
+	spin_unlock(&oi->ip_lock);
 }
 
-static int ocfs2_dinode_insert_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et,
+static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
+	struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb);
 
-	BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
+	BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL);
 	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
-			(OCFS2_I(inode)->ip_clusters !=
-			 le32_to_cpu(rec->e_cpos)),
+			(oi->ip_clusters != le32_to_cpu(rec->e_cpos)),
 			"Device %s, asking for sparse allocation: inode %llu, "
 			"cpos %u, clusters %u\n",
 			osb->dev_str,
-			(unsigned long long)OCFS2_I(inode)->ip_blkno,
-			rec->e_cpos,
-			OCFS2_I(inode)->ip_clusters);
+			(unsigned long long)oi->ip_blkno,
+			rec->e_cpos, oi->ip_clusters);
 
 	return 0;
 }
 
-static int ocfs2_dinode_sanity_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et)
+static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et)
 {
 	struct ocfs2_dinode *di = et->et_object;
 
@@ -229,8 +220,7 @@ static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
 }
 
-static void ocfs2_xattr_value_update_clusters(struct inode *inode,
-					      struct ocfs2_extent_tree *et,
+static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
 					      u32 clusters)
 {
 	struct ocfs2_xattr_value_buf *vb = et->et_object;
@@ -252,12 +242,11 @@ static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
 	et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
 }
 
-static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode,
-						    struct ocfs2_extent_tree *et)
+static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et)
 {
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 	et->et_max_leaf_clusters =
-		ocfs2_clusters_for_bytes(inode->i_sb,
-					 OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
+		ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
 }
 
 static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
@@ -277,8 +266,7 @@ static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(xt->xt_last_eb_blk);
 }
 
-static void ocfs2_xattr_tree_update_clusters(struct inode *inode,
-					     struct ocfs2_extent_tree *et,
+static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
 					     u32 clusters)
 {
 	struct ocfs2_xattr_block *xb = et->et_object;
@@ -309,8 +297,7 @@ static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(dx_root->dr_last_eb_blk);
 }
 
-static void ocfs2_dx_root_update_clusters(struct inode *inode,
-					  struct ocfs2_extent_tree *et,
+static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
 					  u32 clusters)
 {
 	struct ocfs2_dx_root_block *dx_root = et->et_object;
@@ -318,8 +305,7 @@ static void ocfs2_dx_root_update_clusters(struct inode *inode,
 	le32_add_cpu(&dx_root->dr_clusters, clusters);
 }
 
-static int ocfs2_dx_root_sanity_check(struct inode *inode,
-				      struct ocfs2_extent_tree *et)
+static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
 {
 	struct ocfs2_dx_root_block *dx_root = et->et_object;
 
@@ -362,7 +348,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 	if (!et->et_ops->eo_fill_max_leaf_clusters)
 		et->et_max_leaf_clusters = 0;
 	else
-		et->et_ops->eo_fill_max_leaf_clusters(inode, et);
+		et->et_ops->eo_fill_max_leaf_clusters(et);
 }
 
 void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
@@ -408,11 +394,10 @@ static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return et->et_ops->eo_get_last_eb_blk(et);
 }
 
-static inline void ocfs2_et_update_clusters(struct inode *inode,
-					    struct ocfs2_extent_tree *et,
+static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
 					    u32 clusters)
 {
-	et->et_ops->eo_update_clusters(inode, et, clusters);
+	et->et_ops->eo_update_clusters(et, clusters);
 }
 
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
@@ -423,24 +408,22 @@ static inline int ocfs2_et_root_journal_access(handle_t *handle,
 					  type);
 }
 
-static inline int ocfs2_et_insert_check(struct inode *inode,
-					struct ocfs2_extent_tree *et,
+static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
 					struct ocfs2_extent_rec *rec)
 {
 	int ret = 0;
 
 	if (et->et_ops->eo_insert_check)
-		ret = et->et_ops->eo_insert_check(inode, et, rec);
+		ret = et->et_ops->eo_insert_check(et, rec);
 	return ret;
 }
 
-static inline int ocfs2_et_sanity_check(struct inode *inode,
-					struct ocfs2_extent_tree *et)
+static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
 {
 	int ret = 0;
 
 	if (et->et_ops->eo_sanity_check)
-		ret = et->et_ops->eo_sanity_check(inode, et);
+		ret = et->et_ops->eo_sanity_check(et);
 	return ret;
 }
 
@@ -3016,7 +2999,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 	struct ocfs2_extent_list *el;
 
 
-	ret = ocfs2_et_sanity_check(inode, et);
+	ret = ocfs2_et_sanity_check(et);
 	if (ret)
 		goto out;
 	/*
@@ -4308,7 +4291,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 
 out_update_clusters:
 	if (type->ins_split == SPLIT_NONE)
-		ocfs2_et_update_clusters(inode, et,
+		ocfs2_et_update_clusters(et,
 					 le16_to_cpu(insert_rec->e_leaf_clusters));
 
 	ret = ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4697,7 +4680,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	rec.e_blkno = cpu_to_le64(start_blk);
 	rec.e_leaf_clusters = cpu_to_le16(new_clusters);
 	rec.e_flags = flags;
-	status = ocfs2_et_insert_check(inode, et, &rec);
+	status = ocfs2_et_insert_check(et, &rec);
 	if (status) {
 		mlog_errno(status);
 		goto bail;
@@ -5603,7 +5586,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out_commit;
 	}
 
-	ocfs2_et_update_clusters(inode, et, -len);
+	ocfs2_et_update_clusters(et, -len);
 
 	ret = ocfs2_journal_dirty(handle, et->et_root_bh);
 	if (ret) {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 179c819..e82ceb3 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1395,10 +1395,6 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
 	return ocfs2_read_inode_block_full(inode, bh, 0);
 }
 
-static struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
-{
-	return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
-}
 
 static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
 {
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 67392f6..ba4fe07 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -170,4 +170,10 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
 /* The same, but can be passed OCFS2_BH_* flags */
 int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
 				int flags);
+
+static inline struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
+{
+	return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
+}
+
 #endif /* OCFS2_INODE_H */
-- 
cgit v0.10.2


From 1bbf0b8d606645c7596ee641acfbf042765c9719 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:42:08 -0800
Subject: ocfs2: ocfs2_rotate_tree_right() doesn't need struct inode.

We don't need struct inode in ocfs2_rotate_tree_right() anymore.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 072f7fe..93f02a1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2308,7 +2308,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
  *   *ret_left_path will contain a valid path which can be passed to
  *   ocfs2_insert_path().
  */
-static int ocfs2_rotate_tree_right(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_tree_right(handle_t *handle,
 				   struct ocfs2_extent_tree *et,
 				   enum ocfs2_split_type split,
 				   u32 insert_cpos,
@@ -4254,7 +4254,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 	 * can wind up skipping both of these two special cases...
 	 */
 	if (rotate) {
-		ret = ocfs2_rotate_tree_right(inode, handle, et, type->ins_split,
+		ret = ocfs2_rotate_tree_right(handle, et, type->ins_split,
 					      le32_to_cpu(insert_rec->e_cpos),
 					      right_path, &left_path);
 		if (ret) {
-- 
cgit v0.10.2


From 09106bae05c3350e8d0ef0ede90b1c3da4bda2f8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:43:57 -0800
Subject: ocfs2: ocfs2_update_edge_lengths() doesn't need struct inode.

Pass in the extent tree, which is all we need.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 93f02a1..8efcfac 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2465,7 +2465,8 @@ out_ret_path:
 	return ret;
 }
 
-static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
+static int ocfs2_update_edge_lengths(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     int subtree_index, struct ocfs2_path *path)
 {
 	int i, idx, ret;
@@ -2490,7 +2491,7 @@ static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2732,7 +2733,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	if (del_right_subtree) {
 		ocfs2_unlink_subtree(handle, et, left_path, right_path,
 				     subtree_index, dealloc);
-		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+		ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
 						left_path);
 		if (ret) {
 			mlog_errno(ret);
@@ -3055,7 +3056,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 
 		ocfs2_unlink_subtree(handle, et, left_path, path,
 				     subtree_index, dealloc);
-		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+		ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
 						left_path);
 		if (ret) {
 			mlog_errno(ret);
-- 
cgit v0.10.2


From 1e2dd63fe0b6e99b81904a61090db801978b9520 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:45:28 -0800
Subject: ocfs2: ocfs2_rotate_subtree_left() doesn't need struct inode.

It already has struct ocfs2_extent_tree, which has the caching info.  So
we don't need to pass it struct inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 8efcfac..b358d56 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2602,13 +2602,13 @@ static void ocfs2_unlink_subtree(handle_t *handle,
 			  subtree_index + 1);
 }
 
-static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_subtree_left(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     struct ocfs2_path *left_path,
 				     struct ocfs2_path *right_path,
 				     int subtree_index,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc,
-				     int *deleted,
-				     struct ocfs2_extent_tree *et)
+				     int *deleted)
 {
 	int ret, i, del_right_subtree = 0, right_has_empty = 0;
 	struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
@@ -2644,7 +2644,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 			return -EAGAIN;
 
 		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
-			ret = ocfs2_journal_access_eb(handle, INODE_CACHE(inode),
+			ret = ocfs2_journal_access_eb(handle, et->et_ci,
 						      path_leaf_bh(right_path),
 						      OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret) {
@@ -2679,7 +2679,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	 */
 	BUG_ON(right_has_empty && !del_right_subtree);
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2687,14 +2687,14 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2944,9 +2944,9 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
+		ret = ocfs2_rotate_subtree_left(handle, et, left_path,
 						right_path, subtree_root,
-						dealloc, &deleted, et);
+						dealloc, &deleted);
 		if (ret == -EAGAIN) {
 			/*
 			 * The rotation has to temporarily stop due to
-- 
cgit v0.10.2


From e46f74dc357947e2aed9bdd63cf335c5fd23810b Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:47:43 -0800
Subject: ocfs2: __ocfs2_rotate_tree_left() doesn't need struct inode.

It already has struct ocfs2_extent_tree, which has the caching info.  So
we don't need to pass it struct inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b358d56..12dbd6e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2871,24 +2871,24 @@ out:
 	return ret;
 }
 
-static int __ocfs2_rotate_tree_left(struct inode *inode,
-				    handle_t *handle, int orig_credits,
+static int __ocfs2_rotate_tree_left(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
+				    int orig_credits,
 				    struct ocfs2_path *path,
 				    struct ocfs2_cached_dealloc_ctxt *dealloc,
-				    struct ocfs2_path **empty_extent_path,
-				    struct ocfs2_extent_tree *et)
+				    struct ocfs2_path **empty_extent_path)
 {
 	int ret, subtree_root, deleted;
 	u32 right_cpos;
 	struct ocfs2_path *left_path = NULL;
 	struct ocfs2_path *right_path = NULL;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 
 	BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
 
 	*empty_extent_path = NULL;
 
-	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path,
-					     &right_cpos);
+	ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2937,7 +2937,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 		 * Caller might still want to make changes to the
 		 * tree root, so re-add it to the journal here.
 		 */
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   left_path, 0);
 		if (ret) {
 			mlog_errno(ret);
@@ -2973,7 +2973,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 
 		ocfs2_mv_path(left_path, right_path);
 
-		ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
+		ret = ocfs2_find_cpos_for_right_leaf(sb, left_path,
 						     &right_cpos);
 		if (ret) {
 			mlog_errno(ret);
@@ -3187,8 +3187,8 @@ rightmost_no_delete:
 	 * and restarting from there.
 	 */
 try_rotate:
-	ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
-				       dealloc, &restart_path, et);
+	ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path,
+				       dealloc, &restart_path);
 	if (ret && ret != -EAGAIN) {
 		mlog_errno(ret);
 		goto out;
@@ -3198,9 +3198,9 @@ try_rotate:
 		tmp_path = restart_path;
 		restart_path = NULL;
 
-		ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
+		ret = __ocfs2_rotate_tree_left(handle, et, orig_credits,
 					       tmp_path, dealloc,
-					       &restart_path, et);
+					       &restart_path);
 		if (ret && ret != -EAGAIN) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v0.10.2


From 70f18c08b476e315c8ee17ea34b55ea1957e7e7d Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:09:31 -0800
Subject: ocfs2: ocfs2_rotate_tree_left() no longer needs struct inode.

It already gets ocfs2_extent_tree, so we can just use that.  This chains
to the same modification for ocfs2_remove_rightmost_path() and
ocfs2_rotate_rightmost_leaf_left().

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 12dbd6e..d348cfb 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2843,8 +2843,8 @@ out:
 	return ret;
 }
 
-static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
-					    handle_t *handle,
+static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
+					    struct ocfs2_extent_tree *et,
 					    struct ocfs2_path *path)
 {
 	int ret;
@@ -2854,7 +2854,7 @@ static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
 	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
 		return 0;
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -2988,10 +2988,10 @@ out:
 	return ret;
 }
 
-static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
+static int ocfs2_remove_rightmost_path(handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				struct ocfs2_path *path,
-				struct ocfs2_cached_dealloc_ctxt *dealloc,
-				struct ocfs2_extent_tree *et)
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret, subtree_index;
 	u32 cpos;
@@ -3070,7 +3070,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		 * 'path' is also the leftmost path which
 		 * means it must be the only one. This gets
 		 * handled differently because we want to
-		 * revert the inode back to having extents
+		 * revert the root back to having extents
 		 * in-line.
 		 */
 		ocfs2_unlink_path(handle, et, dealloc, path, 1);
@@ -3106,10 +3106,10 @@ out:
  * the rightmost tree leaf record is removed so the caller is
  * responsible for detecting and correcting that.
  */
-static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_tree_left(handle_t *handle,
+				  struct ocfs2_extent_tree *et,
 				  struct ocfs2_path *path,
-				  struct ocfs2_cached_dealloc_ctxt *dealloc,
-				  struct ocfs2_extent_tree *et)
+				  struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret, orig_credits = handle->h_buffer_credits;
 	struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
@@ -3126,8 +3126,7 @@ rightmost_no_delete:
 		 * Inline extents. This is trivially handled, so do
 		 * it up front.
 		 */
-		ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
-						       path);
+		ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path);
 		if (ret)
 			mlog_errno(ret);
 		goto out;
@@ -3143,7 +3142,7 @@ rightmost_no_delete:
 	 *
 	 *  1) is handled via ocfs2_rotate_rightmost_leaf_left()
 	 *  2a) we need the left branch so that we can update it with the unlink
-	 *  2b) we need to bring the inode back to inline extents.
+	 *  2b) we need to bring the root back to inline extents.
 	 */
 
 	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
@@ -3159,9 +3158,9 @@ rightmost_no_delete:
 
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
 			ret = -EIO;
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has empty extent block at %llu",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has empty extent block at %llu",
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 				    (unsigned long long)le64_to_cpu(eb->h_blkno));
 			goto out;
 		}
@@ -3175,8 +3174,8 @@ rightmost_no_delete:
 		 * nonempty list.
 		 */
 
-		ret = ocfs2_remove_rightmost_path(inode, handle, path,
-						  dealloc, et);
+		ret = ocfs2_remove_rightmost_path(handle, et, path,
+						  dealloc);
 		if (ret)
 			mlog_errno(ret);
 		goto out;
@@ -3602,9 +3601,9 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
 		    le16_to_cpu(el->l_next_free_rec) == 1) {
 
-			ret = ocfs2_remove_rightmost_path(inode, handle,
+			ret = ocfs2_remove_rightmost_path(handle, et,
 							  right_path,
-							  dealloc, et);
+							  dealloc);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -3649,8 +3648,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * extents - having more than one in a leaf is
 		 * illegal.
 		 */
-		ret = ocfs2_rotate_tree_left(inode, handle, path,
-					     dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3693,8 +3691,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
 
 		/* The merge left us with an empty extent, remove it. */
-		ret = ocfs2_rotate_tree_left(inode, handle, path,
-					     dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3716,8 +3713,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_rotate_tree_left(inode, handle, path,
-					     dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		/*
 		 * Error from this last rotate is not critical, so
 		 * print but don't bubble it up.
@@ -3758,8 +3754,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			 * The merge may have left an empty extent in
 			 * our leaf. Try to rotate it away.
 			 */
-			ret = ocfs2_rotate_tree_left(inode, handle, path,
-						     dealloc, et);
+			ret = ocfs2_rotate_tree_left(handle, et, path,
+						     dealloc);
 			if (ret)
 				mlog_errno(ret);
 			ret = 0;
@@ -5259,7 +5255,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 	struct ocfs2_extent_block *eb;
 
 	if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
-		ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5390,7 +5386,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 
 	ocfs2_journal_dirty(handle, path_leaf_bh(path));
 
-	ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
+	ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
-- 
cgit v0.10.2


From 4fe82c312a7d975a9d0f591dc9180c1197ee4270 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:16:08 -0800
Subject: ocfs2: ocfs2_merge_rec_left/right() no longer need struct inode.

Drop it from the parameters - they already have ocfs2_extent_list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d348cfb..bac6ca0 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3250,7 +3250,7 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
 	}
 }
 
-static int ocfs2_get_right_path(struct inode *inode,
+static int ocfs2_get_right_path(struct ocfs2_extent_tree *et,
 				struct ocfs2_path *left_path,
 				struct ocfs2_path **ret_right_path)
 {
@@ -3267,8 +3267,8 @@ static int ocfs2_get_right_path(struct inode *inode,
 	left_el = path_leaf_el(left_path);
 	BUG_ON(left_el->l_next_free_rec != left_el->l_count);
 
-	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
-					     &right_cpos);
+	ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+					     left_path, &right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3284,7 +3284,7 @@ static int ocfs2_get_right_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
+	ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3304,8 +3304,7 @@ out:
  * For index == l_count - 1, the "next" means the 1st extent rec of the
  * next extent block.
  */
-static int ocfs2_merge_rec_right(struct inode *inode,
-				 struct ocfs2_path *left_path,
+static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
 				 handle_t *handle,
 				 struct ocfs2_extent_tree *et,
 				 struct ocfs2_extent_rec *split_rec,
@@ -3328,7 +3327,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 	if (index == le16_to_cpu(el->l_next_free_rec) - 1 &&
 	    le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
 		/* we meet with a cross extent block merge. */
-		ret = ocfs2_get_right_path(inode, left_path, &right_path);
+		ret = ocfs2_get_right_path(et, left_path, &right_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3425,7 +3424,7 @@ out:
 	return ret;
 }
 
-static int ocfs2_get_left_path(struct inode *inode,
+static int ocfs2_get_left_path(struct ocfs2_extent_tree *et,
 			       struct ocfs2_path *right_path,
 			       struct ocfs2_path **ret_left_path)
 {
@@ -3438,7 +3437,7 @@ static int ocfs2_get_left_path(struct inode *inode,
 	/* This function shouldn't be called for non-trees. */
 	BUG_ON(right_path->p_tree_depth == 0);
 
-	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
+	ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
 					    right_path, &left_cpos);
 	if (ret) {
 		mlog_errno(ret);
@@ -3455,7 +3454,7 @@ static int ocfs2_get_left_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(INODE_CACHE(inode), left_path, left_cpos);
+	ret = ocfs2_find_path(et->et_ci, left_path, left_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3478,12 +3477,11 @@ out:
  * remove the rightmost leaf extent block in the right_path and change
  * the right path to indicate the new rightmost path.
  */
-static int ocfs2_merge_rec_left(struct inode *inode,
-				struct ocfs2_path *right_path,
+static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
 				handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				struct ocfs2_extent_rec *split_rec,
 				struct ocfs2_cached_dealloc_ctxt *dealloc,
-				struct ocfs2_extent_tree *et,
 				int index)
 {
 	int ret, i, subtree_index = 0, has_empty_extent = 0;
@@ -3501,7 +3499,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 	right_rec = &el->l_recs[index];
 	if (index == 0) {
 		/* we meet with a cross extent block merge. */
-		ret = ocfs2_get_left_path(inode, right_path, &left_path);
+		ret = ocfs2_get_left_path(et, right_path, &left_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3531,7 +3529,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3540,14 +3538,14 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3560,7 +3558,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			has_empty_extent = 1;
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 					   path_num_items(right_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -3579,7 +3577,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 
 	le32_add_cpu(&right_rec->e_cpos, split_clusters);
 	le64_add_cpu(&right_rec->e_blkno,
-		     ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
+		     ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
+					      split_clusters));
 	le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
 
 	ocfs2_cleanup_merge(el, index);
@@ -3677,8 +3676,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * prevoius extent block. It is more efficient and easier
 		 * if we do merge_right first and merge_left later.
 		 */
-		ret = ocfs2_merge_rec_right(inode, path,
-					    handle, et, split_rec,
+		ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
 					    split_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3703,10 +3701,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * Note that we don't pass split_rec here on purpose -
 		 * we've merged it into the rec already.
 		 */
-		ret = ocfs2_merge_rec_left(inode, path,
-					   handle, rec,
-					   dealloc, et,
-					   split_index);
+		ret = ocfs2_merge_rec_left(path, handle, et, rec,
+					   dealloc, split_index);
 
 		if (ret) {
 			mlog_errno(ret);
@@ -3730,17 +3726,15 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * the record on the left (hence the left merge).
 		 */
 		if (ctxt->c_contig_type == CONTIG_RIGHT) {
-			ret = ocfs2_merge_rec_left(inode,
-						   path,
-						   handle, split_rec,
-						   dealloc, et,
+			ret = ocfs2_merge_rec_left(path, handle, et,
+						   split_rec, dealloc,
 						   split_index);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 		} else {
-			ret = ocfs2_merge_rec_right(inode, path, handle,
+			ret = ocfs2_merge_rec_right(path, handle,
 						    et, split_rec,
 						    split_index);
 			if (ret) {
-- 
cgit v0.10.2


From c495dd24ac00654f99540f533185e1fcc9534009 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:19:11 -0800
Subject: ocfs2: ocfs2_try_to_merge_extent() doesn't need struct inode.

It's not using it, so remove it from the parameter list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bac6ca0..2c4967f7b 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3623,15 +3623,13 @@ out:
 	return ret;
 }
 
-static int ocfs2_try_to_merge_extent(struct inode *inode,
-				     handle_t *handle,
+static int ocfs2_try_to_merge_extent(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     struct ocfs2_path *path,
 				     int split_index,
 				     struct ocfs2_extent_rec *split_rec,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc,
-				     struct ocfs2_merge_ctxt *ctxt,
-				     struct ocfs2_extent_tree *et)
-
+				     struct ocfs2_merge_ctxt *ctxt)
 {
 	int ret = 0;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5069,9 +5067,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 		if (ret)
 			mlog_errno(ret);
 	} else {
-		ret = ocfs2_try_to_merge_extent(inode, handle, path,
+		ret = ocfs2_try_to_merge_extent(handle, et, path,
 						split_index, split_rec,
-						dealloc, &ctxt, et);
+						dealloc, &ctxt);
 		if (ret)
 			mlog_errno(ret);
 	}
-- 
cgit v0.10.2


From d401dc12fcced123909eba10334fb5d78866d1a9 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:24:10 -0800
Subject: ocfs2: ocfs2_grow_branch() and ocfs2_append_rec_to_path() lose struct
 inode.

ocfs2_grow_branch() not really using it other than to pass it to the
subfunctions ocfs2_shift_tree_depth(), ocfs2_find_branch_target(), and
ocfs2_add_branch().  The first two weren't it either, so they drop the
argument.  ocfs2_add_branch() only passed it to
ocfs2_adjust_rightmost_branch(), which drops the inode argument and uses
the ocfs2_extent_tree as well.

ocfs2_append_rec_to_path() can be take an ocfs2_extent_tree instead of
the inode.  The function ocfs2_adjust_rightmost_records() goes along for
the ride.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2c4967f7b..e1479fa 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -460,8 +460,8 @@ struct ocfs2_path {
 
 static int ocfs2_find_path(struct ocfs2_caching_info *ci,
 			   struct ocfs2_path *path, u32 cpos);
-static void ocfs2_adjust_rightmost_records(struct inode *inode,
-					   handle_t *handle,
+static void ocfs2_adjust_rightmost_records(handle_t *handle,
+					   struct ocfs2_extent_tree *et,
 					   struct ocfs2_path *path,
 					   struct ocfs2_extent_rec *insert_rec);
 /*
@@ -1009,7 +1009,6 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list  *el)
  * extent block's rightmost record.
  */
 static int ocfs2_adjust_rightmost_branch(handle_t *handle,
-					 struct inode *inode,
 					 struct ocfs2_extent_tree *et)
 {
 	int status;
@@ -1036,7 +1035,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 		goto out;
 	}
 
-	status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	status = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -1045,7 +1044,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 	el = path_leaf_el(path);
 	rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
 
-	ocfs2_adjust_rightmost_records(inode, handle, path, rec);
+	ocfs2_adjust_rightmost_records(handle, et, path, rec);
 
 out:
 	ocfs2_free_path(path);
@@ -1054,7 +1053,7 @@ out:
 
 /*
  * Add an entire tree branch to our inode. eb_bh is the extent block
- * to start at, if we don't want to start the branch at the dinode
+ * to start at, if we don't want to start the branch at the root
  * structure.
  *
  * last_eb_bh is required as we have to update it's next_leaf pointer
@@ -1063,9 +1062,7 @@ out:
  * the new branch will be 'empty' in the sense that every block will
  * contain a single record with cluster count == 0.
  */
-static int ocfs2_add_branch(struct ocfs2_super *osb,
-			    handle_t *handle,
-			    struct inode *inode,
+static int ocfs2_add_branch(handle_t *handle,
 			    struct ocfs2_extent_tree *et,
 			    struct buffer_head *eb_bh,
 			    struct buffer_head **last_eb_bh,
@@ -1109,7 +1106,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	if (root_end > new_cpos) {
 		mlog(0, "adjust the cluster end from %u to %u\n",
 		     root_end, new_cpos);
-		status = ocfs2_adjust_rightmost_branch(handle, inode, et);
+		status = ocfs2_adjust_rightmost_branch(handle, et);
 		if (status) {
 			mlog_errno(status);
 			goto bail;
@@ -1147,7 +1144,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 		eb_el = &eb->h_list;
 
-		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bh,
+		status = ocfs2_journal_access_eb(handle, et->et_ci, bh,
 						 OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1187,7 +1184,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	 * journal_dirty erroring as it won't unless we've aborted the
 	 * handle (in which case we would never be here) so reserving
 	 * the write with journal_access is all we need to do. */
-	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), *last_eb_bh,
+	status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1200,7 +1197,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		goto bail;
 	}
 	if (eb_bh) {
-		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), eb_bh,
+		status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1260,9 +1257,7 @@ bail:
  * returns back the new extent block so you can add a branch to it
  * after this call.
  */
-static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
-				  handle_t *handle,
-				  struct inode *inode,
+static int ocfs2_shift_tree_depth(handle_t *handle,
 				  struct ocfs2_extent_tree *et,
 				  struct ocfs2_alloc_context *meta_ac,
 				  struct buffer_head **ret_new_eb_bh)
@@ -1290,7 +1285,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 	eb_el = &eb->h_list;
 	root_el = et->et_root_el;
 
-	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), new_eb_bh,
+	status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1365,8 +1360,7 @@ bail:
  *
  * return status < 0 indicates an error.
  */
-static int ocfs2_find_branch_target(struct ocfs2_super *osb,
-				    struct ocfs2_extent_tree *et,
+static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
 				    struct buffer_head **target_bh)
 {
 	int status = 0, i;
@@ -1447,20 +1441,18 @@ bail:
  *
  * *last_eb_bh will be updated by ocfs2_add_branch().
  */
-static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
-			   struct ocfs2_extent_tree *et, int *final_depth,
-			   struct buffer_head **last_eb_bh,
+static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
+			   int *final_depth, struct buffer_head **last_eb_bh,
 			   struct ocfs2_alloc_context *meta_ac)
 {
 	int ret, shift;
 	struct ocfs2_extent_list *el = et->et_root_el;
 	int depth = le16_to_cpu(el->l_tree_depth);
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct buffer_head *bh = NULL;
 
 	BUG_ON(meta_ac == NULL);
 
-	shift = ocfs2_find_branch_target(osb, et, &bh);
+	shift = ocfs2_find_branch_target(et, &bh);
 	if (shift < 0) {
 		ret = shift;
 		mlog_errno(ret);
@@ -1477,8 +1469,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
 		/* ocfs2_shift_tree_depth will return us a buffer with
 		 * the new extent block (so we can pass that to
 		 * ocfs2_add_branch). */
-		ret = ocfs2_shift_tree_depth(osb, handle, inode, et,
-					     meta_ac, &bh);
+		ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -1504,7 +1495,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
 	/* call ocfs2_add_branch to add the final part of the tree with
 	 * the new data. */
 	mlog(0, "add branch. bh = %p\n", bh);
-	ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh,
+	ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
 			       meta_ac);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -3881,8 +3872,8 @@ rotate:
 	ocfs2_rotate_leaf(el, insert_rec);
 }
 
-static void ocfs2_adjust_rightmost_records(struct inode *inode,
-					   handle_t *handle,
+static void ocfs2_adjust_rightmost_records(handle_t *handle,
+					   struct ocfs2_extent_tree *et,
 					   struct ocfs2_path *path,
 					   struct ocfs2_extent_rec *insert_rec)
 {
@@ -3900,9 +3891,9 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
 
 		next_free = le16_to_cpu(el->l_next_free_rec);
 		if (next_free == 0) {
-			ocfs2_error(inode->i_sb,
-				    "Dinode %llu has a bad extent list",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has a bad extent list",
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 			ret = -EIO;
 			return;
 		}
@@ -3922,7 +3913,8 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
 	}
 }
 
-static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
+static int ocfs2_append_rec_to_path(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
 				    struct ocfs2_extent_rec *insert_rec,
 				    struct ocfs2_path *right_path,
 				    struct ocfs2_path **ret_left_path)
@@ -3950,8 +3942,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
 		u32 left_cpos;
 
-		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path,
-						    &left_cpos);
+		ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+						    right_path, &left_cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3973,7 +3965,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 				goto out;
 			}
 
-			ret = ocfs2_find_path(INODE_CACHE(inode), left_path,
+			ret = ocfs2_find_path(et->et_ci, left_path,
 					      left_cpos);
 			if (ret) {
 				mlog_errno(ret);
@@ -3987,13 +3979,13 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 		}
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec);
+	ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec);
 
 	*ret_left_path = left_path;
 	ret = 0;
@@ -4263,7 +4255,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		}
 	} else if (type->ins_appending == APPEND_TAIL
 		   && type->ins_contig != CONTIG_LEFT) {
-		ret = ocfs2_append_rec_to_path(inode, handle, insert_rec,
+		ret = ocfs2_append_rec_to_path(handle, et, insert_rec,
 					       right_path, &left_path);
 		if (ret) {
 			mlog_errno(ret);
@@ -4689,7 +4681,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	     free_records, insert.ins_tree_depth);
 
 	if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
-		status = ocfs2_grow_tree(inode, handle, et,
+		status = ocfs2_grow_tree(handle, et,
 					 &insert.ins_tree_depth, &last_eb_bh,
 					 meta_ac);
 		if (status) {
@@ -4876,7 +4868,7 @@ leftright:
 
 	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
 	    le16_to_cpu(rightmost_el->l_count)) {
-		ret = ocfs2_grow_tree(inode, handle, et,
+		ret = ocfs2_grow_tree(handle, et,
 				      &depth, last_eb_bh, meta_ac);
 		if (ret) {
 			mlog_errno(ret);
@@ -5208,7 +5200,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 
 	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
 	    le16_to_cpu(rightmost_el->l_count)) {
-		ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh,
+		ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh,
 				      meta_ac);
 		if (ret) {
 			mlog_errno(ret);
@@ -5346,7 +5338,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 			 * be deleted by the rotate code.
 			 */
 			rec = &el->l_recs[next_free - 1];
-			ocfs2_adjust_rightmost_records(inode, handle, path,
+			ocfs2_adjust_rightmost_records(handle, et, path,
 						       rec);
 		}
 	} else if (le32_to_cpu(rec->e_cpos) == cpos) {
@@ -5358,7 +5350,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		/* Remove rightmost portion of the record */
 		le16_add_cpu(&rec->e_leaf_clusters, -len);
 		if (is_rightmost_tree_rec)
-			ocfs2_adjust_rightmost_records(inode, handle, path, rec);
+			ocfs2_adjust_rightmost_records(handle, et, path, rec);
 	} else {
 		/* Caller should have trapped this. */
 		mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) "
-- 
cgit v0.10.2


From 043beebb6c467a07ccd7aa666095f87fade1c28e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:42:30 -0800
Subject: ocfs2: ocfs2_truncate_rec() doesn't need struct inode.

It's not using it anymore.  Remove it from the parameter list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e1479fa..4022fa4 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5223,16 +5223,16 @@ out:
 	return ret;
 }
 
-static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
+static int ocfs2_truncate_rec(handle_t *handle,
+			      struct ocfs2_extent_tree *et,
 			      struct ocfs2_path *path, int index,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc,
-			      u32 cpos, u32 len,
-			      struct ocfs2_extent_tree *et)
+			      u32 cpos, u32 len)
 {
 	int ret;
 	u32 left_cpos, rec_range, trunc_range;
 	int wants_rotate = 0, is_rightmost_tree_rec = 0;
-	struct super_block *sb = inode->i_sb;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 	struct ocfs2_path *left_path = NULL;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
 	struct ocfs2_extent_rec *rec;
@@ -5271,14 +5271,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		 * by this leaf and the one to it's left.
 		 *
 		 * There are two cases we can skip:
-		 *   1) Path is the leftmost one in our inode tree.
+		 *   1) Path is the leftmost one in our btree.
 		 *   2) The leaf is rightmost and will be empty after
 		 *      we remove the extent record - the rotate code
 		 *      knows how to update the newly formed edge.
 		 */
 
-		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path,
-						    &left_cpos);
+		ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5353,8 +5352,9 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 			ocfs2_adjust_rightmost_records(handle, et, path, rec);
 	} else {
 		/* Caller should have trapped this. */
-		mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) "
-		     "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) "
+		     "(%u, %u)\n",
+		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 		     le32_to_cpu(rec->e_cpos),
 		     le16_to_cpu(rec->e_leaf_clusters), cpos, len);
 		BUG();
@@ -5447,8 +5447,8 @@ int ocfs2_remove_extent(struct inode *inode,
 	     le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
 
 	if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
-		ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
-					 cpos, len, et);
+		ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
+					 cpos, len);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5502,8 +5502,8 @@ int ocfs2_remove_extent(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
-					 cpos, len, et);
+		ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
+					 cpos, len);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v0.10.2


From 4c911eefca316f580f174940cd67d561b4b7e6e8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:50:12 -0800
Subject: ocfs2: Make truncating the extent map an extent_tree_operation.

ocfs2_remove_extent() wants to truncate the extent map if it's
truncating an inode data extent.  But since many btrees can call that
function, let's make it an op on ocfs2_extent_tree.  Other tree types
can leave it empty.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4022fa4..cdf9697 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -83,6 +83,13 @@ struct ocfs2_extent_tree_operations {
 				   u32 new_clusters);
 
 	/*
+	 * If this extent tree is supported by an extent map, truncate the
+	 * map to clusters,
+	 */
+	void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et,
+				       u32 clusters);
+
+	/*
 	 * If ->eo_insert_check() exists, it is called before rec is
 	 * inserted into the extent tree.  It is optional.
 	 */
@@ -120,6 +127,8 @@ static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					 u64 blkno);
 static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters);
+static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
+					     u32 clusters);
 static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec);
 static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
@@ -128,6 +137,7 @@ static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
 	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
 	.eo_get_last_eb_blk	= ocfs2_dinode_get_last_eb_blk,
 	.eo_update_clusters	= ocfs2_dinode_update_clusters,
+	.eo_extent_map_truncate	= ocfs2_dinode_extent_map_truncate,
 	.eo_insert_check	= ocfs2_dinode_insert_check,
 	.eo_sanity_check	= ocfs2_dinode_sanity_check,
 	.eo_fill_root_el	= ocfs2_dinode_fill_root_el,
@@ -162,6 +172,14 @@ static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 	spin_unlock(&oi->ip_lock);
 }
 
+static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
+					     u32 clusters)
+{
+	struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
+
+	ocfs2_extent_map_trunc(inode, clusters);
+}
+
 static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec)
 {
@@ -400,6 +418,13 @@ static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
 	et->et_ops->eo_update_clusters(et, clusters);
 }
 
+static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
+						u32 clusters)
+{
+	if (et->et_ops->eo_extent_map_truncate)
+		et->et_ops->eo_extent_map_truncate(et, clusters);
+}
+
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
 					       struct ocfs2_extent_tree *et,
 					       int type)
@@ -5106,12 +5131,8 @@ int ocfs2_mark_extent_written(struct inode *inode,
 	/*
 	 * XXX: This should be fixed up so that we just re-insert the
 	 * next extent records.
-	 *
-	 * XXX: This is a hack on the extent tree, maybe it should be
-	 * an op?
 	 */
-	if (et->et_ops == &ocfs2_dinode_et_ops)
-		ocfs2_extent_map_trunc(inode, 0);
+	ocfs2_et_extent_map_truncate(et, 0);
 
 	left_path = ocfs2_new_path_from_et(et);
 	if (!left_path) {
@@ -5393,7 +5414,11 @@ int ocfs2_remove_extent(struct inode *inode,
 	struct ocfs2_extent_list *el;
 	struct ocfs2_path *path = NULL;
 
-	ocfs2_extent_map_trunc(inode, 0);
+	/*
+	 * XXX: Why are we truncating to 0 instead of wherever this
+	 * affects us?
+	 */
+	ocfs2_et_extent_map_truncate(et, 0);
 
 	path = ocfs2_new_path_from_et(et);
 	if (!path) {
-- 
cgit v0.10.2


From d562862314a7b131a630f7b912490312387542fb Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:54:36 -0800
Subject: ocfs2: ocfs2_insert_at_leaf() doesn't need struct inode.

Give it an ocfs2_extent_tree and it is happy.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index cdf9697..ec9c2ce 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3809,10 +3809,10 @@ static void ocfs2_subtract_from_rec(struct super_block *sb,
  * list. If this leaf is part of an allocation tree, it is assumed
  * that the tree above has been prepared.
  */
-static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
+static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
+				 struct ocfs2_extent_rec *insert_rec,
 				 struct ocfs2_extent_list *el,
-				 struct ocfs2_insert_type *insert,
-				 struct inode *inode)
+				 struct ocfs2_insert_type *insert)
 {
 	int i = insert->ins_contig_index;
 	unsigned int range;
@@ -3824,7 +3824,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
 		i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
 		BUG_ON(i == -1);
 		rec = &el->l_recs[i];
-		ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec,
+		ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+					insert->ins_split, rec,
 					insert_rec);
 		goto rotate;
 	}
@@ -3866,10 +3867,10 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
 
 		mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
 				le16_to_cpu(el->l_count),
-				"inode %lu, depth %u, count %u, next free %u, "
+				"owner %llu, depth %u, count %u, next free %u, "
 				"rec.cpos %u, rec.clusters %u, "
 				"insert.cpos %u, insert.clusters %u\n",
-				inode->i_ino,
+				ocfs2_metadata_cache_owner(et->et_ci),
 				le16_to_cpu(el->l_tree_depth),
 				le16_to_cpu(el->l_count),
 				le16_to_cpu(el->l_next_free_rec),
@@ -4171,8 +4172,8 @@ static int ocfs2_insert_path(struct inode *inode,
 			if (ret)
 				mlog_errno(ret);
 	} else
-		ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path),
-				     insert, inode);
+		ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
+				     insert);
 
 	ret = ocfs2_journal_dirty(handle, leaf_bh);
 	if (ret)
@@ -4218,7 +4219,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 	}
 
 	if (le16_to_cpu(el->l_tree_depth) == 0) {
-		ocfs2_insert_at_leaf(insert_rec, el, type, inode);
+		ocfs2_insert_at_leaf(et, insert_rec, el, type);
 		goto out_update_clusters;
 	}
 
-- 
cgit v0.10.2


From c38e52bb1c0187186bd3c4a2b318ffe69cd2fdf8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:56:23 -0800
Subject: ocfs2: Give ocfs2_split_record() an extent_tree instead of an inode.

Another on the way to generic btree functions.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ec9c2ce..b57f976 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4022,7 +4022,7 @@ out:
 	return ret;
 }
 
-static void ocfs2_split_record(struct inode *inode,
+static void ocfs2_split_record(struct ocfs2_extent_tree *et,
 			       struct ocfs2_path *left_path,
 			       struct ocfs2_path *right_path,
 			       struct ocfs2_extent_rec *split_rec,
@@ -4095,7 +4095,8 @@ static void ocfs2_split_record(struct inode *inode,
 	}
 
 	rec = &el->l_recs[index];
-	ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec);
+	ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+				split, rec, split_rec);
 	ocfs2_rotate_leaf(insert_el, split_rec);
 }
 
@@ -4158,7 +4159,7 @@ static int ocfs2_insert_path(struct inode *inode,
 		 * of splits, but it's easier to just let one separate
 		 * function sort it all out.
 		 */
-		ocfs2_split_record(inode, left_path, right_path,
+		ocfs2_split_record(et, left_path, right_path,
 				   insert_rec, insert->ins_split);
 
 		/*
-- 
cgit v0.10.2


From 3505bec01829a8f690259517add55c7941a4d3d5 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:57:58 -0800
Subject: ocfs2: ocfs2_do_insert_extent() and ocfs2_insert_path() no longer
 need an inode.

They aren't using it, so remove it from their parameter lists.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b57f976..cced176 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4108,8 +4108,7 @@ static void ocfs2_split_record(struct ocfs2_extent_tree *et,
  * in. left_path should only be passed in if we need to update that
  * portion of the tree after an edge insert.
  */
-static int ocfs2_insert_path(struct inode *inode,
-			     handle_t *handle,
+static int ocfs2_insert_path(handle_t *handle,
 			     struct ocfs2_extent_tree *et,
 			     struct ocfs2_path *left_path,
 			     struct ocfs2_path *right_path,
@@ -4198,8 +4197,7 @@ out:
 	return ret;
 }
 
-static int ocfs2_do_insert_extent(struct inode *inode,
-				  handle_t *handle,
+static int ocfs2_do_insert_extent(handle_t *handle,
 				  struct ocfs2_extent_tree *et,
 				  struct ocfs2_extent_rec *insert_rec,
 				  struct ocfs2_insert_type *type)
@@ -4290,7 +4288,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_insert_path(inode, handle, et, left_path, right_path,
+	ret = ocfs2_insert_path(handle, et, left_path, right_path,
 				insert_rec, type);
 	if (ret) {
 		mlog_errno(ret);
@@ -4718,7 +4716,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	}
 
 	/* Finally, we can add clusters. This might rotate the tree for us. */
-	status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert);
+	status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
 	if (status < 0)
 		mlog_errno(status);
 	else if (et->et_ops == &ocfs2_dinode_et_ops)
@@ -4933,7 +4931,7 @@ leftright:
 		do_leftright = 1;
 	}
 
-	ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
+	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5237,7 +5235,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 	insert.ins_split = SPLIT_RIGHT;
 	insert.ins_tree_depth = depth;
 
-	ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
+	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
 	if (ret)
 		mlog_errno(ret);
 
-- 
cgit v0.10.2


From b4a176515c715f0c6db1759a39cd9c4175e5a23a Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:07:09 -0800
Subject: ocfs2: ocfs2_extent_contig() only requires the superblock.

Don't pass the inode in.  We don't want it around for generic btree
operations.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index cced176..2431bbb 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -747,7 +747,7 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
 }
 
 static enum ocfs2_contig_type
-	ocfs2_extent_contig(struct inode *inode,
+	ocfs2_extent_contig(struct super_block *sb,
 			    struct ocfs2_extent_rec *ext,
 			    struct ocfs2_extent_rec *insert_rec)
 {
@@ -762,12 +762,12 @@ static enum ocfs2_contig_type
 		return CONTIG_NONE;
 
 	if (ocfs2_extents_adjacent(ext, insert_rec) &&
-	    ocfs2_block_extent_contig(inode->i_sb, ext, blkno))
+	    ocfs2_block_extent_contig(sb, ext, blkno))
 			return CONTIG_RIGHT;
 
 	blkno = le64_to_cpu(ext->e_blkno);
 	if (ocfs2_extents_adjacent(insert_rec, ext) &&
-	    ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno))
+	    ocfs2_block_extent_contig(sb, insert_rec, blkno))
 		return CONTIG_LEFT;
 
 	return CONTIG_NONE;
@@ -4374,7 +4374,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
 		} else {
-			ret = ocfs2_extent_contig(inode, rec, split_rec);
+			ret = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
 		}
 	}
 
@@ -4420,7 +4420,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		contig_type = ocfs2_extent_contig(inode, rec, split_rec);
+		contig_type = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
 			ret = CONTIG_LEFTRIGHT;
@@ -4449,7 +4449,7 @@ static void ocfs2_figure_contig_type(struct inode *inode,
 	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
 
 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
-		contig_type = ocfs2_extent_contig(inode, &el->l_recs[i],
+		contig_type = ocfs2_extent_contig(inode->i_sb, &el->l_recs[i],
 						  insert_rec);
 		if (contig_type != CONTIG_NONE) {
 			insert->ins_contig_index = i;
-- 
cgit v0.10.2


From a29702914ad36443d83b5250b3bfa1bf91e6b239 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:09:54 -0800
Subject: ocfs2: Swap inode for extent_tree in
 ocfs2_figure_merge_contig_type().

We don't want struct inode in generic btree operations.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2431bbb..9b79150 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4312,7 +4312,8 @@ out:
 }
 
 static enum ocfs2_contig_type
-ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
+ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
+			       struct ocfs2_path *path,
 			       struct ocfs2_extent_list *el, int index,
 			       struct ocfs2_extent_rec *split_rec)
 {
@@ -4324,12 +4325,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 	struct ocfs2_path *left_path = NULL, *right_path = NULL;
 	struct buffer_head *bh;
 	struct ocfs2_extent_block *eb;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 
 	if (index > 0) {
 		rec = &el->l_recs[index - 1];
 	} else if (path->p_tree_depth > 0) {
-		status = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
-						       path, &left_cpos);
+		status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
 		if (status)
 			goto out;
 
@@ -4338,8 +4339,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (!left_path)
 				goto out;
 
-			status = ocfs2_find_path(INODE_CACHE(inode),
-						 left_path, left_cpos);
+			status = ocfs2_find_path(et->et_ci, left_path,
+						 left_cpos);
 			if (status)
 				goto out;
 
@@ -4349,7 +4350,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			    le16_to_cpu(new_el->l_count)) {
 				bh = path_leaf_bh(left_path);
 				eb = (struct ocfs2_extent_block *)bh->b_data;
-				ocfs2_error(inode->i_sb,
+				ocfs2_error(sb,
 					    "Extent block #%llu has an "
 					    "invalid l_next_free_rec of "
 					    "%d.  It should have "
@@ -4374,7 +4375,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
 		} else {
-			ret = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
+			ret = ocfs2_extent_contig(sb, rec, split_rec);
 		}
 	}
 
@@ -4383,8 +4384,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 		rec = &el->l_recs[index + 1];
 	else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
 		 path->p_tree_depth > 0) {
-		status = ocfs2_find_cpos_for_right_leaf(inode->i_sb,
-							path, &right_cpos);
+		status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
 		if (status)
 			goto out;
 
@@ -4395,7 +4395,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 		if (!right_path)
 			goto out;
 
-		status = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
+		status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
 		if (status)
 			goto out;
 
@@ -4405,7 +4405,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
 				bh = path_leaf_bh(right_path);
 				eb = (struct ocfs2_extent_block *)bh->b_data;
-				ocfs2_error(inode->i_sb,
+				ocfs2_error(sb,
 					    "Extent block #%llu has an "
 					    "invalid l_next_free_rec of %d",
 					    (unsigned long long)le64_to_cpu(eb->h_blkno),
@@ -4420,7 +4420,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		contig_type = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
+		contig_type = ocfs2_extent_contig(sb, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
 			ret = CONTIG_LEFTRIGHT;
@@ -5035,7 +5035,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 		goto out;
 	}
 
-	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el,
+	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(et, path, el,
 							    split_index,
 							    split_rec);
 
-- 
cgit v0.10.2


From 1ef61b33148a6b32b6d28383cd72ceeddfc7054d Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:12:33 -0800
Subject: ocfs2: Remove inode from ocfs2_figure_extent_contig().

It already has an ocfs2_extent_tree and doesn't need the inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9b79150..38b1fea 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4437,11 +4437,10 @@ out:
 	return ret;
 }
 
-static void ocfs2_figure_contig_type(struct inode *inode,
+static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
 				     struct ocfs2_insert_type *insert,
 				     struct ocfs2_extent_list *el,
-				     struct ocfs2_extent_rec *insert_rec,
-				     struct ocfs2_extent_tree *et)
+				     struct ocfs2_extent_rec *insert_rec)
 {
 	int i;
 	enum ocfs2_contig_type contig_type = CONTIG_NONE;
@@ -4449,8 +4448,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
 	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
 
 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
-		contig_type = ocfs2_extent_contig(inode->i_sb, &el->l_recs[i],
-						  insert_rec);
+		contig_type = ocfs2_extent_contig(ocfs2_metadata_cache_get_super(et->et_ci),
+						  &el->l_recs[i], insert_rec);
 		if (contig_type != CONTIG_NONE) {
 			insert->ins_contig_index = i;
 			break;
@@ -4579,7 +4578,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 		le16_to_cpu(el->l_next_free_rec);
 
 	if (!insert->ins_tree_depth) {
-		ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
+		ocfs2_figure_contig_type(et, insert, el, insert_rec);
 		ocfs2_figure_appending_type(insert, el, insert_rec);
 		return 0;
 	}
@@ -4613,7 +4612,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
          *     into two types of appends: simple record append, or a
          *     rotate inside the tail leaf.
 	 */
-	ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
+	ocfs2_figure_contig_type(et, insert, el, insert_rec);
 
 	/*
 	 * The insert code isn't quite ready to deal with all cases of
-- 
cgit v0.10.2


From 627961b77e68b725851cb227db10084bf15f6920 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:14:38 -0800
Subject: ocfs2: ocfs2_figure_insert_type() no longer needs struct inode.

It's not using it, so remove it from the parameter list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 38b1fea..3d09f4ba 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4530,8 +4530,7 @@ set_tail_append:
  * All of the information is stored on the ocfs2_insert_type
  * structure.
  */
-static int ocfs2_figure_insert_type(struct inode *inode,
-				    struct ocfs2_extent_tree *et,
+static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
 				    struct buffer_head **last_eb_bh,
 				    struct ocfs2_extent_rec *insert_rec,
 				    int *free_records,
@@ -4691,7 +4690,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec,
+	status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec,
 					  &free_records, &insert);
 	if (status < 0) {
 		mlog_errno(status);
-- 
cgit v0.10.2


From 92ba470c44c1404ff18ca0f4ecce1e5b116bb933 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:18:34 -0800
Subject: ocfs2: Make extent map insertion an extent_tree_operation.

ocfs2_insert_extent() wants to insert a record into the extent map if
it's an inode data extent.  But since many btrees can call that
function, let's make it an op on ocfs2_extent_tree.  Other tree types
can leave it empty.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 3d09f4ba..ed86988 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -83,6 +83,13 @@ struct ocfs2_extent_tree_operations {
 				   u32 new_clusters);
 
 	/*
+	 * If this extent tree is supported by an extent map, insert
+	 * a record into the map.
+	 */
+	void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et,
+				     struct ocfs2_extent_rec *rec);
+
+	/*
 	 * If this extent tree is supported by an extent map, truncate the
 	 * map to clusters,
 	 */
@@ -127,6 +134,8 @@ static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					 u64 blkno);
 static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters);
+static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
+					   struct ocfs2_extent_rec *rec);
 static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
 					     u32 clusters);
 static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
@@ -137,6 +146,7 @@ static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
 	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
 	.eo_get_last_eb_blk	= ocfs2_dinode_get_last_eb_blk,
 	.eo_update_clusters	= ocfs2_dinode_update_clusters,
+	.eo_extent_map_insert	= ocfs2_dinode_extent_map_insert,
 	.eo_extent_map_truncate	= ocfs2_dinode_extent_map_truncate,
 	.eo_insert_check	= ocfs2_dinode_insert_check,
 	.eo_sanity_check	= ocfs2_dinode_sanity_check,
@@ -172,6 +182,14 @@ static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 	spin_unlock(&oi->ip_lock);
 }
 
+static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
+					   struct ocfs2_extent_rec *rec)
+{
+	struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
+
+	ocfs2_extent_map_insert_rec(inode, rec);
+}
+
 static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
 					     u32 clusters)
 {
@@ -418,6 +436,13 @@ static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
 	et->et_ops->eo_update_clusters(et, clusters);
 }
 
+static inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et,
+					      struct ocfs2_extent_rec *rec)
+{
+	if (et->et_ops->eo_extent_map_insert)
+		et->et_ops->eo_extent_map_insert(et, rec);
+}
+
 static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
 						u32 clusters)
 {
@@ -4717,8 +4742,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
 	if (status < 0)
 		mlog_errno(status);
-	else if (et->et_ops == &ocfs2_dinode_et_ops)
-		ocfs2_extent_map_insert_rec(inode, &rec);
+	else
+		ocfs2_et_extent_map_insert(et, &rec);
 
 bail:
 	brelse(last_eb_bh);
-- 
cgit v0.10.2


From cc79d8c19e9d39446525a1026f1a21761f5d3cd2 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:24:43 -0800
Subject: ocfs2: ocfs2_insert_extent() no longer needs struct inode.

One more function down, no inode in the entire insert-extent chain.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ed86988..c4943b9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4681,13 +4681,11 @@ out:
 }
 
 /*
- * Insert an extent into an inode btree.
+ * Insert an extent into a btree.
  *
- * The caller needs to update fe->i_clusters
+ * The caller needs to update the owning btree's cluster count.
  */
-int ocfs2_insert_extent(struct ocfs2_super *osb,
-			handle_t *handle,
-			struct inode *inode,
+int ocfs2_insert_extent(handle_t *handle,
 			struct ocfs2_extent_tree *et,
 			u32 cpos,
 			u64 start_blk,
@@ -4701,8 +4699,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	struct ocfs2_insert_type insert = {0, };
 	struct ocfs2_extent_rec rec;
 
-	mlog(0, "add %u clusters at position %u to inode %llu\n",
-	     new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+	mlog(0, "add %u clusters at position %u to owner %llu\n",
+	     new_clusters, cpos,
+	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 
 	memset(&rec, 0, sizeof(rec));
 	rec.e_cpos = cpu_to_le32(cpos);
@@ -4829,8 +4828,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
 	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
-	status = ocfs2_insert_extent(osb, handle, inode, et,
-				     *logical_offset, block,
+	status = ocfs2_insert_extent(handle, et, *logical_offset, block,
 				     num_bits, flags, meta_ac);
 	if (status < 0) {
 		mlog_errno(status);
@@ -7244,8 +7242,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		 * the in-inode data from our pages.
 		 */
 		ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
-		ret = ocfs2_insert_extent(osb, handle, inode, &et,
-					  0, block, 1, 0, NULL);
+		ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out_commit;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 8718e57..99accd3 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -90,9 +90,7 @@ int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
 			    struct buffer_head **bh);
 
 struct ocfs2_alloc_context;
-int ocfs2_insert_extent(struct ocfs2_super *osb,
-			handle_t *handle,
-			struct inode *inode,
+int ocfs2_insert_extent(handle_t *handle,
 			struct ocfs2_extent_tree *et,
 			u32 cpos,
 			u64 start_blk,
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 088a1b5..de490a6 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2591,7 +2591,6 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
 {
 	int ret;
 	u64 phys_blkno;
-	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 
 	ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves,
 					 num_dx_leaves, &phys_blkno);
@@ -2600,7 +2599,7 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
 		goto out;
 	}
 
-	ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0,
+	ret = ocfs2_insert_extent(handle, et, cpos, phys_blkno, 1, 0,
 				  meta_ac);
 	if (ret)
 		mlog_errno(ret);
@@ -3094,7 +3093,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	 * This should never fail as our extent list is empty and all
 	 * related blocks have been journaled already.
 	 */
-	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
+	ret = ocfs2_insert_extent(handle, &et, 0, blkno, len,
 				  0, NULL);
 	if (ret) {
 		mlog_errno(ret);
@@ -3127,7 +3126,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 						      dirdata_bh);
 		} else {
 			ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
-			ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0,
+			ret = ocfs2_insert_extent(handle, &dx_et, 0,
 						  dx_insert_blkno, 1, 0, NULL);
 			if (ret)
 				mlog_errno(ret);
@@ -3147,7 +3146,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		}
 		blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
 
-		ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
+		ret = ocfs2_insert_extent(handle, &et, 1,
 					  blkno, len, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
@@ -4218,8 +4217,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 	/* This should never fail considering we start with an empty
 	 * dx_root. */
 	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
-	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0,
-				  insert_blkno, 1, 0, NULL);
+	ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
 	if (ret)
 		mlog_errno(ret);
 	did_quota = 0;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 61819b2..38db12a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4325,7 +4325,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
 	     num_bits, (unsigned long long)block, v_start);
-	ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
+	ret = ocfs2_insert_extent(handle, &et, v_start, block,
 				  num_bits, 0, ctxt->meta_ac);
 	if (ret < 0) {
 		mlog_errno(ret);
-- 
cgit v0.10.2


From cbee7e1a6a1a2a3d6eda1f76ffc38a3ed3eeb6cc Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:34:15 -0800
Subject: ocfs2: ocfs2_add_clusters_in_btree() no longer needs struct inode.

One more function that doesn't need a struct inode to pass to its
children.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c4943b9..29095e1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4758,13 +4758,11 @@ bail:
  * it is not limited to the file storage. Any extent tree can use this
  * function if it implements the proper ocfs2_extent_tree.
  */
-int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
-				struct inode *inode,
+int ocfs2_add_clusters_in_btree(handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				u32 *logical_offset,
 				u32 clusters_to_add,
 				int mark_unwritten,
-				struct ocfs2_extent_tree *et,
-				handle_t *handle,
 				struct ocfs2_alloc_context *data_ac,
 				struct ocfs2_alloc_context *meta_ac,
 				enum ocfs2_alloc_restarted *reason_ret)
@@ -4775,6 +4773,8 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	u32 bit_off, num_bits;
 	u64 block;
 	u8 flags = 0;
+	struct ocfs2_super *osb =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
 
 	BUG_ON(!clusters_to_add);
 
@@ -4826,8 +4826,9 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	}
 
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
-	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+	mlog(0, "Allocating %u clusters at block %u for owner %llu\n",
+	     num_bits, bit_off,
+	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 	status = ocfs2_insert_extent(handle, et, *logical_offset, block,
 				     num_bits, flags, meta_ac);
 	if (status < 0) {
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 99accd3..d1d196e 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -103,13 +103,11 @@ enum ocfs2_alloc_restarted {
 	RESTART_TRANS,
 	RESTART_META
 };
-int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
-				struct inode *inode,
+int ocfs2_add_clusters_in_btree(handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				u32 *logical_offset,
 				u32 clusters_to_add,
 				int mark_unwritten,
-				struct ocfs2_extent_tree *et,
-				handle_t *handle,
 				struct ocfs2_alloc_context *data_ac,
 				struct ocfs2_alloc_context *meta_ac,
 				enum ocfs2_alloc_restarted *reason_ret);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3ddbc5e..891e2c1e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -516,10 +516,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 	struct ocfs2_extent_tree et;
 
 	ocfs2_init_dinode_extent_tree(&et, inode, fe_bh);
-	ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset,
-					   clusters_to_add, mark_unwritten,
-					   &et, handle,
-					   data_ac, meta_ac, reason_ret);
+	ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
+					  clusters_to_add, mark_unwritten,
+					  data_ac, meta_ac, reason_ret);
 
 	return ret;
 }
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 38db12a..fdd02c4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -597,7 +597,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 	int status = 0;
 	handle_t *handle = ctxt->handle;
 	enum ocfs2_alloc_restarted why;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
 	struct ocfs2_extent_tree et;
 
@@ -613,13 +612,11 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 	}
 
 	prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
-	status = ocfs2_add_clusters_in_btree(osb,
-					     inode,
+	status = ocfs2_add_clusters_in_btree(handle,
+					     &et,
 					     &logical_start,
 					     clusters_to_add,
 					     0,
-					     &et,
-					     handle,
 					     ctxt->data_ac,
 					     ctxt->meta_ac,
 					     &why);
-- 
cgit v0.10.2


From dbdcf6a48a40e6c9d7081393d793c4f1c5bb4fcf Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:41:26 -0800
Subject: ocfs2: ocfs2_remove_extent() no longer needs struct inode.

One more generic btree function that is isolated from struct inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 29095e1..bfead60 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5198,8 +5198,8 @@ out:
 	return ret;
 }
 
-static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
-			    handle_t *handle, struct ocfs2_path *path,
+static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
+			    struct ocfs2_path *path,
 			    int index, u32 new_range,
 			    struct ocfs2_alloc_context *meta_ac)
 {
@@ -5216,7 +5216,8 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 	 */
 	el = path_leaf_el(path);
 	rec = &el->l_recs[index];
-	ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec);
+	ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+				   &split_rec, new_range, rec);
 
 	depth = path->p_tree_depth;
 	if (depth > 0) {
@@ -5424,9 +5425,9 @@ out:
 	return ret;
 }
 
-int ocfs2_remove_extent(struct inode *inode,
+int ocfs2_remove_extent(handle_t *handle,
 			struct ocfs2_extent_tree *et,
-			u32 cpos, u32 len, handle_t *handle,
+			u32 cpos, u32 len,
 			struct ocfs2_alloc_context *meta_ac,
 			struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
@@ -5458,10 +5459,11 @@ int ocfs2_remove_extent(struct inode *inode,
 	el = path_leaf_el(path);
 	index = ocfs2_search_extent_list(el, cpos);
 	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has an extent at cpos %u which can no "
+		ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+			    "Owner %llu has an extent at cpos %u which can no "
 			    "longer be found.\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
+			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+			    cpos);
 		ret = -EROFS;
 		goto out;
 	}
@@ -5488,9 +5490,10 @@ int ocfs2_remove_extent(struct inode *inode,
 
 	BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
 
-	mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d "
+	mlog(0, "Owner %llu, remove (cpos %u, len %u). Existing index %d "
 	     "(cpos %u, len %u)\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index,
+	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+	     cpos, len, index,
 	     le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
 
 	if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
@@ -5501,7 +5504,7 @@ int ocfs2_remove_extent(struct inode *inode,
 			goto out;
 		}
 	} else {
-		ret = ocfs2_split_tree(inode, et, handle, path, index,
+		ret = ocfs2_split_tree(handle, et, path, index,
 				       trunc_range, meta_ac);
 		if (ret) {
 			mlog_errno(ret);
@@ -5523,9 +5526,9 @@ int ocfs2_remove_extent(struct inode *inode,
 		el = path_leaf_el(path);
 		index = ocfs2_search_extent_list(el, cpos);
 		if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu: split at cpos %u lost record.",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu: split at cpos %u lost record.",
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 				    cpos);
 			ret = -EROFS;
 			goto out;
@@ -5539,10 +5542,10 @@ int ocfs2_remove_extent(struct inode *inode,
 		rec_range = le32_to_cpu(rec->e_cpos) +
 			ocfs2_rec_clusters(el, rec);
 		if (rec_range != trunc_range) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu: error after split at cpos %u"
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu: error after split at cpos %u"
 				    "trunc len %u, existing record is (%u,%u)",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 				    cpos, len, le32_to_cpu(rec->e_cpos),
 				    ocfs2_rec_clusters(el, rec));
 			ret = -EROFS;
@@ -5607,8 +5610,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 	vfs_dq_free_space_nodirty(inode,
 				  ocfs2_clusters_to_bytes(inode->i_sb, len));
 
-	ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
-				  dealloc);
+	ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index d1d196e..abc66ce 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -117,9 +117,8 @@ int ocfs2_mark_extent_written(struct inode *inode,
 			      handle_t *handle, u32 cpos, u32 len, u32 phys,
 			      struct ocfs2_alloc_context *meta_ac,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc);
-int ocfs2_remove_extent(struct inode *inode,
-			struct ocfs2_extent_tree *et,
-			u32 cpos, u32 len, handle_t *handle,
+int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
+			u32 cpos, u32 len,
 			struct ocfs2_alloc_context *meta_ac,
 			struct ocfs2_cached_dealloc_ctxt *dealloc);
 int ocfs2_remove_btree_range(struct inode *inode,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fdd02c4..96f973a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -663,7 +663,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
+	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
 				  &ctxt->dealloc);
 	if (ret) {
 		mlog_errno(ret);
@@ -4881,7 +4881,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
+	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
 				  &dealloc);
 	if (ret) {
 		mlog_errno(ret);
-- 
cgit v0.10.2


From d231129f44e7ead14f5f496e664ff1e3883a7b25 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:43:22 -0800
Subject: ocfs2: ocfs2_split_and_insert() no longer needs struct inode.

It already has an extent_tree.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bfead60..85cd2ad 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4879,10 +4879,9 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
 	split_rec->e_flags = rec->e_flags;
 }
 
-static int ocfs2_split_and_insert(struct inode *inode,
-				  handle_t *handle,
-				  struct ocfs2_path *path,
+static int ocfs2_split_and_insert(handle_t *handle,
 				  struct ocfs2_extent_tree *et,
+				  struct ocfs2_path *path,
 				  struct buffer_head **last_eb_bh,
 				  int split_index,
 				  struct ocfs2_extent_rec *orig_split_rec,
@@ -4944,8 +4943,8 @@ leftright:
 		 */
 		insert.ins_split = SPLIT_RIGHT;
 
-		ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range,
-					   &rec);
+		ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+					   &tmprec, insert_range, &rec);
 
 		split_rec = tmprec;
 
@@ -5100,7 +5099,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 						       path, el,
 						       split_index, split_rec);
 		else
-			ret = ocfs2_split_and_insert(inode, handle, path, et,
+			ret = ocfs2_split_and_insert(handle, et, path,
 						     &last_eb_bh, split_index,
 						     split_rec, meta_ac);
 		if (ret)
-- 
cgit v0.10.2


From f3868d0fa2e20d923087a8296fda47b0afe7f9ba Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 17 Feb 2009 19:46:04 -0800
Subject: ocfs2: Teach ocfs2_replace_extent_rec() to use an extent_tree.

Don't use a struct inode anymore.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 85cd2ad..7b0f2cd 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4983,8 +4983,8 @@ out:
 	return ret;
 }
 
-static int ocfs2_replace_extent_rec(struct inode *inode,
-				    handle_t *handle,
+static int ocfs2_replace_extent_rec(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
 				    struct ocfs2_path *path,
 				    struct ocfs2_extent_list *el,
 				    int split_index,
@@ -4992,7 +4992,7 @@ static int ocfs2_replace_extent_rec(struct inode *inode,
 {
 	int ret;
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -5095,8 +5095,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 
 	if (ctxt.c_contig_type == CONTIG_NONE) {
 		if (ctxt.c_split_covers_rec)
-			ret = ocfs2_replace_extent_rec(inode, handle,
-						       path, el,
+			ret = ocfs2_replace_extent_rec(handle, et, path, el,
 						       split_index, split_rec);
 		else
 			ret = ocfs2_split_and_insert(handle, et, path,
-- 
cgit v0.10.2


From a1cf076ba93f9fdf3eb4195f9f43d1e7cb7550f2 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:45:49 -0800
Subject: ocfs2: __ocfs2_mark_extent_written() doesn't need struct inode.

We only allow unwritten extents on data, so the toplevel
ocfs2_mark_extent_written() can use an inode all it wants.  But the
subfunction isn't even using the inode argument.

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7b0f2cd..4488685 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5026,9 +5026,8 @@ out:
  * have been brought into cache (and pinned via the journal), so the
  * extra overhead is not expressed in terms of disk reads.
  */
-static int __ocfs2_mark_extent_written(struct inode *inode,
+static int __ocfs2_mark_extent_written(handle_t *handle,
 				       struct ocfs2_extent_tree *et,
-				       handle_t *handle,
 				       struct ocfs2_path *path,
 				       int split_index,
 				       struct ocfs2_extent_rec *split_rec,
@@ -5062,7 +5061,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 
 	/*
 	 * The core merge / split code wants to know how much room is
-	 * left in this inodes allocation tree, so we pass the
+	 * left in this allocation tree, so we pass the
 	 * rightmost extent list.
 	 */
 	if (path->p_tree_depth) {
@@ -5185,7 +5184,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
 	split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
 	split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
 
-	ret = __ocfs2_mark_extent_written(inode, et, handle, left_path,
+	ret = __ocfs2_mark_extent_written(handle, et, left_path,
 					  index, &split_rec, meta_ac,
 					  dealloc);
 	if (ret)
-- 
cgit v0.10.2


From 5e404e9ed1b05cafb044bd46792e50197df805ed Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:54:22 -0800
Subject: ocfs2: Pass ocfs2_caching_info into ocfs_init_*_extent_tree().

With this commit, extent tree operations are divorced from inodes and
rely on ocfs2_caching_info.  Phew!

Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4488685..ab4d2b5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -366,7 +366,7 @@ static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
 };
 
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
-				     struct inode *inode,
+				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *bh,
 				     ocfs2_journal_access_func access,
 				     void *obj,
@@ -374,7 +374,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 {
 	et->et_ops = ops;
 	et->et_root_bh = bh;
-	et->et_ci = INODE_CACHE(inode);
+	et->et_ci = ci;
 	et->et_root_journal_access = access;
 	if (!obj)
 		obj = (void *)bh->b_data;
@@ -388,34 +388,34 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 }
 
 void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
-				   struct inode *inode,
+				   struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di,
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di,
 				 NULL, &ocfs2_dinode_et_ops);
 }
 
 void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
-				       struct inode *inode,
+				       struct ocfs2_caching_info *ci,
 				       struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb,
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb,
 				 NULL, &ocfs2_xattr_tree_et_ops);
 }
 
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
-					struct inode *inode,
+					struct ocfs2_caching_info *ci,
 					struct ocfs2_xattr_value_buf *vb)
 {
-	__ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb,
+	__ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb,
 				 &ocfs2_xattr_value_et_ops);
 }
 
 void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
-				    struct inode *inode,
+				    struct ocfs2_caching_info *ci,
 				    struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr,
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr,
 				 NULL, &ocfs2_dx_root_et_ops);
 }
 
@@ -7241,7 +7241,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		 * this proves to be false, we could always re-build
 		 * the in-inode data from our pages.
 		 */
-		ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
 		ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index abc66ce..bcf6aa4 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -68,17 +68,17 @@ struct ocfs2_extent_tree {
  * specified object buffer.
  */
 void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
-				   struct inode *inode,
+				   struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh);
 void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
-				       struct inode *inode,
+				       struct ocfs2_caching_info *ci,
 				       struct buffer_head *bh);
 struct ocfs2_xattr_value_buf;
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
-					struct inode *inode,
+					struct ocfs2_caching_info *ci,
 					struct ocfs2_xattr_value_buf *vb);
 void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
-				    struct inode *inode,
+				    struct ocfs2_caching_info *ci,
 				    struct buffer_head *bh);
 
 /*
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 49eef2c..15c594d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1259,7 +1259,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 			goto out;
 		}
 	} else if (unwritten) {
-		ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
+					      wc->w_di_bh);
 		ret = ocfs2_mark_extent_written(inode, &et,
 						wc->w_handle, cpos, 1, phys,
 						meta_ac, &wc->w_dealloc);
@@ -1726,7 +1727,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		     (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
 		     clusters_to_alloc, extents_to_split);
 
-		ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
+					      wc->w_di_bh);
 		ret = ocfs2_lock_allocators(inode, &et,
 					    clusters_to_alloc, extents_to_split,
 					    &data_ac, &meta_ac);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index de490a6..28c3ec2 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2903,7 +2903,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	struct ocfs2_extent_tree dx_et;
 	int did_quota = 0, bytes_allocated = 0;
 
-	ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh);
 
 	alloc = ocfs2_clusters_for_bytes(sb, bytes);
 	dx_alloc = 0;
@@ -3125,7 +3125,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 			ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
 						      dirdata_bh);
 		} else {
-			ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
+			ocfs2_init_dx_root_extent_tree(&dx_et,
+						       INODE_CACHE(dir),
+						       dx_root_bh);
 			ret = ocfs2_insert_extent(handle, &dx_et, 0,
 						  dx_insert_blkno, 1, 0, NULL);
 			if (ret)
@@ -3345,7 +3347,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	spin_lock(&OCFS2_I(dir)->ip_lock);
 	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
 		spin_unlock(&OCFS2_I(dir)->ip_lock);
-		ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
+					      parent_fe_bh);
 		num_free_extents = ocfs2_num_free_extents(osb, &et);
 		if (num_free_extents < 0) {
 			status = num_free_extents;
@@ -3837,7 +3840,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 	     (unsigned long long)OCFS2_I(dir)->ip_blkno,
 	     (unsigned long long)leaf_blkno, insert_hash);
 
-	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
 
 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
 	/*
@@ -4216,7 +4219,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 
 	/* This should never fail considering we start with an empty
 	 * dx_root. */
-	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
 	ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
 	if (ret)
 		mlog_errno(ret);
@@ -4540,7 +4543,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
 	if (ocfs2_dx_root_inline(dx_root))
 		goto remove_index;
 
-	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
 
 	/* XXX: What if dr_clusters is too large? */
 	while (le32_to_cpu(dx_root->dr_clusters)) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 891e2c1e..4921b4e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -515,7 +515,7 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 	int ret;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_dinode_extent_tree(&et, inode, fe_bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
 	ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
 					  clusters_to_add, mark_unwritten,
 					  data_ac, meta_ac, reason_ret);
@@ -563,7 +563,7 @@ restart_all:
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
 	     clusters_to_add);
-	ocfs2_init_dinode_extent_tree(&et, inode, bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
 	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
 				       &data_ac, &meta_ac);
 	if (status) {
@@ -1394,7 +1394,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 	struct address_space *mapping = inode->i_mapping;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
 	ocfs2_init_dealloc_ctxt(&dealloc);
 
 	if (byte_len == 0)
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 96f973a..1bf12c4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -602,7 +602,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
 	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
+	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 
 	status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			      OCFS2_JOURNAL_ACCESS_WRITE);
@@ -654,7 +654,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
+	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 
 	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
@@ -4266,7 +4266,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     prev_cpos, (unsigned long long)bucket_blkno(first));
 
-	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
+	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
 
 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
@@ -4841,7 +4841,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
+	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
 
 	ocfs2_init_dealloc_ctxt(&dealloc);
 
-- 
cgit v0.10.2


From f61f925859c57f6175082aeeee17743c68558a6e Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 5 Sep 2009 13:33:23 -0400
Subject: Revert "ACPI: Attach the ACPI device to the ACPI handle as early as
 possible"

This reverts commit eab4b645769fa2f8703f5a3cb0cc4ac090d347af.

http://bugzilla.kernel.org/show_bug.cgi?id=13002

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 781435d..5dd702c 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1264,16 +1264,6 @@ acpi_add_single_object(struct acpi_device **child,
 	acpi_device_set_id(device, parent, handle, type);
 
 	/*
-	 * The ACPI device is attached to acpi handle before getting
-	 * the power/wakeup/peformance flags. Otherwise OS can't get
-	 * the corresponding ACPI device by the acpi handle in the course
-	 * of getting the power/wakeup/performance flags.
-	 */
-	result = acpi_device_set_context(device, type);
-	if (result)
-		goto end;
-
-	/*
 	 * Power Management
 	 * ----------------
 	 */
@@ -1303,6 +1293,8 @@ acpi_add_single_object(struct acpi_device **child,
 			goto end;
 	}
 
+	if ((result = acpi_device_set_context(device, type)))
+		goto end;
 
 	result = acpi_device_register(device, parent);
 
-- 
cgit v0.10.2


From 307a042416dfc2216251a85b79e8578b65fdc0e7 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 3 Sep 2009 09:55:40 +0800
Subject: ACPICA: Fix extraneous warning if _DSM returns a package

_DSM can return any type of object, so validation on the return
type cannot be performed. ACPICA BZ 802.

http://www.acpica.org/bugzilla/show_bug.cgi?id=802

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 8314e6a..f8427af 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -193,11 +193,15 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
 	}
 
 	/*
-	 * We have a return value, but if one wasn't expected, just exit, this is
+	 * 1) We have a return value, but if one wasn't expected, just exit, this is
 	 * not a problem. For example, if the "Implicit Return" feature is
 	 * enabled, methods will always return a value.
+	 *
+	 * 2) If the return value can be of any type, then we cannot perform any
+	 * validation, exit.
 	 */
-	if (!predefined->info.expected_btypes) {
+	if ((!predefined->info.expected_btypes) ||
+	    (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) {
 		goto cleanup;
 	}
 
-- 
cgit v0.10.2


From e678902ee899f6b0ab48166b410cdc9f1c27a350 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 3 Sep 2009 09:58:14 +0800
Subject: ACPICA: Remove error message for Store(Localx,Localx)

We silently ignore this construct for Windows compatibility
ACPICA BZ 785.

http://www.acpica.org/bugzilla/show_bug.cgi?id=785

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/dsmthdat.c b/drivers/acpi/acpica/dsmthdat.c
index 22b1a3c..7d077bb 100644
--- a/drivers/acpi/acpica/dsmthdat.c
+++ b/drivers/acpi/acpica/dsmthdat.c
@@ -433,10 +433,10 @@ acpi_ds_method_data_get_value(u8 type,
 
 			case ACPI_REFCLASS_LOCAL:
 
-				ACPI_ERROR((AE_INFO,
-					    "Uninitialized Local[%d] at node %p",
-					    index, node));
-
+				/*
+				 * No error message for this case, will be trapped again later to
+				 * detect and ignore cases of Store(local_x,local_x)
+				 */
 				return_ACPI_STATUS(AE_AML_UNINITIALIZED_LOCAL);
 
 			default:
-- 
cgit v0.10.2


From e3fe0913b8e732ae636cf23afca76cf2c30718e5 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 3 Sep 2009 10:03:37 +0800
Subject: ACPICA: Fix memory leak for ill-formed Package objects

Fixes a possible memory leak in the interpreter for package
objects if the package initializer list is longer than the
defined size of the package. This apparently can only happen
if the BIOS changes the package size on the fly (seen in a _PSS
object), as both iASL and the other compiler do not allow this.
The interpreter will truncate the package to the defined size
(and issue an error message), but can leave the extra objects
undeleted if they have been pre-created during the argument
processing (such is the case if the package consists of a number
of sub-packages as in the _PSS.) ACPICA BZ 805.

http://www.acpica.org/bugzilla/show_bug.cgi?id=805

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 02e6caa..507e1f0 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -482,14 +482,27 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state,
 	if (arg) {
 		/*
 		 * num_elements was exhausted, but there are remaining elements in the
-		 * package_list.
+		 * package_list. Truncate the package to num_elements.
 		 *
 		 * Note: technically, this is an error, from ACPI spec: "It is an error
 		 * for NumElements to be less than the number of elements in the
-		 * PackageList". However, for now, we just print an error message and
-		 * no exception is returned.
+		 * PackageList". However, we just print an error message and
+		 * no exception is returned. This provides Windows compatibility. Some
+		 * BIOSs will alter the num_elements on the fly, creating this type
+		 * of ill-formed package object.
 		 */
 		while (arg) {
+			/*
+			 * We must delete any package elements that were created earlier
+			 * and are not going to be used because of the package truncation.
+			 */
+			if (arg->common.node) {
+				acpi_ut_remove_reference(ACPI_CAST_PTR
+							 (union
+							  acpi_operand_object,
+							  arg->common.node));
+				arg->common.node = NULL;
+			}
 
 			/* Find out how many elements there really are */
 
@@ -498,7 +511,7 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state,
 		}
 
 		ACPI_WARNING((AE_INFO,
-			    "Package List length (%X) larger than NumElements count (%X), truncated\n",
+			    "Package List length (0x%X) larger than NumElements count (0x%X), truncated\n",
 			    i, element_count));
 	} else if (i < element_count) {
 		/*
@@ -506,7 +519,7 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state,
 		 * Note: this is not an error, the package is padded out with NULLs.
 		 */
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Package List length (%X) smaller than NumElements count (%X), padded with null elements\n",
+				  "Package List length (0x%X) smaller than NumElements count (0x%X), padded with null elements\n",
 				  i, element_count));
 	}
 
-- 
cgit v0.10.2


From eb752552464dbb7a99f8a975ec3b9355893cedd4 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 3 Sep 2009 10:05:08 +0800
Subject: ACPICA: Update _OSI with new Windows OS strings

Added strings for Windows server 2008, Windows Vista SP1,
Windows 7, and Windows server 2008 R2.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index ff6689e..81e64f4 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -898,6 +898,9 @@ struct acpi_bit_register_info {
 #define ACPI_OSI_WIN_XP_SP2             0x05
 #define ACPI_OSI_WINSRV_2003_SP1        0x06
 #define ACPI_OSI_WIN_VISTA              0x07
+#define ACPI_OSI_WINSRV_2008            0x08
+#define ACPI_OSI_WIN_VISTA_SP1          0x09
+#define ACPI_OSI_WIN_7                  0x0A
 
 #define ACPI_ALWAYS_ILLEGAL             0x00
 
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 5503307..5d54e36 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -69,6 +69,9 @@ static struct acpi_interface_info acpi_interfaces_supported[] = {
 	{"Windows 2001 SP2", ACPI_OSI_WIN_XP_SP2},	/* Windows XP SP2 */
 	{"Windows 2001.1 SP1", ACPI_OSI_WINSRV_2003_SP1},	/* Windows Server 2003 SP1 - Added 03/2006 */
 	{"Windows 2006", ACPI_OSI_WIN_VISTA},	/* Windows Vista - Added 03/2006 */
+	{"Windows 2006.1", ACPI_OSI_WINSRV_2008},	/* Windows Server 2008 - Added 09/2009 */
+	{"Windows 2006 SP1", ACPI_OSI_WIN_VISTA_SP1},	/* Windows Vista SP1 - Added 09/2009 */
+	{"Windows 2009", ACPI_OSI_WIN_7},	/* Windows 7 and Server 2008 R2 - Added 09/2009 */
 
 	/* Feature Group Strings */
 
-- 
cgit v0.10.2


From 9e64155eb1b6ab78980db58cfd21385fa5f6b024 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 3 Sep 2009 10:21:03 +0800
Subject: ACPICA: Windows compatibility: autoexecute root _INI method

Add support for execution of an _INI method at the namespace root.
Although not defined in the ACPI specification, this support was
added to Windows around the Vista timeframe. It is added here
for Windows compatibility.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 2adfcf3..1d5b360 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -170,6 +170,21 @@ acpi_status acpi_ns_initialize_devices(void)
 		goto error_exit;
 	}
 
+	/*
+	 * Execute the "global" _INI method that may appear at the root. This
+	 * support is provided for Windows compatibility (Vista+) and is not
+	 * part of the ACPI specification.
+	 */
+	info.evaluate_info->prefix_node = acpi_gbl_root_node;
+	info.evaluate_info->pathname = METHOD_NAME__INI;
+	info.evaluate_info->parameters = NULL;
+	info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE;
+
+	status = acpi_ns_evaluate(info.evaluate_info);
+	if (ACPI_SUCCESS(status)) {
+		info.num_INI++;
+	}
+
 	/* Walk namespace to execute all _INIs on present devices */
 
 	status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
-- 
cgit v0.10.2


From c9766237afa92e8d7f27bbcd4964f1b43fa0bce8 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 4 Sep 2009 08:56:17 +0800
Subject: ACPICA: Update version to 20090903.

Version 20090903.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index f3b358b..e723b0f 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20090730
+#define ACPI_CA_VERSION                 0x20090903
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v0.10.2


From 6951867b9967066eda090f46ad91ce69e0ead611 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Thu, 10 Sep 2009 12:25:04 +0300
Subject: nfsd41: sunrpc: move struct rpc_buffer def into sunrpc.h

Move struct rpc_buffer's definition into a sunrpc.h, a common, internal
header file, in preparation for supporting the nfsv4.1 backchannel.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: sunrpc: #include <linux/net.h> from sunrpc.h]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 8f459ab..cef74ba 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -21,6 +21,8 @@
 
 #include <linux/sunrpc/clnt.h>
 
+#include "sunrpc.h"
+
 #ifdef RPC_DEBUG
 #define RPCDBG_FACILITY		RPCDBG_SCHED
 #define RPC_TASK_MAGIC_ID	0xf00baa
@@ -711,11 +713,6 @@ static void rpc_async_schedule(struct work_struct *work)
 	__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
 }
 
-struct rpc_buffer {
-	size_t	len;
-	char	data[];
-};
-
 /**
  * rpc_malloc - allocate an RPC buffer
  * @task: RPC task that will use this buffer
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 5d9dd74..13171e6 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -27,6 +27,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef _NET_SUNRPC_SUNRPC_H
 #define _NET_SUNRPC_SUNRPC_H
 
+#include <linux/net.h>
+
+/*
+ * Header for dynamically allocated rpc buffers.
+ */
+struct rpc_buffer {
+	size_t	len;
+	char	data[];
+};
+
 static inline int rpc_reply_expected(struct rpc_task *task)
 {
 	return (task->tk_msg.rpc_proc != NULL) &&
-- 
cgit v0.10.2


From e936d0773df172ec8600777fdd72bbc1f75f22ad Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Mon, 7 Sep 2009 10:58:07 -0400
Subject: intel-iommu: Disallow interrupt remapping if not all ioapics covered

Current kernel enable interrupt remapping only when all the vt-d unit support
interrupt remapping. So it is reasonable we should also disallow enabling
intr-remapping if there any io-apics that are not listed under vt-d units.
Otherwise we can run into issues.

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 3264b62..fba4f68 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -570,9 +570,6 @@ int __init dmar_table_init(void)
 		printk(KERN_INFO PREFIX "No ATSR found\n");
 #endif
 
-#ifdef CONFIG_INTR_REMAP
-	parse_ioapics_under_ir();
-#endif
 	return 0;
 }
 
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 4f5b871..ebfa47b 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -626,6 +626,11 @@ int __init enable_intr_remapping(int eim)
 	struct dmar_drhd_unit *drhd;
 	int setup = 0;
 
+	if (parse_ioapics_under_ir() != 1) {
+		printk(KERN_INFO "Not enable interrupt remapping\n");
+		return -1;
+	}
+
 	for_each_drhd_unit(drhd) {
 		struct intel_iommu *iommu = drhd->iommu;
 
-- 
cgit v0.10.2


From 074835f0143b83845af5044af2739c52c9f53808 Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Wed, 9 Sep 2009 12:05:39 -0400
Subject: intel-iommu: Fix kernel hang if interrupt remapping disabled in BIOS

BIOS clear DMAR table INTR_REMAP flag to disable interrupt remapping. Current
kernel only check interrupt remapping(IR) flag in DRHD's extended capability
register to decide interrupt remapping support or not. But IR flag will not
change when BIOS disable/enable interrupt remapping.

When user disable interrupt remapping in BIOS or BIOS often defaultly disable
interrupt remapping feature when BIOS is not mature.Though BIOS disable
interrupt remapping but intr_remapping_supported function will always report
to OS support interrupt remapping if VT-d2 chipset populated. On this
cases, kernel will continue enable interrupt remapping and result kernel panic.
This bug exist on almost all platforms with interrupt remapping support.

This patch add DMAR table INTR_REMAP flag check before enable interrupt
remapping.

Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index fba4f68..270ed22 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1316,3 +1316,13 @@ int dmar_reenable_qi(struct intel_iommu *iommu)
 
 	return 0;
 }
+
+/*
+ * Check interrupt remapping support in DMAR table description.
+ */
+int dmar_ir_support(void)
+{
+	struct acpi_table_dmar *dmar;
+	dmar = (struct acpi_table_dmar *)dmar_tbl;
+	return dmar->flags & 0x1;
+}
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index ebfa47b..ac06514 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -611,6 +611,9 @@ int __init intr_remapping_supported(void)
 	if (disable_intremap)
 		return 0;
 
+	if (!dmar_ir_support())
+		return 0;
+
 	for_each_drhd_unit(drhd) {
 		struct intel_iommu *iommu = drhd->iommu;
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 482dc91..4f0a72a 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -360,4 +360,6 @@ extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
 
 extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
+extern int dmar_ir_support(void);
+
 #endif
-- 
cgit v0.10.2


From 4cfc7e6019caa3e97d2a81c48c8d575d7b38d751 Mon Sep 17 00:00:00 2001
From: Rahul Iyer <iyer@netapp.com>
Date: Thu, 10 Sep 2009 17:32:28 +0300
Subject: nfsd41: sunrpc: Added rpc server-side backchannel handling

When the call direction is a reply, copy the xid and call direction into the
req->rq_private_buf.head[0].iov_base otherwise rpc_verify_header returns
rpc_garbage.

Signed-off-by: Rahul Iyer <iyer@netapp.com>
Signed-off-by: Mike Sager <sager@netapp.com>
Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[get rid of CONFIG_NFSD_V4_1]
[sunrpc: refactoring of svc_tcp_recvfrom]
[nfsd41: sunrpc: create common send routine for the fore and the back channels]
[nfsd41: sunrpc: Use free_page() to free server backchannel pages]
[nfsd41: sunrpc: Document server backchannel locking]
[nfsd41: sunrpc: remove bc_connect_worker()]
[nfsd41: sunrpc: Define xprt_server_backchannel()[
[nfsd41: sunrpc: remove bc_close and bc_init_auto_disconnect dummy functions]
[nfsd41: sunrpc: eliminate unneeded switch statement in xs_setup_tcp()]
[nfsd41: sunrpc: Don't auto close the server backchannel connection]
[nfsd41: sunrpc: Remove unused functions]
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: change bc_sock to bc_xprt]
[nfsd41: sunrpc: move struct rpc_buffer def into a common header file]
[nfsd41: sunrpc: use rpc_sleep in bc_send_request so not to block on mutex]
[removed cosmetic changes]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[sunrpc: add new xprt class for nfsv4.1 backchannel]
[sunrpc: v2.1 change handling of auto_close and init_auto_disconnect operations for the nfsv4.1 backchannel]
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
[reverted more cosmetic leftovers]
[got rid of xprt_server_backchannel]
[separated "nfsd41: sunrpc: add new xprt class for nfsv4.1 backchannel"]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Cc: Trond Myklebust <trond.myklebust@netapp.com>
[sunrpc: change idle timeout value for the backchannel]
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Acked-by: Trond Myklebust <trond.myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 2223ae0..5f4e18b 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -65,6 +65,7 @@ struct svc_xprt {
 	size_t			xpt_locallen;	/* length of address */
 	struct sockaddr_storage	xpt_remote;	/* remote peer's address */
 	size_t			xpt_remotelen;	/* length of address */
+	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 04dba23..1b353a7 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -28,6 +28,7 @@ struct svc_sock {
 	/* private TCP part */
 	u32			sk_reclen;	/* length of record */
 	u32			sk_tcplen;	/* current read length */
+	struct rpc_xprt		*sk_bc_xprt;	/* NFSv4.1 backchannel xprt */
 };
 
 /*
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index c090df4..228d694 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -179,6 +179,7 @@ struct rpc_xprt {
 	spinlock_t		reserve_lock;	/* lock slot table */
 	u32			xid;		/* Next XID value to use */
 	struct rpc_task *	snd_task;	/* Task blocked in send */
+	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
 #if defined(CONFIG_NFS_V4_1)
 	struct svc_serv		*bc_serv;       /* The RPC service which will */
 						/* process the callback */
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 13171e6..90c292e 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -43,5 +43,9 @@ static inline int rpc_reply_expected(struct rpc_task *task)
 		(task->tk_msg.rpc_proc->p_decode != NULL);
 }
 
+int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
+		    struct page *headpage, unsigned long headoffset,
+		    struct page *tailpage, unsigned long tailoffset);
+
 #endif /* _NET_SUNRPC_SUNRPC_H */
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 912dea5..df124f7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -160,6 +160,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	mutex_init(&xprt->xpt_mutex);
 	spin_lock_init(&xprt->xpt_lock);
 	set_bit(XPT_BUSY, &xprt->xpt_flags);
+	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
@@ -810,6 +811,7 @@ int svc_send(struct svc_rqst *rqstp)
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
 	mutex_unlock(&xprt->xpt_mutex);
+	rpc_wake_up(&xprt->xpt_bc_pending);
 	svc_xprt_release(rqstp);
 
 	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 76a380d..ccc5e83 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -49,6 +49,7 @@
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/xprt.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
@@ -153,49 +154,27 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 }
 
 /*
- * Generic sendto routine
+ * send routine intended to be shared by the fore- and back-channel
  */
-static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
+int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
+		    struct page *headpage, unsigned long headoffset,
+		    struct page *tailpage, unsigned long tailoffset)
 {
-	struct svc_sock	*svsk =
-		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-	struct socket	*sock = svsk->sk_sock;
-	int		slen;
-	union {
-		struct cmsghdr	hdr;
-		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
-	} buffer;
-	struct cmsghdr *cmh = &buffer.hdr;
-	int		len = 0;
 	int		result;
 	int		size;
 	struct page	**ppage = xdr->pages;
 	size_t		base = xdr->page_base;
 	unsigned int	pglen = xdr->page_len;
 	unsigned int	flags = MSG_MORE;
-	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
+	int		slen;
+	int		len = 0;
 
 	slen = xdr->len;
 
-	if (rqstp->rq_prot == IPPROTO_UDP) {
-		struct msghdr msg = {
-			.msg_name	= &rqstp->rq_addr,
-			.msg_namelen	= rqstp->rq_addrlen,
-			.msg_control	= cmh,
-			.msg_controllen	= sizeof(buffer),
-			.msg_flags	= MSG_MORE,
-		};
-
-		svc_set_cmsg_data(rqstp, cmh);
-
-		if (sock_sendmsg(sock, &msg, 0) < 0)
-			goto out;
-	}
-
 	/* send head */
 	if (slen == xdr->head[0].iov_len)
 		flags = 0;
-	len = kernel_sendpage(sock, rqstp->rq_respages[0], 0,
+	len = kernel_sendpage(sock, headpage, headoffset,
 				  xdr->head[0].iov_len, flags);
 	if (len != xdr->head[0].iov_len)
 		goto out;
@@ -219,16 +198,58 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 		base = 0;
 		ppage++;
 	}
+
 	/* send tail */
 	if (xdr->tail[0].iov_len) {
-		result = kernel_sendpage(sock, rqstp->rq_respages[0],
-					     ((unsigned long)xdr->tail[0].iov_base)
-						& (PAGE_SIZE-1),
-					     xdr->tail[0].iov_len, 0);
-
+		result = kernel_sendpage(sock, tailpage, tailoffset,
+				   xdr->tail[0].iov_len, 0);
 		if (result > 0)
 			len += result;
 	}
+
+out:
+	return len;
+}
+
+
+/*
+ * Generic sendto routine
+ */
+static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
+{
+	struct svc_sock	*svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
+	struct socket	*sock = svsk->sk_sock;
+	union {
+		struct cmsghdr	hdr;
+		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
+	} buffer;
+	struct cmsghdr *cmh = &buffer.hdr;
+	int		len = 0;
+	unsigned long tailoff;
+	unsigned long headoff;
+	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
+
+	if (rqstp->rq_prot == IPPROTO_UDP) {
+		struct msghdr msg = {
+			.msg_name	= &rqstp->rq_addr,
+			.msg_namelen	= rqstp->rq_addrlen,
+			.msg_control	= cmh,
+			.msg_controllen	= sizeof(buffer),
+			.msg_flags	= MSG_MORE,
+		};
+
+		svc_set_cmsg_data(rqstp, cmh);
+
+		if (sock_sendmsg(sock, &msg, 0) < 0)
+			goto out;
+	}
+
+	tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
+	headoff = 0;
+	len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
+			       rqstp->rq_respages[0], tailoff);
+
 out:
 	dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
 		svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
@@ -951,6 +972,57 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 	return -EAGAIN;
 }
 
+static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
+			       struct rpc_rqst **reqpp, struct kvec *vec)
+{
+	struct rpc_rqst *req = NULL;
+	u32 *p;
+	u32 xid;
+	u32 calldir;
+	int len;
+
+	len = svc_recvfrom(rqstp, vec, 1, 8);
+	if (len < 0)
+		goto error;
+
+	p = (u32 *)rqstp->rq_arg.head[0].iov_base;
+	xid = *p++;
+	calldir = *p;
+
+	if (calldir == 0) {
+		/* REQUEST is the most common case */
+		vec[0] = rqstp->rq_arg.head[0];
+	} else {
+		/* REPLY */
+		if (svsk->sk_bc_xprt)
+			req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid);
+
+		if (!req) {
+			printk(KERN_NOTICE
+				"%s: Got unrecognized reply: "
+				"calldir 0x%x sk_bc_xprt %p xid %08x\n",
+				__func__, ntohl(calldir),
+				svsk->sk_bc_xprt, xid);
+			vec[0] = rqstp->rq_arg.head[0];
+			goto out;
+		}
+
+		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+		       sizeof(struct xdr_buf));
+		/* copy the xid and call direction */
+		memcpy(req->rq_private_buf.head[0].iov_base,
+		       rqstp->rq_arg.head[0].iov_base, 8);
+		vec[0] = req->rq_private_buf.head[0];
+	}
+ out:
+	vec[0].iov_base += 8;
+	vec[0].iov_len -= 8;
+	len = svsk->sk_reclen - 8;
+ error:
+	*reqpp = req;
+	return len;
+}
+
 /*
  * Receive data from a TCP socket.
  */
@@ -962,6 +1034,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	int		len;
 	struct kvec *vec;
 	int pnum, vlen;
+	struct rpc_rqst *req = NULL;
 
 	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
 		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
@@ -975,9 +1048,27 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	vec = rqstp->rq_vec;
 	vec[0] = rqstp->rq_arg.head[0];
 	vlen = PAGE_SIZE;
+
+	/*
+	 * We have enough data for the whole tcp record. Let's try and read the
+	 * first 8 bytes to get the xid and the call direction. We can use this
+	 * to figure out if this is a call or a reply to a callback. If
+	 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
+	 * In that case, don't bother with the calldir and just read the data.
+	 * It will be rejected in svc_process.
+	 */
+	if (len >= 8) {
+		len = svc_process_calldir(svsk, rqstp, &req, vec);
+		if (len < 0)
+			goto err_again;
+		vlen -= 8;
+	}
+
 	pnum = 1;
 	while (vlen < len) {
-		vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]);
+		vec[pnum].iov_base = (req) ?
+			page_address(req->rq_private_buf.pages[pnum - 1]) :
+			page_address(rqstp->rq_pages[pnum]);
 		vec[pnum].iov_len = PAGE_SIZE;
 		pnum++;
 		vlen += PAGE_SIZE;
@@ -989,6 +1080,16 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	if (len < 0)
 		goto err_again;
 
+	/*
+	 * Account for the 8 bytes we read earlier
+	 */
+	len += 8;
+
+	if (req) {
+		xprt_complete_rqst(req->rq_task, len);
+		len = 0;
+		goto out;
+	}
 	dprintk("svc: TCP complete record (%d bytes)\n", len);
 	rqstp->rq_arg.len = len;
 	rqstp->rq_arg.page_base = 0;
@@ -1002,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	rqstp->rq_xprt_ctxt   = NULL;
 	rqstp->rq_prot	      = IPPROTO_TCP;
 
+out:
 	/* Reset TCP read info */
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f412a85..fd46d42 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -832,6 +832,11 @@ static void xprt_timer(struct rpc_task *task)
 	spin_unlock_bh(&xprt->transport_lock);
 }
 
+static inline int xprt_has_timer(struct rpc_xprt *xprt)
+{
+	return xprt->idle_timeout != 0;
+}
+
 /**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
@@ -1013,7 +1018,7 @@ void xprt_release(struct rpc_task *task)
 	if (!list_empty(&req->rq_list))
 		list_del(&req->rq_list);
 	xprt->last_used = jiffies;
-	if (list_empty(&xprt->recv))
+	if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
 		mod_timer(&xprt->timer,
 				xprt->last_used + xprt->idle_timeout);
 	spin_unlock_bh(&xprt->transport_lock);
@@ -1082,8 +1087,11 @@ found:
 #endif /* CONFIG_NFS_V4_1 */
 
 	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
-	setup_timer(&xprt->timer, xprt_init_autodisconnect,
-			(unsigned long)xprt);
+	if (xprt_has_timer(xprt))
+		setup_timer(&xprt->timer, xprt_init_autodisconnect,
+			    (unsigned long)xprt);
+	else
+		init_timer(&xprt->timer);
 	xprt->last_used = jiffies;
 	xprt->cwnd = RPC_INITCWND;
 	xprt->bind_index = 0;
@@ -1102,7 +1110,6 @@ found:
 
 	dprintk("RPC:       created transport %p with %u slots\n", xprt,
 			xprt->max_reqs);
-
 	return xprt;
 }
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 62438f3..d9a2b81 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -32,6 +32,7 @@
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/file.h>
 #ifdef CONFIG_NFS_V4_1
@@ -43,6 +44,7 @@
 #include <net/udp.h>
 #include <net/tcp.h>
 
+#include "sunrpc.h"
 /*
  * xprtsock tunables
  */
@@ -2098,6 +2100,134 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 			xprt->stat.bklog_u);
 }
 
+/*
+ * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason
+ * we allocate pages instead doing a kmalloc like rpc_malloc is because we want
+ * to use the server side send routines.
+ */
+void *bc_malloc(struct rpc_task *task, size_t size)
+{
+	struct page *page;
+	struct rpc_buffer *buf;
+
+	BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
+	page = alloc_page(GFP_KERNEL);
+
+	if (!page)
+		return NULL;
+
+	buf = page_address(page);
+	buf->len = PAGE_SIZE;
+
+	return buf->data;
+}
+
+/*
+ * Free the space allocated in the bc_alloc routine
+ */
+void bc_free(void *buffer)
+{
+	struct rpc_buffer *buf;
+
+	if (!buffer)
+		return;
+
+	buf = container_of(buffer, struct rpc_buffer, data);
+	free_page((unsigned long)buf);
+}
+
+/*
+ * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
+ * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
+ */
+static int bc_sendto(struct rpc_rqst *req)
+{
+	int len;
+	struct xdr_buf *xbufp = &req->rq_snd_buf;
+	struct rpc_xprt *xprt = req->rq_xprt;
+	struct sock_xprt *transport =
+				container_of(xprt, struct sock_xprt, xprt);
+	struct socket *sock = transport->sock;
+	unsigned long headoff;
+	unsigned long tailoff;
+
+	/*
+	 * Set up the rpc header and record marker stuff
+	 */
+	xs_encode_tcp_record_marker(xbufp);
+
+	tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
+	headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
+	len = svc_send_common(sock, xbufp,
+			      virt_to_page(xbufp->head[0].iov_base), headoff,
+			      xbufp->tail[0].iov_base, tailoff);
+
+	if (len != xbufp->len) {
+		printk(KERN_NOTICE "Error sending entire callback!\n");
+		len = -EAGAIN;
+	}
+
+	return len;
+}
+
+/*
+ * The send routine. Borrows from svc_send
+ */
+static int bc_send_request(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct svc_xprt	*xprt;
+	struct svc_sock         *svsk;
+	u32                     len;
+
+	dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
+	/*
+	 * Get the server socket associated with this callback xprt
+	 */
+	xprt = req->rq_xprt->bc_xprt;
+	svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+	/*
+	 * Grab the mutex to serialize data as the connection is shared
+	 * with the fore channel
+	 */
+	if (!mutex_trylock(&xprt->xpt_mutex)) {
+		rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
+		if (!mutex_trylock(&xprt->xpt_mutex))
+			return -EAGAIN;
+		rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
+	}
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+		len = -ENOTCONN;
+	else
+		len = bc_sendto(req);
+	mutex_unlock(&xprt->xpt_mutex);
+
+	if (len > 0)
+		len = 0;
+
+	return len;
+}
+
+/*
+ * The close routine. Since this is client initiated, we do nothing
+ */
+
+static void bc_close(struct rpc_xprt *xprt)
+{
+	return;
+}
+
+/*
+ * The xprt destroy routine. Again, because this connection is client
+ * initiated, we do nothing
+ */
+
+static void bc_destroy(struct rpc_xprt *xprt)
+{
+	return;
+}
+
 static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
@@ -2134,6 +2264,22 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.print_stats		= xs_tcp_print_stats,
 };
 
+/*
+ * The rpc_xprt_ops for the server backchannel
+ */
+
+static struct rpc_xprt_ops bc_tcp_ops = {
+	.reserve_xprt		= xprt_reserve_xprt,
+	.release_xprt		= xprt_release_xprt,
+	.buf_alloc		= bc_malloc,
+	.buf_free		= bc_free,
+	.send_request		= bc_send_request,
+	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.close			= bc_close,
+	.destroy		= bc_destroy,
+	.print_stats		= xs_tcp_print_stats,
+};
+
 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 				      unsigned int slot_table_size)
 {
-- 
cgit v0.10.2


From 9e4c6379a62d94d3362b12c7a00f2105df6d7eeb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 9 Sep 2009 16:32:54 +1000
Subject: sunrpc/cache: change cache_defer_req to return -ve error, not
 boolean.

As "cache_defer_req" does not sound like a predicate, having it return
a boolean value can be confusing.  It is more consistent to return
0 for success and negative for error.

Exactly what error code to return is not important as we don't
differentiate between reasons why the request wasn't deferred,
we only care about whether it was deferred or not.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ade8a7e..1a50dfe 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -255,7 +255,7 @@ int cache_check(struct cache_detail *detail,
 	}
 
 	if (rv == -EAGAIN) {
-		if (cache_defer_req(rqstp, h) == 0) {
+		if (cache_defer_req(rqstp, h) < 0) {
 			/* Request is not deferred */
 			rv = cache_is_valid(detail, h);
 			if (rv == -EAGAIN)
@@ -511,11 +511,11 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 		 * or continue and drop the oldest below
 		 */
 		if (net_random()&1)
-			return 0;
+			return -ENOMEM;
 	}
 	dreq = req->defer(req);
 	if (dreq == NULL)
-		return 0;
+		return -ENOMEM;
 
 	dreq->item = item;
 
@@ -545,9 +545,9 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 	if (!test_bit(CACHE_PENDING, &item->flags)) {
 		/* must have just been validated... */
 		cache_revisit_request(item);
-		return 0;
+		return -EAGAIN;
 	}
-	return 1;
+	return 0;
 }
 
 static void cache_revisit_request(struct cache_head *item)
-- 
cgit v0.10.2


From 908329f2c08b8b5af7b394f709b0ee9c43b93041 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 9 Sep 2009 16:32:54 +1000
Subject: sunrpc/cache: simplify cache_fresh_locked and cache_fresh_unlocked.

The extra call to cache_revisit_request in cache_fresh_unlocked is not
needed, as should have been fairly clear at the time of
   commit 4013edea9a0b6cdcb1fdf5d4011e47e068fd6efb

If there are requests to be revisited, then we can be sure that
CACHE_PENDING is set, so the second call is sufficient.

So remove the first call.
Then remove the 'new' parameter,
then remove the return value for cache_fresh_locked which is only used
to provide the value for 'new'.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 1a50dfe..f2895d0 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -105,18 +105,16 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
 
 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
 
-static int cache_fresh_locked(struct cache_head *head, time_t expiry)
+static void cache_fresh_locked(struct cache_head *head, time_t expiry)
 {
 	head->expiry_time = expiry;
 	head->last_refresh = get_seconds();
-	return !test_and_set_bit(CACHE_VALID, &head->flags);
+	set_bit(CACHE_VALID, &head->flags);
 }
 
 static void cache_fresh_unlocked(struct cache_head *head,
-			struct cache_detail *detail, int new)
+				 struct cache_detail *detail)
 {
-	if (new)
-		cache_revisit_request(head);
 	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
 		cache_revisit_request(head);
 		cache_dequeue(detail, head);
@@ -132,7 +130,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	 */
 	struct cache_head **head;
 	struct cache_head *tmp;
-	int is_new;
 
 	if (!test_bit(CACHE_VALID, &old->flags)) {
 		write_lock(&detail->hash_lock);
@@ -141,9 +138,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 				set_bit(CACHE_NEGATIVE, &old->flags);
 			else
 				detail->update(old, new);
-			is_new = cache_fresh_locked(old, new->expiry_time);
+			cache_fresh_locked(old, new->expiry_time);
 			write_unlock(&detail->hash_lock);
-			cache_fresh_unlocked(old, detail, is_new);
+			cache_fresh_unlocked(old, detail);
 			return old;
 		}
 		write_unlock(&detail->hash_lock);
@@ -167,11 +164,11 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	*head = tmp;
 	detail->entries++;
 	cache_get(tmp);
-	is_new = cache_fresh_locked(tmp, new->expiry_time);
+	cache_fresh_locked(tmp, new->expiry_time);
 	cache_fresh_locked(old, 0);
 	write_unlock(&detail->hash_lock);
-	cache_fresh_unlocked(tmp, detail, is_new);
-	cache_fresh_unlocked(old, detail, 0);
+	cache_fresh_unlocked(tmp, detail);
+	cache_fresh_unlocked(old, detail);
 	cache_put(old, detail);
 	return tmp;
 }
@@ -240,8 +237,8 @@ int cache_check(struct cache_detail *detail,
 				cache_revisit_request(h);
 				if (rv == -EAGAIN) {
 					set_bit(CACHE_NEGATIVE, &h->flags);
-					cache_fresh_unlocked(h, detail,
-					     cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY));
+					cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY);
+					cache_fresh_unlocked(h, detail);
 					rv = -ENOENT;
 				}
 				break;
-- 
cgit v0.10.2


From 64549e9357e5222a73e41aa87372b37abb047720 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Sun, 19 Jul 2009 21:40:39 +0200
Subject: ieee1394: raw1394: Do not leak memory on failed trylock.

Do not leak the allocated memory in case the mutex_trylock() failed
to acquire the lock.

Signed-off-by: Michael Buesch <mb@bu3sch.de>

This bug does not happen in practice:  All raw1394 clients use
libraw1394, and accesses to a libraw1394 handle need to be serialized
by the client.  This is documented in libraw1394's API reference.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c
index da5f882..0bc3d78 100644
--- a/drivers/ieee1394/raw1394.c
+++ b/drivers/ieee1394/raw1394.c
@@ -2272,8 +2272,10 @@ static ssize_t raw1394_write(struct file *file, const char __user * buffer,
 		return -EFAULT;
 	}
 
-	if (!mutex_trylock(&fi->state_mutex))
+	if (!mutex_trylock(&fi->state_mutex)) {
+		free_pending_request(req);
 		return -EAGAIN;
+	}
 
 	switch (fi->state) {
 	case opened:
-- 
cgit v0.10.2


From 928ec5f148e729076e9202e7c78babede628a50c Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 6 Sep 2009 18:49:17 +0200
Subject: firewire: ohci: fix Self ID Count register mask (safeguard against
 buffer overflow)

The selfIDSize field of Self ID Count is 9 bits wide, and we are only
interested in the high 8 bits.  Fix the mask accordingly.  The
previously too large mask didn't do damage though because the next few
bits in the register are reserved and therefore zero with presently
existing hardware.

Also, check for the maximum possible self ID count of 252 (according to
OHCI 1.1 clause 11.2 and IEEE 1394a-2000 clause 4.3.4.1, i.e. up to four
self IDs of up to 63 nodes, even though IEEE 1394 up to edition 2008
defines only up to three self IDs per node).  More than 252 self IDs
would only happen if the self ID receive DMA unit malfunctioned, which
would likely be caught by other self ID buffer checks.  However, check
it early to be sure.  More than 253 quadlets would overflow the Topology
Map CSR.

Reported-By: PaX Team
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 76b321b..5d52425 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -1279,8 +1279,8 @@ static void bus_reset_tasklet(unsigned long data)
 	 * the inverted quadlets and a header quadlet, we shift one
 	 * bit extra to get the actual number of self IDs.
 	 */
-	self_id_count = (reg >> 3) & 0x3ff;
-	if (self_id_count == 0) {
+	self_id_count = (reg >> 3) & 0xff;
+	if (self_id_count == 0 || self_id_count > 252) {
 		fw_notify("inconsistent self IDs\n");
 		return;
 	}
-- 
cgit v0.10.2


From 18668ff9a3232d5f942a2f7abc1ad67d2760dcdf Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 6 Sep 2009 18:49:48 +0200
Subject: firewire: core: header file cleanup

fw_card_get, fw_card_put, fw_card_release are currently not exported for
use outside the firewire-core.  Move their definitions/ declarations
from the subsystem header file to the core header file.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 6052816..7ff6e75 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -96,6 +96,20 @@ int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
 int fw_compute_block_crc(u32 *block);
 void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
 
+static inline struct fw_card *fw_card_get(struct fw_card *card)
+{
+	kref_get(&card->kref);
+
+	return card;
+}
+
+void fw_card_release(struct kref *kref);
+
+static inline void fw_card_put(struct fw_card *card)
+{
+	kref_put(&card->kref, fw_card_release);
+}
+
 
 /* -cdev */
 
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 192d1e43..7e1d4de 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -134,20 +134,6 @@ struct fw_card {
 	u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
 };
 
-static inline struct fw_card *fw_card_get(struct fw_card *card)
-{
-	kref_get(&card->kref);
-
-	return card;
-}
-
-void fw_card_release(struct kref *kref);
-
-static inline void fw_card_put(struct fw_card *card)
-{
-	kref_put(&card->kref, fw_card_release);
-}
-
 struct fw_attribute_group {
 	struct attribute_group *groups[2];
 	struct attribute_group group;
-- 
cgit v0.10.2


From b171e204b32b69e241af994d6e9be559e33535c1 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 6 Sep 2009 18:50:29 +0200
Subject: firewire: core: fix race with parallel PCI device probe

The config ROM buffer received from generate_config_rom is a globally
shared static buffer.  Extend the card_mutex protection in fw_add_card
until after the config ROM was copied into the card driver's buffer.
Otherwise, parallelized card driver probes may end up with ROM contents
that were meant for a different card.

firewire-ohci's card->driver->enable hook is safe to be called within
the card_mutex.  Furthermore, it is safe to reorder card_list update
versus card enable, which simplifies the code a little.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index f74edae..e4864e8 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -444,16 +444,13 @@ int fw_card_add(struct fw_card *card,
 	card->guid = guid;
 
 	mutex_lock(&card_mutex);
-	config_rom = generate_config_rom(card, &length);
-	list_add_tail(&card->link, &card_list);
-	mutex_unlock(&card_mutex);
 
+	config_rom = generate_config_rom(card, &length);
 	ret = card->driver->enable(card, config_rom, length);
-	if (ret < 0) {
-		mutex_lock(&card_mutex);
-		list_del(&card->link);
-		mutex_unlock(&card_mutex);
-	}
+	if (ret == 0)
+		list_add_tail(&card->link, &card_list);
+
+	mutex_unlock(&card_mutex);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 85cb9b68640cf467a99d4b6d518f1293222dbc9e Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Tue, 8 Sep 2009 01:13:53 +0200
Subject: firewire: core: fix topology map response handler

This register is 1 kBytes large.  Adjust topology_map.length to prevent
registration of other response handlers in this region and to make sure
that we respond to requests to the upper half of the register.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 479b22f..da628c7 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -834,7 +834,7 @@ static void handle_topology_map(struct fw_card *card, struct fw_request *request
 }
 
 static struct fw_address_handler topology_map = {
-	.length			= 0x200,
+	.length			= 0x400,
 	.address_callback	= handle_topology_map,
 };
 
-- 
cgit v0.10.2


From 094614fc14966bc6a6259ade55f051fe17f36122 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 6 Sep 2009 18:51:27 +0200
Subject: firewire: sbp2: fix status reception

Per SBP-2 clause 5.3, a target shall store 8...32 bytes of status
information.  Trailing zeros after the first 8 bytes don't need to be
stored, they are implicit.  Fix the status write handler to clear all
unwritten status data.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index e5df822..8f83bff 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -425,19 +425,20 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
 	struct sbp2_logical_unit *lu = callback_data;
 	struct sbp2_orb *orb;
 	struct sbp2_status status;
-	size_t header_size;
 	unsigned long flags;
 
 	if (tcode != TCODE_WRITE_BLOCK_REQUEST ||
-	    length == 0 || length > sizeof(status)) {
+	    length < 8 || length > sizeof(status)) {
 		fw_send_response(card, request, RCODE_TYPE_ERROR);
 		return;
 	}
 
-	header_size = min(length, 2 * sizeof(u32));
-	fw_memcpy_from_be32(&status, payload, header_size);
-	if (length > header_size)
-		memcpy(status.data, payload + 8, length - header_size);
+	status.status  = be32_to_cpup(payload);
+	status.orb_low = be32_to_cpup(payload + 4);
+	memset(status.data, 0, sizeof(status.data));
+	if (length > 8)
+		memcpy(status.data, payload + 8, length - 8);
+
 	if (STATUS_GET_SOURCE(status) == 2 || STATUS_GET_SOURCE(status) == 3) {
 		fw_notify("non-orb related status write, not handled\n");
 		fw_send_response(card, request, RCODE_COMPLETE);
-- 
cgit v0.10.2


From 3c5f80357c3fb3170e39e5d0ae87ddd6652f36ac Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 6 Sep 2009 19:33:50 +0200
Subject: firewire: sbp2: remove a workaround for Momobay FX-3A

The inquiry delay does more harm than good in tests on a recent kernel.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 8f83bff..50f0176 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -354,8 +354,7 @@ static const struct {
 	/* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
 		.firmware_revision	= 0x002800,
 		.model			= 0x000000,
-		.workarounds		= SBP2_WORKAROUND_DELAY_INQUIRY |
-					  SBP2_WORKAROUND_POWER_CONDITION,
+		.workarounds		= SBP2_WORKAROUND_POWER_CONDITION,
 	},
 	/* Initio bridges, actually only needed for some older ones */ {
 		.firmware_revision	= 0x000200,
-- 
cgit v0.10.2


From 625f0850a8e27b6a8d6fdb95056f35bc22d92b55 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 6 Sep 2009 19:34:17 +0200
Subject: ieee1394: sbp2: remove a workaround for Momobay FX-3A

The inquiry delay is not necessary anymore in tests on a recent kernel.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 52b25f8..f199896 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -372,8 +372,7 @@ static const struct {
 	/* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
 		.firmware_revision	= 0x002800,
 		.model			= 0x000000,
-		.workarounds		= SBP2_WORKAROUND_DELAY_INQUIRY |
-					  SBP2_WORKAROUND_POWER_CONDITION,
+		.workarounds		= SBP2_WORKAROUND_POWER_CONDITION,
 	},
 	/* Initio bridges, actually only needed for some older ones */ {
 		.firmware_revision	= 0x000200,
-- 
cgit v0.10.2


From f300baba5a1536070d6d77bf0c8c4ca999bb4f0f Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Thu, 10 Sep 2009 17:33:30 +0300
Subject: nfsd41: sunrpc: add new xprt class for nfsv4.1 backchannel

[sunrpc: change idle timeout value for the backchannel]
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 3d02558..8ed9642 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -114,6 +114,7 @@ struct rpc_create_args {
 	rpc_authflavor_t	authflavor;
 	unsigned long		flags;
 	char			*client_name;
+	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
 };
 
 /* Values for "flags" field */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 228d694..6f9457a 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -124,6 +124,23 @@ struct rpc_xprt_ops {
 	void		(*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
 };
 
+/*
+ * RPC transport identifiers
+ *
+ * To preserve compatibility with the historical use of raw IP protocol
+ * id's for transport selection, UDP and TCP identifiers are specified
+ * with the previous values. No such restriction exists for new transports,
+ * except that they may not collide with these values (17 and 6,
+ * respectively).
+ */
+#define XPRT_TRANSPORT_BC       (1 << 31)
+enum xprt_transports {
+	XPRT_TRANSPORT_UDP	= IPPROTO_UDP,
+	XPRT_TRANSPORT_TCP	= IPPROTO_TCP,
+	XPRT_TRANSPORT_BC_TCP	= IPPROTO_TCP | XPRT_TRANSPORT_BC,
+	XPRT_TRANSPORT_RDMA	= 256
+};
+
 struct rpc_xprt {
 	struct kref		kref;		/* Reference count */
 	struct rpc_xprt_ops *	ops;		/* transport methods */
@@ -232,6 +249,7 @@ struct xprt_create {
 	struct sockaddr *	srcaddr;	/* optional local address */
 	struct sockaddr *	dstaddr;	/* remote peer address */
 	size_t			addrlen;
+	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
 };
 
 struct xprt_class {
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 54a379c..c2f04e1 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -41,11 +41,6 @@
 #define _LINUX_SUNRPC_XPRTRDMA_H
 
 /*
- * RPC transport identifier for RDMA
- */
-#define XPRT_TRANSPORT_RDMA	256
-
-/*
  * rpcbind (v3+) RDMA netid.
  */
 #define RPCBIND_NETID_RDMA	"rdma"
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index c2a46c4..3f14a02 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -13,17 +13,6 @@ int		init_socket_xprt(void);
 void		cleanup_socket_xprt(void);
 
 /*
- * RPC transport identifiers for UDP, TCP
- *
- * To preserve compatibility with the historical use of raw IP protocol
- * id's for transport selection, these are specified with the previous
- * values. No such restriction exists for new transports, except that
- * they may not collide with these values (17 and 6, respectively).
- */
-#define XPRT_TRANSPORT_UDP	IPPROTO_UDP
-#define XPRT_TRANSPORT_TCP	IPPROTO_TCP
-
-/*
  * RPC slot table sizes for UDP, TCP transports
  */
 extern unsigned int xprt_udp_slot_table_entries;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c1e467e..7389804 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -288,6 +288,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 		.srcaddr = args->saddress,
 		.dstaddr = args->address,
 		.addrlen = args->addrsize,
+		.bc_xprt = args->bc_xprt,
 	};
 	char servername[48];
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d9a2b81..bee4154 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2468,11 +2468,93 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	return ERR_PTR(-EINVAL);
 }
 
+/**
+ * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
+ * @args: rpc transport creation arguments
+ *
+ */
+static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
+{
+	struct sockaddr *addr = args->dstaddr;
+	struct rpc_xprt *xprt;
+	struct sock_xprt *transport;
+	struct svc_sock *bc_sock;
+
+	if (!args->bc_xprt)
+		ERR_PTR(-EINVAL);
+
+	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+	if (IS_ERR(xprt))
+		return xprt;
+	transport = container_of(xprt, struct sock_xprt, xprt);
+
+	xprt->prot = IPPROTO_TCP;
+	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
+	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+	xprt->timeout = &xs_tcp_default_timeout;
+
+	/* backchannel */
+	xprt_set_bound(xprt);
+	xprt->bind_timeout = 0;
+	xprt->connect_timeout = 0;
+	xprt->reestablish_timeout = 0;
+	xprt->idle_timeout = 0;
+
+	/*
+	 * The backchannel uses the same socket connection as the
+	 * forechannel
+	 */
+	xprt->bc_xprt = args->bc_xprt;
+	bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
+	bc_sock->sk_bc_xprt = xprt;
+	transport->sock = bc_sock->sk_sock;
+	transport->inet = bc_sock->sk_sk;
+
+	xprt->ops = &bc_tcp_ops;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		xs_format_peer_addresses(xprt, "tcp",
+					 RPCBIND_NETID_TCP);
+		break;
+	case AF_INET6:
+		xs_format_peer_addresses(xprt, "tcp",
+				   RPCBIND_NETID_TCP6);
+		break;
+	default:
+		kfree(xprt);
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
+	if (xprt_bound(xprt))
+		dprintk("RPC:       set up xprt to %s (port %s) via %s\n",
+				xprt->address_strings[RPC_DISPLAY_ADDR],
+				xprt->address_strings[RPC_DISPLAY_PORT],
+				xprt->address_strings[RPC_DISPLAY_PROTO]);
+	else
+		dprintk("RPC:       set up xprt to %s (autobind) via %s\n",
+				xprt->address_strings[RPC_DISPLAY_ADDR],
+				xprt->address_strings[RPC_DISPLAY_PROTO]);
+
+	/*
+	 * Since we don't want connections for the backchannel, we set
+	 * the xprt status to connected
+	 */
+	xprt_set_connected(xprt);
+
+
+	if (try_module_get(THIS_MODULE))
+		return xprt;
+	kfree(xprt->slot);
+	kfree(xprt);
+	return ERR_PTR(-EINVAL);
+}
+
 static struct xprt_class	xs_udp_transport = {
 	.list		= LIST_HEAD_INIT(xs_udp_transport.list),
 	.name		= "udp",
 	.owner		= THIS_MODULE,
-	.ident		= IPPROTO_UDP,
+	.ident		= XPRT_TRANSPORT_UDP,
 	.setup		= xs_setup_udp,
 };
 
@@ -2480,10 +2562,18 @@ static struct xprt_class	xs_tcp_transport = {
 	.list		= LIST_HEAD_INIT(xs_tcp_transport.list),
 	.name		= "tcp",
 	.owner		= THIS_MODULE,
-	.ident		= IPPROTO_TCP,
+	.ident		= XPRT_TRANSPORT_TCP,
 	.setup		= xs_setup_tcp,
 };
 
+static struct xprt_class	xs_bc_tcp_transport = {
+	.list		= LIST_HEAD_INIT(xs_bc_tcp_transport.list),
+	.name		= "tcp NFSv4.1 backchannel",
+	.owner		= THIS_MODULE,
+	.ident		= XPRT_TRANSPORT_BC_TCP,
+	.setup		= xs_setup_bc_tcp,
+};
+
 /**
  * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
  *
@@ -2497,6 +2587,7 @@ int init_socket_xprt(void)
 
 	xprt_register_transport(&xs_udp_transport);
 	xprt_register_transport(&xs_tcp_transport);
+	xprt_register_transport(&xs_bc_tcp_transport);
 
 	return 0;
 }
@@ -2516,6 +2607,7 @@ void cleanup_socket_xprt(void)
 
 	xprt_unregister_transport(&xs_udp_transport);
 	xprt_unregister_transport(&xs_tcp_transport);
+	xprt_unregister_transport(&xs_bc_tcp_transport);
 }
 
 static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
-- 
cgit v0.10.2


From 4be36ca0cefc09725f52a9590d061399d3e524d7 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Thu, 10 Sep 2009 12:25:46 +0300
Subject: nfsd4: fix whitespace in NFSPROC4_CLNT_CB_NULL definition

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 81d1c52..63bb384 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -56,7 +56,7 @@
 /* Index of predefined Linux callback client operations */
 
 enum {
-        NFSPROC4_CLNT_CB_NULL = 0,
+	NFSPROC4_CLNT_CB_NULL = 0,
 	NFSPROC4_CLNT_CB_RECALL,
 };
 
-- 
cgit v0.10.2


From d1ff65226c5afe55f9af38a439058f41b71e114f Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 15 Sep 2009 11:59:49 +0200
Subject: HID: fix non-atomic allocation in hid_input_report

'interrupt' variable can't be used to safely determine whether
we are running in atomic context or not, as we might be called from
during control transfer completion through hid_ctrl() in atomic
context with interrupt == 0.

Reported-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 342b7d3..ca9bb26 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1089,8 +1089,7 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i
 		return -1;
 	}
 
-	buf = kmalloc(sizeof(char) * HID_DEBUG_BUFSIZE,
-			interrupt ? GFP_ATOMIC : GFP_KERNEL);
+	buf = kmalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_ATOMIC);
 
 	if (!buf) {
 		report = hid_get_report(report_enum, data);
-- 
cgit v0.10.2


From 8079ce34f2c3f5bfedcea8d4fb7290ce46ac5b56 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 31 Aug 2009 16:49:19 +0200
Subject: sfi: Remove unused code

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
index 761df3f..b294b7a 100644
--- a/arch/x86/kernel/sfi.c
+++ b/arch/x86/kernel/sfi.c
@@ -55,16 +55,12 @@ void __init mp_sfi_register_lapic_address(unsigned long address)
 /* All CPUs enumerated by SFI must be present and enabled */
 void __cpuinit mp_sfi_register_lapic(u8 id)
 {
-	int boot_cpu = 0;
-
 	if (MAX_APICS - id <= 0) {
 		pr_warning("Processor #%d invalid (max %d)\n",
 			id, MAX_APICS);
 		return;
 	}
 
-	if (id == boot_cpu_physical_apicid)
-		boot_cpu = 1;
 	pr_info("registering lapic[%d]\n", id);
 
 	generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
-- 
cgit v0.10.2


From 3834f47291df475be3f0f0fb7ccaa098967cc054 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Mon, 14 Sep 2009 13:01:53 +0800
Subject: SFI: remove unneeded includes

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
index b294b7a..34e0993 100644
--- a/arch/x86/kernel/sfi.c
+++ b/arch/x86/kernel/sfi.c
@@ -21,22 +21,15 @@
 #define KMSG_COMPONENT "SFI"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include <linux/bootmem.h>
-#include <linux/cpumask.h>
-#include <linux/module.h>
 #include <linux/acpi.h>
 #include <linux/init.h>
-#include <linux/efi.h>
-#include <linux/irq.h>
 #include <linux/sfi.h>
 #include <linux/io.h>
 
 #include <asm/io_apic.h>
-#include <asm/pgtable.h>
 #include <asm/mpspec.h>
 #include <asm/setup.h>
 #include <asm/apic.h>
-#include <asm/e820.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c
index 7bd6b18..d3b4968 100644
--- a/drivers/sfi/sfi_core.c
+++ b/drivers/sfi/sfi_core.c
@@ -62,16 +62,11 @@
 #include <linux/bootmem.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/acpi.h>
 #include <linux/init.h>
-#include <linux/irq.h>
 #include <linux/sfi.h>
-#include <linux/smp.h>
-
-#include <asm/pgtable.h>
 
 #include "sfi_core.h"
 
-- 
cgit v0.10.2


From 886e3b7fe6054230c89ae078a09565ed183ecc73 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 15 Sep 2009 12:22:42 -0400
Subject: nfsd4: fix null dereference creating nfsv4 callback client

On setting up the callback to the client, we attempt to use the same
authentication flavor the client did.  We find an rpc cred to use by
calling rpcauth_lookup_credcache(), which assumes that the given
authentication flavor has a credentials cache.  However, this is not
required to be true--in particular, auth_null does not use one.
Instead, we should call the auth's lookup_cred() method.

Without this, a client attempting to mount using nfsv4 and auth_null
triggers a null dereference.

Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 63bb384..4abb882 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -437,6 +437,7 @@ static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
 	struct auth_cred acred = {
 		.machine_cred = 1
 	};
+	struct rpc_auth *auth = cb->cb_client->cl_auth;
 
 	/*
 	 * Note in the gss case this doesn't actually have to wait for a
@@ -444,8 +445,7 @@ static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
 	 * non-uptodate cred which the rpc state machine will fill in with
 	 * a refresh_upcall later.
 	 */
-	return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
-							RPCAUTH_LOOKUP_NEW);
+	return auth->au_ops->lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW);
 }
 
 void do_probe_callback(struct nfs4_client *clp)
-- 
cgit v0.10.2


From 5d351754fcf58d1a604aa7cf95c2805e8a098ad9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 15 Sep 2009 13:32:13 -0400
Subject: SUNRPC: Defer the auth_gss upcall when the RPC call is asynchronous

Otherwise, the upcall is going to be synchronous, which may not be what the
caller wants...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 3f63218..996df4d 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -111,7 +111,7 @@ struct rpc_credops {
 	void			(*crdestroy)(struct rpc_cred *);
 
 	int			(*crmatch)(struct auth_cred *, struct rpc_cred *, int);
-	void			(*crbind)(struct rpc_task *, struct rpc_cred *);
+	void			(*crbind)(struct rpc_task *, struct rpc_cred *, int);
 	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
 	int			(*crrefresh)(struct rpc_task *);
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
@@ -140,7 +140,7 @@ struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *
 void			rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
 struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
 void			rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
-void			rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *);
+void			rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
 void			put_rpccred(struct rpc_cred *);
 void			rpcauth_unbindcred(struct rpc_task *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 0c431c2..54a4e04 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -385,7 +385,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
 EXPORT_SYMBOL_GPL(rpcauth_init_cred);
 
 void
-rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
+rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
 {
 	task->tk_msg.rpc_cred = get_rpccred(cred);
 	dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
@@ -394,7 +394,7 @@ rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
 EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
 
 static void
-rpcauth_bind_root_cred(struct rpc_task *task)
+rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
 	struct auth_cred acred = {
@@ -405,7 +405,7 @@ rpcauth_bind_root_cred(struct rpc_task *task)
 
 	dprintk("RPC: %5u looking up %s cred\n",
 		task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
-	ret = auth->au_ops->lookup_cred(auth, &acred, 0);
+	ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
 	if (!IS_ERR(ret))
 		task->tk_msg.rpc_cred = ret;
 	else
@@ -413,14 +413,14 @@ rpcauth_bind_root_cred(struct rpc_task *task)
 }
 
 static void
-rpcauth_bind_new_cred(struct rpc_task *task)
+rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
 	struct rpc_cred *ret;
 
 	dprintk("RPC: %5u looking up %s cred\n",
 		task->tk_pid, auth->au_ops->au_name);
-	ret = rpcauth_lookupcred(auth, 0);
+	ret = rpcauth_lookupcred(auth, lookupflags);
 	if (!IS_ERR(ret))
 		task->tk_msg.rpc_cred = ret;
 	else
@@ -430,12 +430,16 @@ rpcauth_bind_new_cred(struct rpc_task *task)
 void
 rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
 {
+	int lookupflags = 0;
+
+	if (flags & RPC_TASK_ASYNC)
+		lookupflags |= RPCAUTH_LOOKUP_NEW;
 	if (cred != NULL)
-		cred->cr_ops->crbind(task, cred);
+		cred->cr_ops->crbind(task, cred, lookupflags);
 	else if (flags & RPC_TASK_ROOTCREDS)
-		rpcauth_bind_root_cred(task);
+		rpcauth_bind_root_cred(task, lookupflags);
 	else
-		rpcauth_bind_new_cred(task);
+		rpcauth_bind_new_cred(task, lookupflags);
 }
 
 void
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 4028502..bf88bf8 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -55,13 +55,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void)
 EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
 
 static void
-generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
+generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
 {
 	struct rpc_auth *auth = task->tk_client->cl_auth;
 	struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
 	struct rpc_cred *ret;
 
-	ret = auth->au_ops->lookup_cred(auth, acred, 0);
+	ret = auth->au_ops->lookup_cred(auth, acred, lookupflags);
 	if (!IS_ERR(ret))
 		task->tk_msg.rpc_cred = ret;
 	else
-- 
cgit v0.10.2


From 29ab23cc5d351658d01a4327d55e9106a73fd04f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 15 Sep 2009 15:56:50 -0400
Subject: nfsd4: allow nfs4 state startup to fail

The failure here is pretty unlikely, but we should handle it anyway.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 46e9ac5..11db40c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4004,7 +4004,7 @@ set_max_delegations(void)
 
 /* initialization to perform when the nfsd service is started: */
 
-static void
+static int
 __nfs4_state_start(void)
 {
 	unsigned long grace_time;
@@ -4016,19 +4016,26 @@ __nfs4_state_start(void)
 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
 	       grace_time/HZ);
 	laundry_wq = create_singlethread_workqueue("nfsd4");
+	if (laundry_wq == NULL)
+		return -ENOMEM;
 	queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
 	set_max_delegations();
+	return 0;
 }
 
-void
+int
 nfs4_state_start(void)
 {
+	int ret;
+
 	if (nfs4_init)
-		return;
+		return 0;
 	nfsd4_load_reboot_recovery_data();
-	__nfs4_state_start();
+	ret = __nfs4_state_start();
+	if (ret)
+		return ret;
 	nfs4_init = 1;
-	return;
+	return 0;
 }
 
 time_t
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4472449..fcc0010 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -411,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs)
 	error =	nfsd_racache_init(2*nrservs);
 	if (error<0)
 		goto out;
-	nfs4_state_start();
+	error = nfs4_state_start();
+	if (error)
+		goto out;
 
 	nfsd_reset_versions();
 
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 2812ed5..24fdf89 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -166,7 +166,7 @@ extern int nfsd_max_blksize;
 extern unsigned int max_delegations;
 int nfs4_state_init(void);
 void nfsd4_free_slabs(void);
-void nfs4_state_start(void);
+int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
 time_t nfs4_lease_time(void);
 void nfs4_reset_lease(time_t leasetime);
@@ -174,7 +174,7 @@ int nfs4_reset_recoverydir(char *recdir);
 #else
 static inline int nfs4_state_init(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
-static inline void nfs4_state_start(void) { }
+static inline int nfs4_state_start(void) { }
 static inline void nfs4_state_shutdown(void) { }
 static inline time_t nfs4_lease_time(void) { return 0; }
 static inline void nfs4_reset_lease(time_t leasetime) { }
-- 
cgit v0.10.2


From 80fc015bdfe1f5b870c1e1ee02d78e709523fee7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 15 Sep 2009 18:07:35 -0400
Subject: nfsd4: use common rpc_cred for all callbacks

Callbacks are always made using the machine's identity, so we can use a
single auth_generic credential shared among callbacks to all clients and
let the rpc code take care of the rest.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4abb882..1285197 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,42 +432,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
 	.rpc_call_done = nfsd4_cb_probe_done,
 };
 
-static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
+static struct rpc_cred *callback_cred;
+
+int set_callback_cred(void)
 {
-	struct auth_cred acred = {
-		.machine_cred = 1
-	};
-	struct rpc_auth *auth = cb->cb_client->cl_auth;
-
-	/*
-	 * Note in the gss case this doesn't actually have to wait for a
-	 * gss upcall (or any calls to the client); this just creates a
-	 * non-uptodate cred which the rpc state machine will fill in with
-	 * a refresh_upcall later.
-	 */
-	return auth->au_ops->lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW);
+	callback_cred = rpc_lookup_machine_cred();
+	if (!callback_cred)
+		return -ENOMEM;
+	return 0;
 }
 
+
 void do_probe_callback(struct nfs4_client *clp)
 {
 	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
+		.rpc_cred	= callback_cred
 	};
-	struct rpc_cred *cred;
 	int status;
 
-	cred = lookup_cb_cred(cb);
-	if (IS_ERR(cred)) {
-		status = PTR_ERR(cred);
-		goto out;
-	}
-	cb->cb_cred = cred;
-	msg.rpc_cred = cb->cb_cred;
 	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
 				&nfsd4_cb_probe_ops, (void *)clp);
-out:
 	if (status) {
 		warn_no_callback_path(clp, status);
 		put_nfs4_client(clp);
@@ -550,7 +537,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
 		.rpc_argp = dp,
-		.rpc_cred = clp->cl_cb_conn.cb_cred
+		.rpc_cred = callback_cred
 	};
 	int status;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 11db40c..0445192 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -696,10 +696,6 @@ shutdown_callback_client(struct nfs4_client *clp)
 		clp->cl_cb_conn.cb_client = NULL;
 		rpc_shutdown_client(clnt);
 	}
-	if (clp->cl_cb_conn.cb_cred) {
-		put_rpccred(clp->cl_cb_conn.cb_cred);
-		clp->cl_cb_conn.cb_cred = NULL;
-	}
 }
 
 static inline void
@@ -4020,7 +4016,7 @@ __nfs4_state_start(void)
 		return -ENOMEM;
 	queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
 	set_max_delegations();
-	return 0;
+	return set_callback_cred();
 }
 
 int
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 70ef5f4..9bf3aa8 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -89,7 +89,6 @@ struct nfs4_cb_conn {
 	/* RPC client info */
 	atomic_t		cb_set;     /* successful CB_NULL call */
 	struct rpc_clnt *       cb_client;
-	struct rpc_cred	*	cb_cred;
 };
 
 /* Maximum number of slots per session. 160 is useful for long haul TCP */
@@ -362,6 +361,7 @@ extern int nfs4_in_grace(void);
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
 extern void put_nfs4_client(struct nfs4_client *clp);
 extern void nfs4_free_stateowner(struct kref *kref);
+extern int set_callback_cred(void);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
-- 
cgit v0.10.2


From 38524ab38f2752beee262a97403d871665838172 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 10 Sep 2009 12:25:59 +0300
Subject: nfsd41: Backchannel: callback infrastructure

Keep the xprt used for create_session in cl_cb_xprt.
Mark cl_callback.cb_minorversion = 1 and remember
the client provided cl_callback.cb_prog rpc program number.
Use it to probe the callback path.

Use the client's network address to initialize as the
callback's address as expected by the xprt creation
routines.

Define xdr sizes and code nfs4_cb_compound header to be able
to send a null callback rpc.

Signed-off-by: Andy Adamson<andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
[get callback minorversion from fore channel's]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: change bc_sock to bc_xprt]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[pulled definition for cl_cb_xprt]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: set up backchannel's cb_addr]
[moved rpc_create_args init to "nfsd: modify nfsd4.1 backchannel to use new xprt class"]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1285197..db4188c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -43,6 +43,7 @@
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svcsock.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/state.h>
 #include <linux/sunrpc/sched.h>
@@ -52,16 +53,19 @@
 
 #define NFSPROC4_CB_NULL 0
 #define NFSPROC4_CB_COMPOUND 1
+#define NFS4_STATEID_SIZE 16
 
 /* Index of predefined Linux callback client operations */
 
 enum {
 	NFSPROC4_CLNT_CB_NULL = 0,
 	NFSPROC4_CLNT_CB_RECALL,
+	NFSPROC4_CLNT_CB_SEQUENCE,
 };
 
 enum nfs_cb_opnum4 {
 	OP_CB_RECALL            = 4,
+	OP_CB_SEQUENCE          = 11,
 };
 
 #define NFS4_MAXTAGLEN		20
@@ -70,15 +74,22 @@ enum nfs_cb_opnum4 {
 #define NFS4_dec_cb_null_sz		0
 #define cb_compound_enc_hdr_sz		4
 #define cb_compound_dec_hdr_sz		(3 + (NFS4_MAXTAGLEN >> 2))
+#define sessionid_sz			(NFS4_MAX_SESSIONID_LEN >> 2)
+#define cb_sequence_enc_sz		(sessionid_sz + 4 +             \
+					1 /* no referring calls list yet */)
+#define cb_sequence_dec_sz		(op_dec_sz + sessionid_sz + 4)
+
 #define op_enc_sz			1
 #define op_dec_sz			2
 #define enc_nfs4_fh_sz			(1 + (NFS4_FHSIZE >> 2))
 #define enc_stateid_sz			(NFS4_STATEID_SIZE >> 2)
 #define NFS4_enc_cb_recall_sz		(cb_compound_enc_hdr_sz +       \
+					cb_sequence_enc_sz +            \
 					1 + enc_stateid_sz +            \
 					enc_nfs4_fh_sz)
 
 #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+					cb_sequence_dec_sz +            \
 					op_dec_sz)
 
 /*
@@ -137,11 +148,13 @@ xdr_error:                                      \
 } while (0)
 
 struct nfs4_cb_compound_hdr {
-	int		status;
-	u32		ident;
+	/* args */
+	u32		ident;	/* minorversion 0 only */
 	u32		nops;
 	__be32		*nops_p;
 	u32		minorversion;
+	/* res */
+	int		status;
 	u32		taglen;
 	char		*tag;
 };
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0445192..d8196b4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -702,6 +702,8 @@ static inline void
 free_client(struct nfs4_client *clp)
 {
 	shutdown_callback_client(clp);
+	if (clp->cl_cb_xprt)
+		svc_xprt_put(clp->cl_cb_xprt);
 	if (clp->cl_cred.cr_group_info)
 		put_group_info(clp->cl_cred.cr_group_info);
 	kfree(clp->cl_principal);
@@ -1317,6 +1319,18 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		cr_ses->flags &= ~SESSION4_PERSIST;
 		cr_ses->flags &= ~SESSION4_RDMA;
 
+		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+			unconf->cl_cb_xprt = rqstp->rq_xprt;
+			svc_xprt_get(unconf->cl_cb_xprt);
+			rpc_copy_addr(
+				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+				sa);
+			unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+			unconf->cl_cb_conn.cb_minorversion =
+				cstate->minorversion;
+			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+			nfsd4_probe_callback(unconf);
+		}
 		conf = unconf;
 	} else {
 		status = nfserr_stale_clientid;
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 9bf3aa8..c916032 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -211,6 +211,9 @@ struct nfs4_client {
 	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
 	u32			cl_exchange_flags;
 	struct nfs4_sessionid	cl_sessionid;
+
+	/* for nfs41 callbacks */
+	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
 };
 
 /* struct nfs4_client_reset
-- 
cgit v0.10.2


From 132f97715c098393fb8de3c26b07b9fdbd2334f1 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Thu, 10 Sep 2009 12:26:12 +0300
Subject: nfsd41: Backchannel: Add sequence arguments to callback RPC arguments

Follow the model we use in the client. Make the sequence arguments
part of the regular RPC arguments.  None of the callbacks that are
soon to be implemented expect results that need to be passed back
to the caller, so we don't define a separate RPC results structure.
For session validation, the cb_sequence decoding will use a pointer
to the sequence arguments that are part of the RPC argument.

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
[define struct nfsd4_cb_sequence here]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index db4188c..f311757 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -92,6 +92,11 @@ enum nfs_cb_opnum4 {
 					cb_sequence_dec_sz +            \
 					op_dec_sz)
 
+struct nfs4_rpc_args {
+	void				*args_op;
+	struct nfsd4_cb_sequence	args_seq;
+};
+
 /*
 * Generic encode routines from fs/nfs/nfs4xdr.c
 */
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index c916032..0e5b5ae 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -60,6 +60,12 @@ typedef struct {
 #define si_stateownerid   si_opaque.so_stateownerid
 #define si_fileid         si_opaque.so_fileid
 
+struct nfsd4_cb_sequence {
+	/* args/res */
+	u32			cbs_minorversion;
+	struct nfs4_client	*cbs_clp;
+};
+
 struct nfs4_delegation {
 	struct list_head	dl_perfile;
 	struct list_head	dl_perclnt;
-- 
cgit v0.10.2


From 199ff35e1c8724871e157c2e48556c2794946e82 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Thu, 10 Sep 2009 12:26:25 +0300
Subject: nfsd41: Backchannel: Server backchannel RPC wait queue

RPC callback requests will wait on this wait queue if the backchannel
is out of slots.

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d8196b4..f4cebd9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -775,6 +775,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_sessions);
 	INIT_LIST_HEAD(&clp->cl_lru);
+	clear_bit(0, &clp->cl_cb_slot_busy);
+	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
 	return clp;
 }
 
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 0e5b5ae..9cc40a1 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -219,7 +219,11 @@ struct nfs4_client {
 	struct nfs4_sessionid	cl_sessionid;
 
 	/* for nfs41 callbacks */
+	/* We currently support a single back channel with a single slot */
+	unsigned long		cl_cb_slot_busy;
 	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
+	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+						/* wait here for slots */
 };
 
 /* struct nfs4_client_reset
-- 
cgit v0.10.2


From 2a1d1b593803d7c18a369bf148f3b48c5a3260fc Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Thu, 10 Sep 2009 12:26:38 +0300
Subject: nfsd41: Backchannel: Setup sequence information

Follows the model used by the NFS client.  Setup the RPC prepare and done
function pointers so that we can populate the sequence information if
minorversion == 1.  rpc_run_task() is then invoked directly just like
existing NFS client operations do.

nfsd4_cb_prepare() determines if the sequence information needs to be setup.
If the slot is in use, it adds itself to the wait queue.

nfsd4_cb_done() wakes anyone sleeping on the callback channel wait queue
after our RPC reply has been received.  It also sets the task message
result pointer to NULL to clearly indicate we're done using it.

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
[define and initialize cl_cb_seq_nr here]
[pulled out unused defintion of nfsd4_cb_done]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f311757..25a0906 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -501,6 +501,67 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	do_probe_callback(clp);
 }
 
+/*
+ * There's currently a single callback channel slot.
+ * If the slot is available, then mark it busy.  Otherwise, set the
+ * thread for sleeping on the callback RPC wait queue.
+ */
+static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
+		struct rpc_task *task)
+{
+	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+	u32 *ptr = (u32 *)clp->cl_sessionid.data;
+	int status = 0;
+
+	dprintk("%s: %u:%u:%u:%u\n", __func__,
+		ptr[0], ptr[1], ptr[2], ptr[3]);
+
+	if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+		rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
+		dprintk("%s slot is busy\n", __func__);
+		status = -EAGAIN;
+		goto out;
+	}
+
+	/*
+	 * We'll need the clp during XDR encoding and decoding,
+	 * and the sequence during decoding to verify the reply
+	 */
+	args->args_seq.cbs_clp = clp;
+	task->tk_msg.rpc_resp = &args->args_seq;
+
+out:
+	dprintk("%s status=%d\n", __func__, status);
+	return status;
+}
+
+/*
+ * TODO: cb_sequence should support referring call lists, cachethis, multiple
+ * slots, and mark callback channel down on communication errors.
+ */
+static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_delegation *dp = calldata;
+	struct nfs4_client *clp = dp->dl_client;
+	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+	u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+	int status = 0;
+
+	args->args_seq.cbs_minorversion = minorversion;
+	if (minorversion) {
+		status = nfsd41_cb_setup_sequence(clp, task);
+		if (status) {
+			if (status != -EAGAIN) {
+				/* terminate rpc task */
+				task->tk_status = status;
+				task->tk_action = NULL;
+			}
+			return;
+		}
+	}
+	rpc_call_start(task);
+}
+
 static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_delegation *dp = calldata;
@@ -540,6 +601,7 @@ static void nfsd4_cb_recall_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+	.rpc_call_prepare = nfsd4_cb_prepare,
 	.rpc_call_done = nfsd4_cb_recall_done,
 	.rpc_release = nfsd4_cb_recall_release,
 };
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f4cebd9..76b7bcb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1331,6 +1331,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			unconf->cl_cb_conn.cb_minorversion =
 				cstate->minorversion;
 			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+			unconf->cl_cb_seq_nr = 1;
 			nfsd4_probe_callback(unconf);
 		}
 		conf = unconf;
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 9cc40a1..b38d113 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -221,6 +221,7 @@ struct nfs4_client {
 	/* for nfs41 callbacks */
 	/* We currently support a single back channel with a single slot */
 	unsigned long		cl_cb_slot_busy;
+	u32			cl_cb_seq_nr;
 	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
 						/* wait here for slots */
-- 
cgit v0.10.2


From 2af73580b7d7b687175f47ba092640761602b221 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Thu, 10 Sep 2009 12:26:51 +0300
Subject: nfsd41: Backchannel: cb_sequence callback

Implement the cb_sequence callback conforming to draft-ietf-nfsv4-minorversion1

Note: highest slot id and target highest slot id do not have to be 0
as was previously implemented.  They can be greater than what the
nfs server sent if the client supports a larger slot table on the
backchannel.  At this point we just ignore that.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
[Rework the back channel xdr using the shared v4.0 and v4.1 framework.]
Signed-off-by: Andy Adamson <andros@netapp.com>
[fixed indentation]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: use nfsd4_cb_sequence for callback minorversion]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: fix verification of CB_SEQUENCE highest slot id[
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: Backchannel: Remove old backchannel serialization]
[nfsd41: Backchannel: First callback sequence ID should be 1]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: decode_cb_sequence does not need to actually decode ignored fields]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 25a0906..d37707d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -256,6 +256,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
 	hdr->nops++;
 }
 
+static void
+encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
+		   struct nfs4_cb_compound_hdr *hdr)
+{
+	__be32 *p;
+
+	if (hdr->minorversion == 0)
+		return;
+
+	RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
+
+	WRITE32(OP_CB_SEQUENCE);
+	WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN);
+	WRITE32(args->cbs_clp->cl_cb_seq_nr);
+	WRITE32(0);		/* slotid, always 0 */
+	WRITE32(0);		/* highest slotid always 0 */
+	WRITE32(0);		/* cachethis always 0 */
+	WRITE32(0); /* FIXME: support referring_call_lists */
+	hdr->nops++;
+}
+
 static int
 nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
 {
@@ -317,6 +338,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 	return 0;
 }
 
+/*
+ * Our current back channel implmentation supports a single backchannel
+ * with a single slot.
+ */
+static int
+decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
+		   struct rpc_rqst *rqstp)
+{
+	struct nfs4_sessionid id;
+	int status;
+	u32 dummy;
+	__be32 *p;
+
+	if (res->cbs_minorversion == 0)
+		return 0;
+
+	status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
+	if (status)
+		return status;
+
+	/*
+	 * If the server returns different values for sessionID, slotID or
+	 * sequence number, the server is looney tunes.
+	 */
+	status = -ESERVERFAULT;
+
+	READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
+	memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
+	p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
+	if (memcmp(id.data, res->cbs_clp->cl_sessionid.data,
+		   NFS4_MAX_SESSIONID_LEN)) {
+		dprintk("%s Invalid session id\n", __func__);
+		goto out;
+	}
+	READ32(dummy);
+	if (dummy != res->cbs_clp->cl_cb_seq_nr) {
+		dprintk("%s Invalid sequence number\n", __func__);
+		goto out;
+	}
+	READ32(dummy); 	/* slotid must be 0 */
+	if (dummy != 0) {
+		dprintk("%s Invalid slotid\n", __func__);
+		goto out;
+	}
+	/* FIXME: process highest slotid and target highest slotid */
+	status = 0;
+out:
+	return status;
+}
+
+
 static int
 nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
 {
-- 
cgit v0.10.2


From 0421b5c55acd0e88920cb9a5bcea6ed738186853 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Thu, 10 Sep 2009 12:27:04 +0300
Subject: nfsd41: Backchannel: Implement cb_recall over NFSv4.1

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
[nfsd41: cb_recall callback]
[Share v4.0 and v4.1 back channel xdr]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Ricardo Labiaga <ricardo.labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Share v4.0 and v4.1 back channel xdr]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: use nfsd4_cb_sequence for callback minorversion]
[nfsd41: conditionally decode_sequence in nfs4_xdr_dec_cb_recall]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: Backchannel: Add sequence arguments to callback RPC arguments]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
[pulled-in definition of nfsd4_cb_done]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d37707d..89f23ed 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -288,15 +288,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
 }
 
 static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args)
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
+		struct nfs4_rpc_args *rpc_args)
 {
 	struct xdr_stream xdr;
+	struct nfs4_delegation *args = rpc_args->args_op;
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = args->dl_ident,
+		.minorversion = rpc_args->args_seq.cbs_minorversion,
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_cb_compound_hdr(&xdr, &hdr);
+	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
 	encode_cb_recall(&xdr, args, &hdr);
 	encode_cb_nops(&hdr);
 	return 0;
@@ -396,7 +400,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
 }
 
 static int
-nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
+nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
+		struct nfsd4_cb_sequence *seq)
 {
 	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr;
@@ -406,6 +411,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
 	status = decode_cb_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	if (seq) {
+		status = decode_cb_sequence(&xdr, seq, rqstp);
+		if (status)
+			goto out;
+	}
 	status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
 out:
 	return status;
@@ -634,11 +644,34 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 	rpc_call_start(task);
 }
 
+static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_delegation *dp = calldata;
+	struct nfs4_client *clp = dp->dl_client;
+
+	dprintk("%s: minorversion=%d\n", __func__,
+		clp->cl_cb_conn.cb_minorversion);
+
+	if (clp->cl_cb_conn.cb_minorversion) {
+		/* No need for lock, access serialized in nfsd4_cb_prepare */
+		++clp->cl_cb_seq_nr;
+		clear_bit(0, &clp->cl_cb_slot_busy);
+		rpc_wake_up_next(&clp->cl_cb_waitq);
+		dprintk("%s: freed slot, new seqid=%d\n", __func__,
+			clp->cl_cb_seq_nr);
+
+		/* We're done looking into the sequence information */
+		task->tk_msg.rpc_resp = NULL;
+	}
+}
+
 static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_delegation *dp = calldata;
 	struct nfs4_client *clp = dp->dl_client;
 
+	nfsd4_cb_done(task, calldata);
+
 	switch (task->tk_status) {
 	case -EIO:
 		/* Network partition? */
@@ -651,16 +684,19 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 		break;
 	default:
 		/* success, or error we can't handle */
-		return;
+		goto done;
 	}
 	if (dp->dl_retries--) {
 		rpc_delay(task, 2*HZ);
 		task->tk_status = 0;
 		rpc_restart_call(task);
+		return;
 	} else {
 		atomic_set(&clp->cl_cb_conn.cb_set, 0);
 		warn_no_callback_path(clp, task->tk_status);
 	}
+done:
+	kfree(task->tk_msg.rpc_argp);
 }
 
 static void nfsd4_cb_recall_release(void *calldata)
@@ -686,17 +722,24 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfs4_client *clp = dp->dl_client;
 	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+	struct nfs4_rpc_args *args;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
-		.rpc_argp = dp,
 		.rpc_cred = callback_cred
 	};
-	int status;
+	int status = -ENOMEM;
 
+	args = kzalloc(sizeof(*args), GFP_KERNEL);
+	if (!args)
+		goto out;
+	args->args_op = dp;
+	msg.rpc_argp = args;
 	dp->dl_retries = 1;
 	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
 				&nfsd4_cb_recall_ops, dp);
+out:
 	if (status) {
+		kfree(args);
 		put_nfs4_client(clp);
 		nfs4_put_delegation(dp);
 	}
-- 
cgit v0.10.2


From 3ddc8bf5f31c906c558ce3da4856208a864d2fc1 Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Thu, 10 Sep 2009 12:27:21 +0300
Subject: nfsd41: modify nfsd4.1 backchannel to use new xprt class

This patch enables the use of the nfsv4.1 backchannel.

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
[initialize rpc_create_args.bc_xprt too]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 89f23ed..24e8d78 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -48,6 +48,7 @@
 #include <linux/nfsd/state.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/nfs4.h>
+#include <linux/sunrpc/xprtsock.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -483,7 +484,7 @@ int setup_callback_client(struct nfs4_client *clp)
 		.to_retries	= 0,
 	};
 	struct rpc_create_args args = {
-		.protocol	= IPPROTO_TCP,
+		.protocol	= XPRT_TRANSPORT_TCP,
 		.address	= (struct sockaddr *) &cb->cb_addr,
 		.addrsize	= cb->cb_addrlen,
 		.timeout	= &timeparms,
@@ -498,7 +499,10 @@ int setup_callback_client(struct nfs4_client *clp)
 
 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
 		return -EINVAL;
-
+	if (cb->cb_minorversion) {
+		args.bc_xprt = clp->cl_cb_xprt;
+		args.protocol = XPRT_TRANSPORT_BC_TCP;
+	}
 	/* Create RPC client */
 	client = rpc_create(&args);
 	if (IS_ERR(client)) {
-- 
cgit v0.10.2


From b09333c4644d173d95b8f3fd4f1dc4375d91be2a Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Thu, 10 Sep 2009 12:27:34 +0300
Subject: nfsd41: Refactor create_client()

Move common initialization of 'struct nfs4_client' inside create_client().

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>

[nfsd41: Remember the auth flavor to use for callbacks]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 76b7bcb..a2bd37e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -759,27 +759,6 @@ expire_client(struct nfs4_client *clp)
 	put_nfs4_client(clp);
 }
 
-static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
-{
-	struct nfs4_client *clp;
-
-	clp = alloc_client(name);
-	if (clp == NULL)
-		return NULL;
-	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
-	atomic_set(&clp->cl_count, 1);
-	atomic_set(&clp->cl_cb_conn.cb_set, 0);
-	INIT_LIST_HEAD(&clp->cl_idhash);
-	INIT_LIST_HEAD(&clp->cl_strhash);
-	INIT_LIST_HEAD(&clp->cl_openowners);
-	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_sessions);
-	INIT_LIST_HEAD(&clp->cl_lru);
-	clear_bit(0, &clp->cl_cb_slot_busy);
-	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
-	return clp;
-}
-
 static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
 {
 	memcpy(target->cl_verifier.data, source->data,
@@ -842,6 +821,46 @@ static void gen_confirm(struct nfs4_client *clp)
 	*p++ = i++;
 }
 
+static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
+		struct svc_rqst *rqstp, nfs4_verifier *verf)
+{
+	struct nfs4_client *clp;
+	struct sockaddr *sa = svc_addr(rqstp);
+	char *princ;
+
+	clp = alloc_client(name);
+	if (clp == NULL)
+		return NULL;
+
+	princ = svc_gss_principal(rqstp);
+	if (princ) {
+		clp->cl_principal = kstrdup(princ, GFP_KERNEL);
+		if (clp->cl_principal == NULL) {
+			free_client(clp);
+			return NULL;
+		}
+	}
+
+	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
+	atomic_set(&clp->cl_count, 1);
+	atomic_set(&clp->cl_cb_conn.cb_set, 0);
+	INIT_LIST_HEAD(&clp->cl_idhash);
+	INIT_LIST_HEAD(&clp->cl_strhash);
+	INIT_LIST_HEAD(&clp->cl_openowners);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_sessions);
+	INIT_LIST_HEAD(&clp->cl_lru);
+	clear_bit(0, &clp->cl_cb_slot_busy);
+	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
+	copy_verf(clp, verf);
+	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+	clp->cl_flavor = rqstp->rq_flavor;
+	copy_cred(&clp->cl_cred, &rqstp->rq_cred);
+	gen_confirm(clp);
+
+	return clp;
+}
+
 static int check_name(struct xdr_netobj name)
 {
 	if (name.len == 0) 
@@ -1189,17 +1208,13 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 
 out_new:
 	/* Normal case */
-	new = create_client(exid->clname, dname);
+	new = create_client(exid->clname, dname, rqstp, &verf);
 	if (new == NULL) {
 		status = nfserr_serverfault;
 		goto out;
 	}
 
-	copy_verf(new, &verf);
-	copy_cred(&new->cl_cred, &rqstp->rq_cred);
-	rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa);
 	gen_clid(new);
-	gen_confirm(new);
 	add_to_unconfirmed(new, strhashval);
 out_copy:
 	exid->clientid.cl_boot = new->cl_clientid.cl_boot;
@@ -1473,7 +1488,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	unsigned int 		strhashval;
 	struct nfs4_client	*conf, *unconf, *new;
 	__be32 			status;
-	char			*princ;
 	char                    dname[HEXDIR_LEN];
 	
 	if (!check_name(clname))
@@ -1518,7 +1532,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 */
 		if (unconf)
 			expire_client(unconf);
-		new = create_client(clname, dname);
+		new = create_client(clname, dname, rqstp, &clverifier);
 		if (new == NULL)
 			goto out;
 		gen_clid(new);
@@ -1535,7 +1549,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			expire_client(unconf);
 		}
-		new = create_client(clname, dname);
+		new = create_client(clname, dname, rqstp, &clverifier);
 		if (new == NULL)
 			goto out;
 		copy_clid(new, conf);
@@ -1545,7 +1559,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 * probable client reboot; state will be removed if
 		 * confirmed.
 		 */
-		new = create_client(clname, dname);
+		new = create_client(clname, dname, rqstp, &clverifier);
 		if (new == NULL)
 			goto out;
 		gen_clid(new);
@@ -1556,24 +1570,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 * confirmed.
 		 */
 		expire_client(unconf);
-		new = create_client(clname, dname);
+		new = create_client(clname, dname, rqstp, &clverifier);
 		if (new == NULL)
 			goto out;
 		gen_clid(new);
 	}
-	copy_verf(new, &clverifier);
-	rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa);
-	new->cl_flavor = rqstp->rq_flavor;
-	princ = svc_gss_principal(rqstp);
-	if (princ) {
-		new->cl_principal = kstrdup(princ, GFP_KERNEL);
-		if (new->cl_principal == NULL) {
-			free_client(new);
-			goto out;
-		}
-	}
-	copy_cred(&new->cl_cred, &rqstp->rq_cred);
-	gen_confirm(new);
 	gen_callback(new, setclid, rpc_get_scope_id(sa));
 	add_to_unconfirmed(new, strhashval);
 	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
-- 
cgit v0.10.2


From 79dfc9687661c13ef95eb4c2226f3db4ccab52c9 Mon Sep 17 00:00:00 2001
From: Cliff Cai <cliff.cai@analog.com>
Date: Wed, 16 Sep 2009 20:25:08 -0400
Subject: ASoC: Blackfin AC97: add a few missing multichannel define handling

Somewhere along the line, most of SND_BF5XX_MULTICHAN_SUPPORT handling was
merged, but two places were missed (the probe/resume functions).  Restore
handling of this option so it gets initialized properly.

Signed-off-by: Cliff Cai <cliff.cai@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c
index 2758b90..e693229 100644
--- a/sound/soc/blackfin/bf5xx-ac97.c
+++ b/sound/soc/blackfin/bf5xx-ac97.c
@@ -277,7 +277,11 @@ static int bf5xx_ac97_resume(struct snd_soc_dai *dai)
 	if (!dai->active)
 		return 0;
 
+#if defined(CONFIG_SND_BF5XX_MULTICHAN_SUPPORT)
+	ret = sport_set_multichannel(sport, 16, 0x3FF, 1);
+#else
 	ret = sport_set_multichannel(sport, 16, 0x1F, 1);
+#endif
 	if (ret) {
 		pr_err("SPORT is busy!\n");
 		return -EBUSY;
@@ -334,7 +338,11 @@ static int bf5xx_ac97_probe(struct platform_device *pdev,
 		goto sport_err;
 	}
 	/*SPORT works in TDM mode to simulate AC97 transfers*/
+#if defined(CONFIG_SND_BF5XX_MULTICHAN_SUPPORT)
+	ret = sport_set_multichannel(sport_handle, 16, 0x3FF, 1);
+#else
 	ret = sport_set_multichannel(sport_handle, 16, 0x1F, 1);
+#endif
 	if (ret) {
 		pr_err("SPORT is busy!\n");
 		ret = -EBUSY;
-- 
cgit v0.10.2


From d75150d7c49db42021b8f966d2cbdc215a530208 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 16 Sep 2009 20:25:09 -0400
Subject: ASoC: bf5xx-sport: the irq save/restore funcs take an unsigned long

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/blackfin/bf5xx-sport.c b/sound/soc/blackfin/bf5xx-sport.c
index 469ce7f..99051ff 100644
--- a/sound/soc/blackfin/bf5xx-sport.c
+++ b/sound/soc/blackfin/bf5xx-sport.c
@@ -326,7 +326,7 @@ static inline int sport_hook_tx_dummy(struct sport_device *sport)
 
 int sport_tx_start(struct sport_device *sport)
 {
-	unsigned flags;
+	unsigned long flags;
 	pr_debug("%s: tx_run:%d, rx_run:%d\n", __func__,
 			sport->tx_run, sport->rx_run);
 	if (sport->tx_run)
-- 
cgit v0.10.2


From 7d156a25bd3e8e6ff74faf02faecb5fc5fb4839e Mon Sep 17 00:00:00 2001
From: Barry Song <barry.song@analog.com>
Date: Wed, 16 Sep 2009 20:25:10 -0400
Subject: ASoC: fix typos in Blackfin headers

Signed-off-by: Barry Song <barry.song@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/blackfin/bf5xx-ac97.h b/sound/soc/blackfin/bf5xx-ac97.h
index 3f2a911..a1f97dd 100644
--- a/sound/soc/blackfin/bf5xx-ac97.h
+++ b/sound/soc/blackfin/bf5xx-ac97.h
@@ -1,5 +1,5 @@
 /*
- * linux/sound/arm/bf5xx-ac97.h
+ * sound/soc/blackfin/bf5xx-ac97.h
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/sound/soc/blackfin/bf5xx-i2s.h b/sound/soc/blackfin/bf5xx-i2s.h
index 7107d1a..264ecdc 100644
--- a/sound/soc/blackfin/bf5xx-i2s.h
+++ b/sound/soc/blackfin/bf5xx-i2s.h
@@ -1,5 +1,5 @@
 /*
- * linux/sound/arm/bf5xx-i2s.h
+ * sound/soc/blackfin/bf5xx-i2s.h
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
-- 
cgit v0.10.2


From fab19bae0c2951ed8bc517a53848b027fead293d Mon Sep 17 00:00:00 2001
From: Barry Song <barry.song@analog.com>
Date: Wed, 16 Sep 2009 20:25:11 -0400
Subject: ASoC: Blackfin I2S: add lost platform_device parameter to resume
 function

Commit dc7d7b830ee1 trimmed the platform_device parameter from all of the
suspend functions, but it also accidentally removed it from the resume
function in the Blackfin I2S driver.  So restore it.

Signed-off-by: Barry Song <barry.song@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c
index 876abad..19539c6 100644
--- a/sound/soc/blackfin/bf5xx-i2s.c
+++ b/sound/soc/blackfin/bf5xx-i2s.c
@@ -227,7 +227,8 @@ static int bf5xx_i2s_probe(struct platform_device *pdev,
 	return 0;
 }
 
-static void bf5xx_i2s_remove(struct snd_soc_dai *dai)
+static void bf5xx_i2s_remove(struct platform_device *pdev,
+			struct snd_soc_dai *dai)
 {
 	pr_debug("%s enter\n", __func__);
 	peripheral_free_list(&sport_req[sport_num][0]);
-- 
cgit v0.10.2


From c4c259bcc27c4242b012106afdba183622b1735f Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 15 Sep 2009 16:27:45 +0200
Subject: HID: consolidate connect and disconnect into core code

HID core registers input, hidraw and hiddev devices, but leaves
unregistering it up to the individual driver, which is not really nice.
Let's move all the logic to the core.

Reported-by: Marcel Holtmann <marcel@holtmann.org>
Reported-by: Brian Rogers <brian@xyzw.org>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index ca9bb26..be34d32 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1237,6 +1237,17 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 }
 EXPORT_SYMBOL_GPL(hid_connect);
 
+void hid_disconnect(struct hid_device *hdev)
+{
+	if (hdev->claimed & HID_CLAIMED_INPUT)
+		hidinput_disconnect(hdev);
+	if (hdev->claimed & HID_CLAIMED_HIDDEV)
+		hdev->hiddev_disconnect(hdev);
+	if (hdev->claimed & HID_CLAIMED_HIDRAW)
+		hidraw_disconnect(hdev);
+}
+EXPORT_SYMBOL_GPL(hid_disconnect);
+
 /* a list of devices for which there is a specialized driver on HID bus */
 static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 1b0e07a..03bd703 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -1041,13 +1041,6 @@ static void usbhid_stop(struct hid_device *hid)
 
 	hid_cancel_delayed_stuff(usbhid);
 
-	if (hid->claimed & HID_CLAIMED_INPUT)
-		hidinput_disconnect(hid);
-	if (hid->claimed & HID_CLAIMED_HIDDEV)
-		hiddev_disconnect(hid);
-	if (hid->claimed & HID_CLAIMED_HIDRAW)
-		hidraw_disconnect(hid);
-
 	hid->claimed = 0;
 
 	usb_free_urb(usbhid->urbin);
@@ -1085,7 +1078,7 @@ static struct hid_ll_driver usb_hid_driver = {
 	.hidinput_input_event = usb_hidinput_input_event,
 };
 
-static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
+static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
 	struct usb_host_interface *interface = intf->cur_altsetting;
 	struct usb_device *dev = interface_to_usbdev(intf);
@@ -1117,6 +1110,7 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	hid->ff_init = hid_pidff_init;
 #ifdef CONFIG_USB_HIDDEV
 	hid->hiddev_connect = hiddev_connect;
+	hid->hiddev_disconnect = hiddev_disconnect;
 	hid->hiddev_hid_event = hiddev_hid_event;
 	hid->hiddev_report_event = hiddev_report_event;
 #endif
@@ -1177,7 +1171,7 @@ err:
 	return ret;
 }
 
-static void hid_disconnect(struct usb_interface *intf)
+static void usbhid_disconnect(struct usb_interface *intf)
 {
 	struct hid_device *hid = usb_get_intfdata(intf);
 	struct usbhid_device *usbhid;
@@ -1359,8 +1353,8 @@ MODULE_DEVICE_TABLE (usb, hid_usb_ids);
 
 static struct usb_driver hid_driver = {
 	.name =		"usbhid",
-	.probe =	hid_probe,
-	.disconnect =	hid_disconnect,
+	.probe =	usbhid_probe,
+	.disconnect =	usbhid_disconnect,
 #ifdef CONFIG_PM
 	.suspend =	hid_suspend,
 	.resume =	hid_resume,
diff --git a/include/linux/hid.h b/include/linux/hid.h
index a0ebdac..10f6284 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -494,6 +494,7 @@ struct hid_device {							/* device report descriptor */
 
 	/* hiddev event handler */
 	int (*hiddev_connect)(struct hid_device *, unsigned int);
+	void (*hiddev_disconnect)(struct hid_device *);
 	void (*hiddev_hid_event) (struct hid_device *, struct hid_field *field,
 				  struct hid_usage *, __s32);
 	void (*hiddev_report_event) (struct hid_device *, struct hid_report *);
@@ -691,6 +692,7 @@ struct hid_device *hid_allocate_device(void);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
 int hid_check_keys_pressed(struct hid_device *hid);
 int hid_connect(struct hid_device *hid, unsigned int connect_mask);
+void hid_disconnect(struct hid_device *hid);
 
 /**
  * hid_map_usage - map usage input bits
@@ -800,6 +802,7 @@ static inline int __must_check hid_hw_start(struct hid_device *hdev,
  */
 static inline void hid_hw_stop(struct hid_device *hdev)
 {
+	hid_disconnect(hdev);
 	hdev->ll_driver->stop(hdev);
 }
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 09bedeb..49d8495 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -577,11 +577,6 @@ static int hidp_session(void *arg)
 	}
 
 	if (session->hid) {
-		if (session->hid->claimed & HID_CLAIMED_INPUT)
-			hidinput_disconnect(session->hid);
-		if (session->hid->claimed & HID_CLAIMED_HIDRAW)
-			hidraw_disconnect(session->hid);
-
 		hid_destroy_device(session->hid);
 		session->hid = NULL;
 	}
@@ -747,8 +742,6 @@ static void hidp_stop(struct hid_device *hid)
 	skb_queue_purge(&session->ctrl_transmit);
 	skb_queue_purge(&session->intr_transmit);
 
-	if (hid->claimed & HID_CLAIMED_INPUT)
-		hidinput_disconnect(hid);
 	hid->claimed = 0;
 }
 
-- 
cgit v0.10.2


From a2d693cf650f000ea22351484ee66cf4c2651eef Mon Sep 17 00:00:00 2001
From: Alessandro Guido <ag@alessandroguido.name>
Date: Thu, 17 Sep 2009 15:42:57 +0200
Subject: HID: Remove duplicate Kconfig entry

Signed-off-by: Alessandro Guido <ag@alessandroguido.name>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 111afbe..24d90ea 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -205,13 +205,6 @@ config HID_NTRIG
 	Support for N-Trig touch screen.
 
 config HID_PANTHERLORD
-	tristate "Pantherlord devices support" if EMBEDDED
-	depends on USB_HID
-	default !EMBEDDED
-	---help---
-	Support for PantherLord/GreenAsia based device support.
-
-config HID_PANTHERLORD
 	tristate "Pantherlord support" if EMBEDDED
 	depends on USB_HID
 	default !EMBEDDED
-- 
cgit v0.10.2


From c0826574ddc0df486ecfc2d655e08904c6513209 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 17 Sep 2009 17:03:06 +1000
Subject: nfsd: return success for non-NFS4 nfs4_state_start

Today's linux-next build (sparc64_defconfig) failed like this:

In file included from arch/sparc/kernel/sys_sparc32.c:32:
include/linux/nfsd/nfsd.h: In function 'nfs4_state_start':
include/linux/nfsd/nfsd.h:177: error: no return statement in function returning non-void

Caused by commit 29ab23cc5d351658d01a4327d55e9106a73fd04f ("nfsd4: allow
nfs4 state startup to fail").  Please, if you add code that depends on a
CONFIG option, build with that option enabled and disabled.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 24fdf89..03bbe9039 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -174,7 +174,7 @@ int nfs4_reset_recoverydir(char *recdir);
 #else
 static inline int nfs4_state_init(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
-static inline int nfs4_state_start(void) { }
+static inline int nfs4_state_start(void) { return 0; }
 static inline void nfs4_state_shutdown(void) { }
 static inline time_t nfs4_lease_time(void) { return 0; }
 static inline void nfs4_reset_lease(time_t leasetime) { }
-- 
cgit v0.10.2


From ad80efc469f56d41f3f4adc1b2c86bf65689ebeb Mon Sep 17 00:00:00 2001
From: Cliff Cai <cliff.cai@analog.com>
Date: Wed, 16 Sep 2009 20:25:12 -0400
Subject: ASoC: Blackfin I2S: fix resuming when device hasn't been used

If the sound system hasn't been utilized yet and we suspend, then we
attempt to save/restore using state that doesn't exist.  So use a global
handle instead to reconfigure properly.

Signed-off-by: Cliff Cai <cliff.cai@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c
index 19539c6..1e9d161 100644
--- a/sound/soc/blackfin/bf5xx-i2s.c
+++ b/sound/soc/blackfin/bf5xx-i2s.c
@@ -237,36 +237,31 @@ static void bf5xx_i2s_remove(struct platform_device *pdev,
 #ifdef CONFIG_PM
 static int bf5xx_i2s_suspend(struct snd_soc_dai *dai)
 {
-	struct sport_device *sport =
-		(struct sport_device *)dai->private_data;
 
 	pr_debug("%s : sport %d\n", __func__, dai->id);
-	if (!dai->active)
-		return 0;
+
 	if (dai->capture.active)
-		sport_rx_stop(sport);
+		sport_rx_stop(sport_handle);
 	if (dai->playback.active)
-		sport_tx_stop(sport);
+		sport_tx_stop(sport_handle);
 	return 0;
 }
 
 static int bf5xx_i2s_resume(struct snd_soc_dai *dai)
 {
 	int ret;
-	struct sport_device *sport =
-		(struct sport_device *)dai->private_data;
 
 	pr_debug("%s : sport %d\n", __func__, dai->id);
-	if (!dai->active)
-		return 0;
 
-	ret = sport_config_rx(sport, RFSR | RCKFE, RSFSE|0x1f, 0, 0);
+	ret = sport_config_rx(sport_handle, bf5xx_i2s.rcr1,
+				      bf5xx_i2s.rcr2, 0, 0);
 	if (ret) {
 		pr_err("SPORT is busy!\n");
 		return -EBUSY;
 	}
 
-	ret = sport_config_tx(sport, TFSR | TCKFE, TSFSE|0x1f, 0, 0);
+	ret = sport_config_tx(sport_handle, bf5xx_i2s.tcr1,
+				      bf5xx_i2s.tcr2, 0, 0);
 	if (ret) {
 		pr_err("SPORT is busy!\n");
 		return -EBUSY;
-- 
cgit v0.10.2


From 0c31cf3e4af79ea18bbd365b07ef0de207673894 Mon Sep 17 00:00:00 2001
From: Chaithrika U S <chaithrika@ti.com>
Date: Tue, 15 Sep 2009 18:13:29 -0400
Subject: ASoC: DaVinci: Fixes to McASP configuration

McASP register settings are not correct for DSP mode of operation.
There is a channel swap initally. This patch provides fixes to
the register values for proper working.

Tested on DA830/OMAP-L137 EVM, DM6467 EVM.

Signed-off-by: Chaithrika U S <chaithrika@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index eca22d7..7a06c0a 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -512,34 +512,49 @@ static int davinci_config_channel_size(struct davinci_audio_dev *dev,
 				       int channel_size)
 {
 	u32 fmt = 0;
+	u32 mask, rotate;
 
 	switch (channel_size) {
 	case DAVINCI_AUDIO_WORD_8:
 		fmt = 0x03;
+		rotate = 6;
+		mask = 0x000000ff;
 		break;
 
 	case DAVINCI_AUDIO_WORD_12:
 		fmt = 0x05;
+		rotate = 5;
+		mask = 0x00000fff;
 		break;
 
 	case DAVINCI_AUDIO_WORD_16:
 		fmt = 0x07;
+		rotate = 4;
+		mask = 0x0000ffff;
 		break;
 
 	case DAVINCI_AUDIO_WORD_20:
 		fmt = 0x09;
+		rotate = 3;
+		mask = 0x000fffff;
 		break;
 
 	case DAVINCI_AUDIO_WORD_24:
 		fmt = 0x0B;
+		rotate = 2;
+		mask = 0x00ffffff;
 		break;
 
 	case DAVINCI_AUDIO_WORD_28:
 		fmt = 0x0D;
+		rotate = 1;
+		mask = 0x0fffffff;
 		break;
 
 	case DAVINCI_AUDIO_WORD_32:
 		fmt = 0x0F;
+		rotate = 0;
+		mask = 0xffffffff;
 		break;
 
 	default:
@@ -550,6 +565,13 @@ static int davinci_config_channel_size(struct davinci_audio_dev *dev,
 					RXSSZ(fmt), RXSSZ(0x0F));
 	mcasp_mod_bits(dev->base + DAVINCI_MCASP_TXFMT_REG,
 					TXSSZ(fmt), TXSSZ(0x0F));
+	mcasp_mod_bits(dev->base + DAVINCI_MCASP_TXFMT_REG, TXROT(rotate),
+							TXROT(7));
+	mcasp_mod_bits(dev->base + DAVINCI_MCASP_RXFMT_REG, RXROT(rotate),
+							RXROT(7));
+	mcasp_set_reg(dev->base + DAVINCI_MCASP_TXMASK_REG, mask);
+	mcasp_set_reg(dev->base + DAVINCI_MCASP_RXMASK_REG, mask);
+
 	return 0;
 }
 
@@ -638,7 +660,6 @@ static void davinci_hw_param(struct davinci_audio_dev *dev, int stream)
 			printk(KERN_ERR "playback tdm slot %d not supported\n",
 				dev->tdm_slots);
 
-		mcasp_set_reg(dev->base + DAVINCI_MCASP_TXMASK_REG, 0xFFFFFFFF);
 		mcasp_clr_bits(dev->base + DAVINCI_MCASP_TXFMCTL_REG, FSXDUR);
 	} else {
 		/* bit stream is MSB first with no delay */
@@ -655,7 +676,6 @@ static void davinci_hw_param(struct davinci_audio_dev *dev, int stream)
 			printk(KERN_ERR "capture tdm slot %d not supported\n",
 				dev->tdm_slots);
 
-		mcasp_set_reg(dev->base + DAVINCI_MCASP_RXMASK_REG, 0xFFFFFFFF);
 		mcasp_clr_bits(dev->base + DAVINCI_MCASP_RXFMCTL_REG, FSRDUR);
 	}
 }
-- 
cgit v0.10.2


From 67e7328f1577230ef3a1430c1a7e5c07978c6e51 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 9 Sep 2009 16:32:54 +1000
Subject: sunrpc/cache: use list_del_init for the list_head entries in
 cache_deferred_req

Using list_del_init is generally safer than list_del, and it will
allow us, in a subsequent patch, to see if an entry has already been
processed or not.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f2895d0..4a32a30 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -529,8 +529,8 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 	if (++cache_defer_cnt > DFR_MAX) {
 		dreq = list_entry(cache_defer_list.prev,
 				  struct cache_deferred_req, recent);
-		list_del(&dreq->recent);
-		list_del(&dreq->hash);
+		list_del_init(&dreq->recent);
+		list_del_init(&dreq->hash);
 		cache_defer_cnt--;
 	}
 	spin_unlock(&cache_defer_lock);
@@ -564,7 +564,7 @@ static void cache_revisit_request(struct cache_head *item)
 			dreq = list_entry(lp, struct cache_deferred_req, hash);
 			lp = lp->next;
 			if (dreq->item == item) {
-				list_del(&dreq->hash);
+				list_del_init(&dreq->hash);
 				list_move(&dreq->recent, &pending);
 				cache_defer_cnt--;
 			}
@@ -590,7 +590,7 @@ void cache_clean_deferred(void *owner)
 
 	list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
 		if (dreq->owner == owner) {
-			list_del(&dreq->hash);
+			list_del_init(&dreq->hash);
 			list_move(&dreq->recent, &pending);
 			cache_defer_cnt--;
 		}
-- 
cgit v0.10.2


From 6a891a3111fe701517bb31c2204304724c7299c8 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 18 Sep 2009 22:45:43 +0200
Subject: i2c: Drop unused i2c_driver.id field

Nobody is using i2c_driver.id any longer, so we can drop that field.

Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index c9087de..e844a0b 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -28,17 +28,6 @@
    identify a legacy client. If you don't need them, just don't set them. */
 
 /*
- * ---- Driver types -----------------------------------------------------
- */
-
-#define I2C_DRIVERID_MSP3400	 1
-#define I2C_DRIVERID_TUNER	 2
-#define I2C_DRIVERID_TDA7432	27	/* Stereo sound processor	*/
-#define I2C_DRIVERID_TVAUDIO    29      /* Generic TV sound driver      */
-#define I2C_DRIVERID_SAA711X	73	/* saa711x video encoders	*/
-#define I2C_DRIVERID_INFRARED	75	/* I2C InfraRed on Video boards */
-
-/*
  * ---- Adapter types ----------------------------------------------------
  */
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index f4784c0..57d41b0 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -98,7 +98,6 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
 
 /**
  * struct i2c_driver - represent an I2C device driver
- * @id: Unique driver ID (optional)
  * @class: What kind of i2c device we instantiate (for detect)
  * @attach_adapter: Callback for bus addition (for legacy drivers)
  * @detach_adapter: Callback for bus removal (for legacy drivers)
@@ -135,7 +134,6 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
  * not allowed.
  */
 struct i2c_driver {
-	int id;
 	unsigned int class;
 
 	/* Notifies the driver that a new bus has appeared or is about to be
-- 
cgit v0.10.2


From 27693ce5f372c0af3b0730f5152b35432afa0fd7 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 18 Sep 2009 22:45:43 +0200
Subject: i2c-taos-evm: Switch echo off to improve performance

When echo is on, we waste time reading back our orders. Switching echo
off makes performance much better: SMBus byte data transactions are 47%
faster and byte transactions are 24% faster.

Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/drivers/i2c/busses/i2c-taos-evm.c b/drivers/i2c/busses/i2c-taos-evm.c
index 224aa12..dd39c1e 100644
--- a/drivers/i2c/busses/i2c-taos-evm.c
+++ b/drivers/i2c/busses/i2c-taos-evm.c
@@ -32,10 +32,12 @@
 
 #define TAOS_STATE_INIT		0
 #define TAOS_STATE_IDLE		1
-#define TAOS_STATE_SEND		2
+#define TAOS_STATE_EOFF		2
 #define TAOS_STATE_RECV		3
 
 #define TAOS_CMD_RESET		0x12
+#define TAOS_CMD_ECHO_ON	'+'
+#define TAOS_CMD_ECHO_OFF	'-'
 
 static DECLARE_WAIT_QUEUE_HEAD(wq);
 
@@ -102,17 +104,9 @@ static int taos_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
 
 	/* Send the transaction to the TAOS EVM */
 	dev_dbg(&adapter->dev, "Command buffer: %s\n", taos->buffer);
-	taos->pos = 0;
-	taos->state = TAOS_STATE_SEND;
-	serio_write(serio, taos->buffer[0]);
-	wait_event_interruptible_timeout(wq, taos->state == TAOS_STATE_IDLE,
-					 msecs_to_jiffies(250));
-	if (taos->state != TAOS_STATE_IDLE) {
-		dev_err(&adapter->dev, "Transaction failed "
-			"(state=%d, pos=%d)\n", taos->state, taos->pos);
-		taos->addr = 0;
-		return -EIO;
-	}
+	for (p = taos->buffer; *p; p++)
+		serio_write(serio, *p);
+
 	taos->addr = addr;
 
 	/* Start the transaction and read the answer */
@@ -122,7 +116,7 @@ static int taos_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
 	wait_event_interruptible_timeout(wq, taos->state == TAOS_STATE_IDLE,
 					 msecs_to_jiffies(150));
 	if (taos->state != TAOS_STATE_IDLE
-	 || taos->pos != 6) {
+	 || taos->pos != 5) {
 		dev_err(&adapter->dev, "Transaction timeout (pos=%d)\n",
 			taos->pos);
 		return -EIO;
@@ -130,7 +124,7 @@ static int taos_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
 	dev_dbg(&adapter->dev, "Answer buffer: %s\n", taos->buffer);
 
 	/* Interpret the returned string */
-	p = taos->buffer + 2;
+	p = taos->buffer + 1;
 	p[3] = '\0';
 	if (!strcmp(p, "NAK"))
 		return -ENODEV;
@@ -173,13 +167,9 @@ static irqreturn_t taos_interrupt(struct serio *serio, unsigned char data,
 			wake_up_interruptible(&wq);
 		}
 		break;
-	case TAOS_STATE_SEND:
-		if (taos->buffer[++taos->pos])
-			serio_write(serio, taos->buffer[taos->pos]);
-		else {
-			taos->state = TAOS_STATE_IDLE;
-			wake_up_interruptible(&wq);
-		}
+	case TAOS_STATE_EOFF:
+		taos->state = TAOS_STATE_IDLE;
+		wake_up_interruptible(&wq);
 		break;
 	case TAOS_STATE_RECV:
 		taos->buffer[taos->pos++] = data;
@@ -257,6 +247,19 @@ static int taos_connect(struct serio *serio, struct serio_driver *drv)
 	}
 	strlcpy(adapter->name, name, sizeof(adapter->name));
 
+	/* Turn echo off for better performance */
+	taos->state = TAOS_STATE_EOFF;
+	serio_write(serio, TAOS_CMD_ECHO_OFF);
+
+	wait_event_interruptible_timeout(wq, taos->state == TAOS_STATE_IDLE,
+					 msecs_to_jiffies(250));
+	if (taos->state != TAOS_STATE_IDLE) {
+		err = -ENODEV;
+		dev_err(&adapter->dev, "Echo off failed "
+			"(state=%d)\n", taos->state);
+		goto exit_close;
+	}
+
 	err = i2c_add_adapter(adapter);
 	if (err)
 		goto exit_close;
-- 
cgit v0.10.2


From ac7809414fb1e3e49b88ad6016e57598594aa4e2 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 18 Sep 2009 22:45:44 +0200
Subject: i2c/tsl2550: Use combined SMBus transactions

Make the I/O faster, mainly by using combined SMBus transactions when
possible. While the TSL2550 datasheet doesn't say the device supports
them, they seem to work just fine in practice, and a combined
transaction is faster than two simple transactions in many cases and
always more reliable.

A side effect is to suppress the delays between SMBus writes and
reads. The datasheet doesn't say they are needed and things work just
fine for me without them.

I also couldn't see any reason for the delay between reading the two
channels. Nor for the loop to get a reading in the first place. The
400 ms delay between samples only matters at chip power-up, after that
the chip always hold the previously sampled value so we never get to
wait.

All these changes make reading the lux value much faster and cheaper.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Michele De Candia <michele.decandia@valueteam.com>
Cc: Rodolfo Giometti <giometti@linux.it>

diff --git a/drivers/i2c/chips/tsl2550.c b/drivers/i2c/chips/tsl2550.c
index b96f302..aa96bd2 100644
--- a/drivers/i2c/chips/tsl2550.c
+++ b/drivers/i2c/chips/tsl2550.c
@@ -24,10 +24,9 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/mutex.h>
-#include <linux/delay.h>
 
 #define TSL2550_DRV_NAME	"tsl2550"
-#define DRIVER_VERSION		"1.1.2"
+#define DRIVER_VERSION		"1.2"
 
 /*
  * Defines
@@ -96,32 +95,13 @@ static int tsl2550_set_power_state(struct i2c_client *client, int state)
 
 static int tsl2550_get_adc_value(struct i2c_client *client, u8 cmd)
 {
-	unsigned long end;
-	int loop = 0, ret = 0;
+	int ret;
 
-	/*
-	 * Read ADC channel waiting at most 400ms (see data sheet for further
-	 * info).
-	 * To avoid long busy wait we spin for few milliseconds then
-	 * start sleeping.
-	 */
-	end = jiffies + msecs_to_jiffies(400);
-	while (time_before(jiffies, end)) {
-		i2c_smbus_write_byte(client, cmd);
-
-		if (loop++ < 5)
-			mdelay(1);
-		else
-			msleep(1);
-
-		ret = i2c_smbus_read_byte(client);
-		if (ret < 0)
-			return ret;
-		else if (ret & 0x0080)
-			break;
-	}
+	ret = i2c_smbus_read_byte_data(client, cmd);
+	if (ret < 0)
+		return ret;
 	if (!(ret & 0x80))
-		return -EIO;
+		return -EAGAIN;
 	return ret & 0x7f;	/* remove the "valid" bit */
 }
 
@@ -285,8 +265,6 @@ static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf)
 		return ret;
 	ch0 = ret;
 
-	mdelay(1);
-
 	ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC1);
 	if (ret < 0)
 		return ret;
@@ -345,11 +323,10 @@ static int tsl2550_init_client(struct i2c_client *client)
 	 * Probe the chip. To do so we try to power up the device and then to
 	 * read back the 0x03 code
 	 */
-	err = i2c_smbus_write_byte(client, TSL2550_POWER_UP);
+	err = i2c_smbus_read_byte_data(client, TSL2550_POWER_UP);
 	if (err < 0)
 		return err;
-	mdelay(1);
-	if (i2c_smbus_read_byte(client) != TSL2550_POWER_UP)
+	if (err != TSL2550_POWER_UP)
 		return -ENODEV;
 	data->power_state = 1;
 
@@ -374,7 +351,8 @@ static int __devinit tsl2550_probe(struct i2c_client *client,
 	struct tsl2550_data *data;
 	int *opmode, err = 0;
 
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) {
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE
+					    | I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
 		err = -EIO;
 		goto exit;
 	}
-- 
cgit v0.10.2


From 51298d1257b9f0a356ad66650c9fe2ca5bfa5ae3 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 18 Sep 2009 22:45:45 +0200
Subject: i2c: Convert i2c clients to a device type

This is required for upcoming changes.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 0e45c29..f236b34 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -64,9 +64,13 @@ static const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
 
 static int i2c_device_match(struct device *dev, struct device_driver *drv)
 {
-	struct i2c_client	*client = to_i2c_client(dev);
-	struct i2c_driver	*driver = to_i2c_driver(drv);
+	struct i2c_client	*client = i2c_verify_client(dev);
+	struct i2c_driver	*driver;
 
+	if (!client)
+		return 0;
+
+	driver = to_i2c_driver(drv);
 	/* match on an id table if there is one */
 	if (driver->id_table)
 		return i2c_match_id(driver->id_table, client) != NULL;
@@ -94,10 +98,14 @@ static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 
 static int i2c_device_probe(struct device *dev)
 {
-	struct i2c_client	*client = to_i2c_client(dev);
-	struct i2c_driver	*driver = to_i2c_driver(dev->driver);
+	struct i2c_client	*client = i2c_verify_client(dev);
+	struct i2c_driver	*driver;
 	int status;
 
+	if (!client)
+		return 0;
+
+	driver = to_i2c_driver(dev->driver);
 	if (!driver->probe || !driver->id_table)
 		return -ENODEV;
 	client->driver = driver;
@@ -114,11 +122,11 @@ static int i2c_device_probe(struct device *dev)
 
 static int i2c_device_remove(struct device *dev)
 {
-	struct i2c_client	*client = to_i2c_client(dev);
+	struct i2c_client	*client = i2c_verify_client(dev);
 	struct i2c_driver	*driver;
 	int			status;
 
-	if (!dev->driver)
+	if (!client || !dev->driver)
 		return 0;
 
 	driver = to_i2c_driver(dev->driver);
@@ -136,37 +144,40 @@ static int i2c_device_remove(struct device *dev)
 
 static void i2c_device_shutdown(struct device *dev)
 {
+	struct i2c_client *client = i2c_verify_client(dev);
 	struct i2c_driver *driver;
 
-	if (!dev->driver)
+	if (!client || !dev->driver)
 		return;
 	driver = to_i2c_driver(dev->driver);
 	if (driver->shutdown)
-		driver->shutdown(to_i2c_client(dev));
+		driver->shutdown(client);
 }
 
 static int i2c_device_suspend(struct device *dev, pm_message_t mesg)
 {
+	struct i2c_client *client = i2c_verify_client(dev);
 	struct i2c_driver *driver;
 
-	if (!dev->driver)
+	if (!client || !dev->driver)
 		return 0;
 	driver = to_i2c_driver(dev->driver);
 	if (!driver->suspend)
 		return 0;
-	return driver->suspend(to_i2c_client(dev), mesg);
+	return driver->suspend(client, mesg);
 }
 
 static int i2c_device_resume(struct device *dev)
 {
+	struct i2c_client *client = i2c_verify_client(dev);
 	struct i2c_driver *driver;
 
-	if (!dev->driver)
+	if (!client || !dev->driver)
 		return 0;
 	driver = to_i2c_driver(dev->driver);
 	if (!driver->resume)
 		return 0;
-	return driver->resume(to_i2c_client(dev));
+	return driver->resume(client);
 }
 
 static void i2c_client_dev_release(struct device *dev)
@@ -188,18 +199,28 @@ show_modalias(struct device *dev, struct device_attribute *attr, char *buf)
 	return sprintf(buf, "%s%s\n", I2C_MODULE_PREFIX, client->name);
 }
 
-static struct device_attribute i2c_dev_attrs[] = {
-	__ATTR(name, S_IRUGO, show_client_name, NULL),
+static DEVICE_ATTR(name, S_IRUGO, show_client_name, NULL);
+static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL);
+
+static struct attribute *i2c_dev_attrs[] = {
+	&dev_attr_name.attr,
 	/* modalias helps coldplug:  modprobe $(cat .../modalias) */
-	__ATTR(modalias, S_IRUGO, show_modalias, NULL),
-	{ },
+	&dev_attr_modalias.attr,
+	NULL
+};
+
+static struct attribute_group i2c_dev_attr_group = {
+	.attrs		= i2c_dev_attrs,
+};
+
+static const struct attribute_group *i2c_dev_attr_groups[] = {
+	&i2c_dev_attr_group,
+	NULL
 };
 
 struct bus_type i2c_bus_type = {
 	.name		= "i2c",
-	.dev_attrs	= i2c_dev_attrs,
 	.match		= i2c_device_match,
-	.uevent		= i2c_device_uevent,
 	.probe		= i2c_device_probe,
 	.remove		= i2c_device_remove,
 	.shutdown	= i2c_device_shutdown,
@@ -208,6 +229,12 @@ struct bus_type i2c_bus_type = {
 };
 EXPORT_SYMBOL_GPL(i2c_bus_type);
 
+static struct device_type i2c_client_type = {
+	.groups		= i2c_dev_attr_groups,
+	.uevent		= i2c_device_uevent,
+	.release	= i2c_client_dev_release,
+};
+
 
 /**
  * i2c_verify_client - return parameter as i2c_client, or NULL
@@ -220,7 +247,7 @@ EXPORT_SYMBOL_GPL(i2c_bus_type);
  */
 struct i2c_client *i2c_verify_client(struct device *dev)
 {
-	return (dev->bus == &i2c_bus_type)
+	return (dev->type == &i2c_client_type)
 			? to_i2c_client(dev)
 			: NULL;
 }
@@ -273,7 +300,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
 
 	client->dev.parent = &client->adapter->dev;
 	client->dev.bus = &i2c_bus_type;
-	client->dev.release = i2c_client_dev_release;
+	client->dev.type = &i2c_client_type;
 
 	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
 		     client->addr);
-- 
cgit v0.10.2


From 4f8cf8240a0c8b232c2ae22e019a4ba1d5f19ccd Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 18 Sep 2009 22:45:46 +0200
Subject: i2c: Convert i2c adapters to bus devices

Kay says i2c adapters shouldn't be class devices but bus devices.
Convert them that way, using a device type.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index f236b34..a18a251 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -46,6 +46,7 @@ static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
 static LIST_HEAD(userspace_devices);
 
+static struct device_type i2c_client_type;
 static int i2c_check_addr(struct i2c_adapter *adapter, int addr);
 static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
 
@@ -186,10 +187,10 @@ static void i2c_client_dev_release(struct device *dev)
 }
 
 static ssize_t
-show_client_name(struct device *dev, struct device_attribute *attr, char *buf)
+show_name(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	return sprintf(buf, "%s\n", client->name);
+	return sprintf(buf, "%s\n", dev->type == &i2c_client_type ?
+		       to_i2c_client(dev)->name : to_i2c_adapter(dev)->name);
 }
 
 static ssize_t
@@ -199,7 +200,7 @@ show_modalias(struct device *dev, struct device_attribute *attr, char *buf)
 	return sprintf(buf, "%s%s\n", I2C_MODULE_PREFIX, client->name);
 }
 
-static DEVICE_ATTR(name, S_IRUGO, show_client_name, NULL);
+static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
 static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL);
 
 static struct attribute *i2c_dev_attrs[] = {
@@ -395,13 +396,6 @@ static void i2c_adapter_dev_release(struct device *dev)
 	complete(&adap->dev_released);
 }
 
-static ssize_t
-show_adapter_name(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct i2c_adapter *adap = to_i2c_adapter(dev);
-	return sprintf(buf, "%s\n", adap->name);
-}
-
 /*
  * Let users instantiate I2C devices through sysfs. This can be used when
  * platform initialization code doesn't contain the proper data for
@@ -520,17 +514,28 @@ i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
 	return res;
 }
 
-static struct device_attribute i2c_adapter_attrs[] = {
-	__ATTR(name, S_IRUGO, show_adapter_name, NULL),
-	__ATTR(new_device, S_IWUSR, NULL, i2c_sysfs_new_device),
-	__ATTR(delete_device, S_IWUSR, NULL, i2c_sysfs_delete_device),
-	{ },
+static DEVICE_ATTR(new_device, S_IWUSR, NULL, i2c_sysfs_new_device);
+static DEVICE_ATTR(delete_device, S_IWUSR, NULL, i2c_sysfs_delete_device);
+
+static struct attribute *i2c_adapter_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_new_device.attr,
+	&dev_attr_delete_device.attr,
+	NULL
+};
+
+static struct attribute_group i2c_adapter_attr_group = {
+	.attrs		= i2c_adapter_attrs,
 };
 
-static struct class i2c_adapter_class = {
-	.owner			= THIS_MODULE,
-	.name			= "i2c-adapter",
-	.dev_attrs		= i2c_adapter_attrs,
+static const struct attribute_group *i2c_adapter_attr_groups[] = {
+	&i2c_adapter_attr_group,
+	NULL
+};
+
+static struct device_type i2c_adapter_type = {
+	.groups		= i2c_adapter_attr_groups,
+	.release	= i2c_adapter_dev_release,
 };
 
 static void i2c_scan_static_board_info(struct i2c_adapter *adapter)
@@ -582,8 +587,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 		adap->timeout = HZ;
 
 	dev_set_name(&adap->dev, "i2c-%d", adap->nr);
-	adap->dev.release = &i2c_adapter_dev_release;
-	adap->dev.class = &i2c_adapter_class;
+	adap->dev.bus = &i2c_bus_type;
+	adap->dev.type = &i2c_adapter_type;
 	res = device_register(&adap->dev);
 	if (res)
 		goto out_list;
@@ -795,9 +800,13 @@ EXPORT_SYMBOL(i2c_del_adapter);
 
 static int __attach_adapter(struct device *dev, void *data)
 {
-	struct i2c_adapter *adapter = to_i2c_adapter(dev);
+	struct i2c_adapter *adapter;
 	struct i2c_driver *driver = data;
 
+	if (dev->type != &i2c_adapter_type)
+		return 0;
+	adapter = to_i2c_adapter(dev);
+
 	i2c_detect(adapter, driver);
 
 	/* Legacy drivers scan i2c busses directly */
@@ -836,8 +845,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
 	INIT_LIST_HEAD(&driver->clients);
 	/* Walk the adapters that are already present */
 	mutex_lock(&core_lock);
-	class_for_each_device(&i2c_adapter_class, NULL, driver,
-			      __attach_adapter);
+	bus_for_each_dev(&i2c_bus_type, NULL, driver, __attach_adapter);
 	mutex_unlock(&core_lock);
 
 	return 0;
@@ -846,10 +854,14 @@ EXPORT_SYMBOL(i2c_register_driver);
 
 static int __detach_adapter(struct device *dev, void *data)
 {
-	struct i2c_adapter *adapter = to_i2c_adapter(dev);
+	struct i2c_adapter *adapter;
 	struct i2c_driver *driver = data;
 	struct i2c_client *client, *_n;
 
+	if (dev->type != &i2c_adapter_type)
+		return 0;
+	adapter = to_i2c_adapter(dev);
+
 	/* Remove the devices we created ourselves as the result of hardware
 	 * probing (using a driver's detect method) */
 	list_for_each_entry_safe(client, _n, &driver->clients, detected) {
@@ -877,8 +889,7 @@ static int __detach_adapter(struct device *dev, void *data)
 void i2c_del_driver(struct i2c_driver *driver)
 {
 	mutex_lock(&core_lock);
-	class_for_each_device(&i2c_adapter_class, NULL, driver,
-			      __detach_adapter);
+	bus_for_each_dev(&i2c_bus_type, NULL, driver, __detach_adapter);
 	mutex_unlock(&core_lock);
 
 	driver_unregister(&driver->driver);
@@ -967,16 +978,11 @@ static int __init i2c_init(void)
 	retval = bus_register(&i2c_bus_type);
 	if (retval)
 		return retval;
-	retval = class_register(&i2c_adapter_class);
-	if (retval)
-		goto bus_err;
 	retval = i2c_add_driver(&dummy_driver);
 	if (retval)
-		goto class_err;
+		goto bus_err;
 	return 0;
 
-class_err:
-	class_unregister(&i2c_adapter_class);
 bus_err:
 	bus_unregister(&i2c_bus_type);
 	return retval;
@@ -985,7 +991,6 @@ bus_err:
 static void __exit i2c_exit(void)
 {
 	i2c_del_driver(&dummy_driver);
-	class_unregister(&i2c_adapter_class);
 	bus_unregister(&i2c_bus_type);
 }
 
-- 
cgit v0.10.2


From 2bb5095affdb8d6e8646a5b8b5a35c1d6a28c3e7 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 18 Sep 2009 22:45:46 +0200
Subject: i2c: Provide compatibility links for i2c adapters

Some user-space applications may be relying on i2c adapters showing up
as class devices in sysfs. Provide compatibility links for them for
the time being. We will remove them after a long transition period.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>

diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index 711ca08..d7ece13 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -27,6 +27,14 @@ config I2C_BOARDINFO
 	boolean
 	default y
 
+config I2C_COMPAT
+	boolean "Enable compatibility bits for old user-space"
+	default y
+	help
+	  Say Y here if you intend to run lm-sensors 3.1.1 or older, or any
+	  other user-space package which expects i2c adapters to be class
+	  devices. If you don't know, say Y.
+
 config I2C_CHARDEV
 	tristate "I2C device interface"
 	help
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index a18a251..8d80fce 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -538,6 +538,10 @@ static struct device_type i2c_adapter_type = {
 	.release	= i2c_adapter_dev_release,
 };
 
+#ifdef CONFIG_I2C_COMPAT
+static struct class_compat *i2c_adapter_compat_class;
+#endif
+
 static void i2c_scan_static_board_info(struct i2c_adapter *adapter)
 {
 	struct i2c_devinfo	*devinfo;
@@ -595,6 +599,14 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 
 	dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name);
 
+#ifdef CONFIG_I2C_COMPAT
+	res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev,
+				       adap->dev.parent);
+	if (res)
+		dev_warn(&adap->dev,
+			 "Failed to create compatibility class link\n");
+#endif
+
 	/* create pre-declared device nodes */
 	if (adap->nr < __i2c_first_dynamic_bus_num)
 		i2c_scan_static_board_info(adap);
@@ -773,6 +785,11 @@ int i2c_del_adapter(struct i2c_adapter *adap)
 	   checking the returned value. */
 	res = device_for_each_child(&adap->dev, NULL, __unregister_client);
 
+#ifdef CONFIG_I2C_COMPAT
+	class_compat_remove_link(i2c_adapter_compat_class, &adap->dev,
+				 adap->dev.parent);
+#endif
+
 	/* clean up the sysfs representation */
 	init_completion(&adap->dev_released);
 	device_unregister(&adap->dev);
@@ -978,12 +995,23 @@ static int __init i2c_init(void)
 	retval = bus_register(&i2c_bus_type);
 	if (retval)
 		return retval;
+#ifdef CONFIG_I2C_COMPAT
+	i2c_adapter_compat_class = class_compat_register("i2c-adapter");
+	if (!i2c_adapter_compat_class) {
+		retval = -ENOMEM;
+		goto bus_err;
+	}
+#endif
 	retval = i2c_add_driver(&dummy_driver);
 	if (retval)
-		goto bus_err;
+		goto class_err;
 	return 0;
 
+class_err:
+#ifdef CONFIG_I2C_COMPAT
+	class_compat_unregister(i2c_adapter_compat_class);
 bus_err:
+#endif
 	bus_unregister(&i2c_bus_type);
 	return retval;
 }
@@ -991,6 +1019,9 @@ bus_err:
 static void __exit i2c_exit(void)
 {
 	i2c_del_driver(&dummy_driver);
+#ifdef CONFIG_I2C_COMPAT
+	class_compat_unregister(i2c_adapter_compat_class);
+#endif
 	bus_unregister(&i2c_bus_type);
 }
 
-- 
cgit v0.10.2


From fce96f3e5d34b0e3195f9d1bf2b3c7e3841e90ff Mon Sep 17 00:00:00 2001
From: Willy Tarreau <wtarreau@exceliance.fr>
Date: Fri, 18 Sep 2009 22:45:47 +0200
Subject: i2c/scx200_acb: Provide more information on bus errors

Upon a bus error, it's rather hard to guess what happened. Dumping the
address, length and status provides a lot of value for troubleshooting
issues.

Signed-off-by: Willy Tarreau <wtarreau@exceliance.fr>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c
index 648ecc6..cf994bd 100644
--- a/drivers/i2c/busses/scx200_acb.c
+++ b/drivers/i2c/busses/scx200_acb.c
@@ -217,8 +217,10 @@ static void scx200_acb_machine(struct scx200_acb_iface *iface, u8 status)
 	return;
 
  error:
-	dev_err(&iface->adapter.dev, "%s in state %s\n", errmsg,
-		scx200_acb_state_name[iface->state]);
+	dev_err(&iface->adapter.dev,
+		"%s in state %s (addr=0x%02x, len=%d, status=0x%02x)\n", errmsg,
+		scx200_acb_state_name[iface->state], iface->address_byte,
+		iface->len, status);
 
 	iface->state = state_idle;
 	iface->result = -EIO;
-- 
cgit v0.10.2


From 4ba2ccb83e03077bb94f8848ee573f1e27cea969 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:45:47 +0200
Subject: gpio/pcf857x: Copy i2c_device_id from old pcf8574 driver

The deprecated pcf8574 driver is going to be removed. Make sure the
replacement driver inherits all i2c_device_ids for a smooth transition.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c
index 9525724..3f1ec1e 100644
--- a/drivers/gpio/pcf857x.c
+++ b/drivers/gpio/pcf857x.c
@@ -28,6 +28,7 @@
 
 static const struct i2c_device_id pcf857x_id[] = {
 	{ "pcf8574", 8 },
+	{ "pcf8574a", 8 },
 	{ "pca8574", 8 },
 	{ "pca9670", 8 },
 	{ "pca9672", 8 },
-- 
cgit v0.10.2


From 8f67eeb0b44cde19216955975ffef8513a87c0c0 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:45:48 +0200
Subject: i2c/chips: Remove deprecated pcf8575 driver

The pcf8575 driver in drivers/i2c/chips which just exports its register to
sysfs is superseded by drivers/gpio/pcf857x.c which properly uses the gpiolib.
As this driver has been deprecated for more than a year, finally remove it.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Bart Van Assche <bart.vanassche@gmail.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/Documentation/i2c/chips/pcf8575 b/Documentation/i2c/chips/pcf8575
deleted file mode 100644
index 40b268e..0000000
--- a/Documentation/i2c/chips/pcf8575
+++ /dev/null
@@ -1,69 +0,0 @@
-About the PCF8575 chip and the pcf8575 kernel driver
-====================================================
-
-The PCF8575 chip is produced by the following manufacturers:
-
-  * Philips NXP
-    http://www.nxp.com/#/pip/cb=[type=product,path=50807/41735/41850,final=PCF8575_3]|pip=[pip=PCF8575_3][0]
-
-  * Texas Instruments
-    http://focus.ti.com/docs/prod/folders/print/pcf8575.html
-
-
-Some vendors sell small PCB's with the PCF8575 mounted on it. You can connect
-such a board to a Linux host via e.g. an USB to I2C interface. Examples of
-PCB boards with a PCF8575:
-
-  * SFE Breakout Board for PCF8575 I2C Expander by RobotShop
-    http://www.robotshop.ca/home/products/robot-parts/electronics/adapters-converters/sfe-pcf8575-i2c-expander-board.html
-
-  * Breakout Board for PCF8575 I2C Expander by Spark Fun Electronics
-    http://www.sparkfun.com/commerce/product_info.php?products_id=8130
-
-
-Description
------------
-The PCF8575 chip is a 16-bit I/O expander for the I2C bus. Up to eight of
-these chips can be connected to the same I2C bus. You can find this
-chip on some custom designed hardware, but you won't find it on PC
-motherboards.
-
-The PCF8575 chip consists of a 16-bit quasi-bidirectional port and an I2C-bus
-interface. Each of the sixteen I/O's can be independently used as an input or
-an output. To set up an I/O pin as an input, you have to write a 1 to the
-corresponding output.
-
-For more information please see the datasheet.
-
-
-Detection
----------
-
-There is no method known to detect whether a chip on a given I2C address is
-a PCF8575 or whether it is any other I2C device, so you have to pass the I2C
-bus and address of the installed PCF8575 devices explicitly to the driver at
-load time via the force=... parameter.
-
-/sys interface
---------------
-
-For each address on which a PCF8575 chip was found or forced the following
-files will be created under /sys:
-* /sys/bus/i2c/devices/<bus>-<address>/read
-* /sys/bus/i2c/devices/<bus>-<address>/write
-where bus is the I2C bus number (0, 1, ...) and address is the four-digit
-hexadecimal representation of the 7-bit I2C address of the PCF8575
-(0020 .. 0027).
-
-The read file is read-only. Reading it will trigger an I2C read and will hence
-report the current input state for the pins configured as inputs, and the
-current output value for the pins configured as outputs.
-
-The write file is read-write. Writing a value to it will configure all pins
-as output for which the corresponding bit is zero. Reading the write file will
-return the value last written, or -EAGAIN if no value has yet been written to
-the write file.
-
-On module initialization the configuration of the chip is not changed -- the
-chip is left in the state it was already configured in through either power-up
-or through previous I2C write actions.
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 02d746c..1b5455e 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -33,24 +33,6 @@ config SENSORS_PCF8574
 	  These devices are hard to detect and rarely found on mainstream
 	  hardware.  If unsure, say N.
 
-config PCF8575
-	tristate "Philips PCF8575 (DEPRECATED)"
-	default n
-	depends on GPIO_PCF857X = "n"
-	help
-	  If you say yes here you get support for Philips PCF8575 chip.
-	  This chip is a 16-bit I/O expander for the I2C bus.  Several other
-	  chip manufacturers sell equivalent chips, e.g. Texas Instruments.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called pcf8575.
-
-	  This driver is deprecated and will be dropped soon. Use
-	  drivers/gpio/pcf857x.c instead.
-
-	  This device is hard to detect and is rarely found on mainstream
-	  hardware.  If unsure, say N.
-
 config SENSORS_PCA9539
 	tristate "Philips PCA9539 16-bit I/O port (DEPRECATED)"
 	depends on EXPERIMENTAL && GPIO_PCA953X = "n"
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index f4680d1..fceb377 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -13,7 +13,6 @@
 obj-$(CONFIG_DS1682)		+= ds1682.o
 obj-$(CONFIG_SENSORS_PCA9539)	+= pca9539.o
 obj-$(CONFIG_SENSORS_PCF8574)	+= pcf8574.o
-obj-$(CONFIG_PCF8575)		+= pcf8575.o
 obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
 
 ifeq ($(CONFIG_I2C_DEBUG_CHIP),y)
diff --git a/drivers/i2c/chips/pcf8575.c b/drivers/i2c/chips/pcf8575.c
deleted file mode 100644
index 07fd7cb..0000000
--- a/drivers/i2c/chips/pcf8575.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
-  pcf8575.c
-
-  About the PCF8575 chip: the PCF8575 is a 16-bit I/O expander for the I2C bus
-  produced by a.o. Philips Semiconductors.
-
-  Copyright (C) 2006 Michael Hennerich, Analog Devices Inc.
-  <hennerich@blackfin.uclinux.org>
-  Based on pcf8574.c.
-
-  Copyright (c) 2007 Bart Van Assche <bart.vanassche@gmail.com>.
-  Ported this driver from ucLinux to the mainstream Linux kernel.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/i2c.h>
-#include <linux/slab.h>  /* kzalloc() */
-#include <linux/sysfs.h> /* sysfs_create_group() */
-
-/* Addresses to scan: none, device can't be detected */
-static const unsigned short normal_i2c[] = { I2C_CLIENT_END };
-
-/* Insmod parameters */
-I2C_CLIENT_INSMOD;
-
-
-/* Each client has this additional data */
-struct pcf8575_data {
-	int write;		/* last written value, or error code */
-};
-
-/* following are the sysfs callback functions */
-static ssize_t show_read(struct device *dev, struct device_attribute *attr,
-			 char *buf)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	u16 val;
-	u8 iopin_state[2];
-
-	i2c_master_recv(client, iopin_state, 2);
-
-	val = iopin_state[0];
-	val |= iopin_state[1] << 8;
-
-	return sprintf(buf, "%u\n", val);
-}
-
-static DEVICE_ATTR(read, S_IRUGO, show_read, NULL);
-
-static ssize_t show_write(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct pcf8575_data *data = dev_get_drvdata(dev);
-	if (data->write < 0)
-		return data->write;
-	return sprintf(buf, "%d\n", data->write);
-}
-
-static ssize_t set_write(struct device *dev, struct device_attribute *attr,
-			 const char *buf, size_t count)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	struct pcf8575_data *data = i2c_get_clientdata(client);
-	unsigned long val = simple_strtoul(buf, NULL, 10);
-	u8 iopin_state[2];
-
-	if (val > 0xffff)
-		return -EINVAL;
-
-	data->write = val;
-
-	iopin_state[0] = val & 0xFF;
-	iopin_state[1] = val >> 8;
-
-	i2c_master_send(client, iopin_state, 2);
-
-	return count;
-}
-
-static DEVICE_ATTR(write, S_IWUSR | S_IRUGO, show_write, set_write);
-
-static struct attribute *pcf8575_attributes[] = {
-	&dev_attr_read.attr,
-	&dev_attr_write.attr,
-	NULL
-};
-
-static const struct attribute_group pcf8575_attr_group = {
-	.attrs = pcf8575_attributes,
-};
-
-/*
- * Real code
- */
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int pcf8575_detect(struct i2c_client *client, int kind,
-			  struct i2c_board_info *info)
-{
-	struct i2c_adapter *adapter = client->adapter;
-
-	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
-		return -ENODEV;
-
-	/* This is the place to detect whether the chip at the specified
-	   address really is a PCF8575 chip. However, there is no method known
-	   to detect whether an I2C chip is a PCF8575 or any other I2C chip. */
-
-	strlcpy(info->type, "pcf8575", I2C_NAME_SIZE);
-
-	return 0;
-}
-
-static int pcf8575_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
-{
-	struct pcf8575_data *data;
-	int err;
-
-	data = kzalloc(sizeof(struct pcf8575_data), GFP_KERNEL);
-	if (!data) {
-		err = -ENOMEM;
-		goto exit;
-	}
-
-	i2c_set_clientdata(client, data);
-	data->write = -EAGAIN;
-
-	/* Register sysfs hooks */
-	err = sysfs_create_group(&client->dev.kobj, &pcf8575_attr_group);
-	if (err)
-		goto exit_free;
-
-	return 0;
-
-exit_free:
-	kfree(data);
-exit:
-	return err;
-}
-
-static int pcf8575_remove(struct i2c_client *client)
-{
-	sysfs_remove_group(&client->dev.kobj, &pcf8575_attr_group);
-	kfree(i2c_get_clientdata(client));
-	return 0;
-}
-
-static const struct i2c_device_id pcf8575_id[] = {
-	{ "pcf8575", 0 },
-	{ }
-};
-
-static struct i2c_driver pcf8575_driver = {
-	.driver = {
-		.owner	= THIS_MODULE,
-		.name	= "pcf8575",
-	},
-	.probe		= pcf8575_probe,
-	.remove		= pcf8575_remove,
-	.id_table	= pcf8575_id,
-
-	.detect		= pcf8575_detect,
-	.address_data	= &addr_data,
-};
-
-static int __init pcf8575_init(void)
-{
-	return i2c_add_driver(&pcf8575_driver);
-}
-
-static void __exit pcf8575_exit(void)
-{
-	i2c_del_driver(&pcf8575_driver);
-}
-
-MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>, "
-	      "Bart Van Assche <bart.vanassche@gmail.com>");
-MODULE_DESCRIPTION("pcf8575 driver");
-MODULE_LICENSE("GPL");
-
-module_init(pcf8575_init);
-module_exit(pcf8575_exit);
-- 
cgit v0.10.2


From 732d481127abaa0add41ee918191ea08e9ede17e Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:45:48 +0200
Subject: i2c/chips: Remove deprecated pca9539 driver

The pca9539 driver in drivers/i2c/chips which just exports its registers to
sysfs is superseded by drivers/gpio/pca953x.c which properly uses the gpiolib.
As this driver has been deprecated for more than a year, finally remove it.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Ben Gardner <gardner.ben@gmail.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/Documentation/i2c/chips/pca9539 b/Documentation/i2c/chips/pca9539
deleted file mode 100644
index 6aff890..0000000
--- a/Documentation/i2c/chips/pca9539
+++ /dev/null
@@ -1,58 +0,0 @@
-Kernel driver pca9539
-=====================
-
-NOTE: this driver is deprecated and will be dropped soon, use
-drivers/gpio/pca9539.c instead.
-
-Supported chips:
-  * Philips PCA9539
-    Prefix: 'pca9539'
-    Addresses scanned: none
-    Datasheet:
-        http://www.semiconductors.philips.com/acrobat/datasheets/PCA9539_2.pdf
-
-Author: Ben Gardner <bgardner@wabtec.com>
-
-
-Description
------------
-
-The Philips PCA9539 is a 16 bit low power I/O device.
-All 16 lines can be individually configured as an input or output.
-The input sense can also be inverted.
-The 16 lines are split between two bytes.
-
-
-Detection
----------
-
-The PCA9539 is difficult to detect and not commonly found in PC machines,
-so you have to pass the I2C bus and address of the installed PCA9539
-devices explicitly to the driver at load time via the force=... parameter.
-
-
-Sysfs entries
--------------
-
-Each is a byte that maps to the 8 I/O bits.
-A '0' suffix is for bits 0-7, while '1' is for bits 8-15.
-
-input[01]     - read the current value
-output[01]    - sets the output value
-direction[01] - direction of each bit: 1=input, 0=output
-invert[01]    - toggle the input bit sense
-
-input reads the actual state of the line and is always available.
-The direction defaults to input for all channels.
-
-
-General Remarks
----------------
-
-Note that each output, direction, and invert entry controls 8 lines.
-You should use the read, modify, write sequence.
-For example. to set output bit 0 of 1.
-  val=$(cat output0)
-  val=$(( $val | 1 ))
-  echo $val > output0
-
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 1b5455e..8cd1a7f 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -33,19 +33,6 @@ config SENSORS_PCF8574
 	  These devices are hard to detect and rarely found on mainstream
 	  hardware.  If unsure, say N.
 
-config SENSORS_PCA9539
-	tristate "Philips PCA9539 16-bit I/O port (DEPRECATED)"
-	depends on EXPERIMENTAL && GPIO_PCA953X = "n"
-	help
-	  If you say yes here you get support for the Philips PCA9539
-	  16-bit I/O port.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called pca9539.
-
-	  This driver is deprecated and will be dropped soon. Use
-	  drivers/gpio/pca953x.c instead.
-
 config SENSORS_TSL2550
 	tristate "Taos TSL2550 ambient light sensor"
 	depends on EXPERIMENTAL
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index fceb377..8cd778d 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -11,7 +11,6 @@
 #
 
 obj-$(CONFIG_DS1682)		+= ds1682.o
-obj-$(CONFIG_SENSORS_PCA9539)	+= pca9539.o
 obj-$(CONFIG_SENSORS_PCF8574)	+= pcf8574.o
 obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
 
diff --git a/drivers/i2c/chips/pca9539.c b/drivers/i2c/chips/pca9539.c
deleted file mode 100644
index 270de4e..0000000
--- a/drivers/i2c/chips/pca9539.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
-    pca9539.c - 16-bit I/O port with interrupt and reset
-
-    Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; version 2 of the License.
-*/
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/hwmon-sysfs.h>
-
-/* Addresses to scan: none, device is not autodetected */
-static const unsigned short normal_i2c[] = { I2C_CLIENT_END };
-
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_1(pca9539);
-
-enum pca9539_cmd
-{
-	PCA9539_INPUT_0		= 0,
-	PCA9539_INPUT_1		= 1,
-	PCA9539_OUTPUT_0	= 2,
-	PCA9539_OUTPUT_1	= 3,
-	PCA9539_INVERT_0	= 4,
-	PCA9539_INVERT_1	= 5,
-	PCA9539_DIRECTION_0	= 6,
-	PCA9539_DIRECTION_1	= 7,
-};
-
-/* following are the sysfs callback functions */
-static ssize_t pca9539_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct sensor_device_attribute *psa = to_sensor_dev_attr(attr);
-	struct i2c_client *client = to_i2c_client(dev);
-	return sprintf(buf, "%d\n", i2c_smbus_read_byte_data(client,
-							     psa->index));
-}
-
-static ssize_t pca9539_store(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct sensor_device_attribute *psa = to_sensor_dev_attr(attr);
-	struct i2c_client *client = to_i2c_client(dev);
-	unsigned long val = simple_strtoul(buf, NULL, 0);
-	if (val > 0xff)
-		return -EINVAL;
-	i2c_smbus_write_byte_data(client, psa->index, val);
-	return count;
-}
-
-/* Define the device attributes */
-
-#define PCA9539_ENTRY_RO(name, cmd_idx) \
-	static SENSOR_DEVICE_ATTR(name, S_IRUGO, pca9539_show, NULL, cmd_idx)
-
-#define PCA9539_ENTRY_RW(name, cmd_idx) \
-	static SENSOR_DEVICE_ATTR(name, S_IRUGO | S_IWUSR, pca9539_show, \
-				  pca9539_store, cmd_idx)
-
-PCA9539_ENTRY_RO(input0, PCA9539_INPUT_0);
-PCA9539_ENTRY_RO(input1, PCA9539_INPUT_1);
-PCA9539_ENTRY_RW(output0, PCA9539_OUTPUT_0);
-PCA9539_ENTRY_RW(output1, PCA9539_OUTPUT_1);
-PCA9539_ENTRY_RW(invert0, PCA9539_INVERT_0);
-PCA9539_ENTRY_RW(invert1, PCA9539_INVERT_1);
-PCA9539_ENTRY_RW(direction0, PCA9539_DIRECTION_0);
-PCA9539_ENTRY_RW(direction1, PCA9539_DIRECTION_1);
-
-static struct attribute *pca9539_attributes[] = {
-	&sensor_dev_attr_input0.dev_attr.attr,
-	&sensor_dev_attr_input1.dev_attr.attr,
-	&sensor_dev_attr_output0.dev_attr.attr,
-	&sensor_dev_attr_output1.dev_attr.attr,
-	&sensor_dev_attr_invert0.dev_attr.attr,
-	&sensor_dev_attr_invert1.dev_attr.attr,
-	&sensor_dev_attr_direction0.dev_attr.attr,
-	&sensor_dev_attr_direction1.dev_attr.attr,
-	NULL
-};
-
-static struct attribute_group pca9539_defattr_group = {
-	.attrs = pca9539_attributes,
-};
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int pca9539_detect(struct i2c_client *client, int kind,
-			  struct i2c_board_info *info)
-{
-	struct i2c_adapter *adapter = client->adapter;
-
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
-		return -ENODEV;
-
-	strlcpy(info->type, "pca9539", I2C_NAME_SIZE);
-
-	return 0;
-}
-
-static int pca9539_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
-{
-	/* Register sysfs hooks */
-	return sysfs_create_group(&client->dev.kobj,
-				  &pca9539_defattr_group);
-}
-
-static int pca9539_remove(struct i2c_client *client)
-{
-	sysfs_remove_group(&client->dev.kobj, &pca9539_defattr_group);
-	return 0;
-}
-
-static const struct i2c_device_id pca9539_id[] = {
-	{ "pca9539", 0 },
-	{ }
-};
-
-static struct i2c_driver pca9539_driver = {
-	.driver = {
-		.name	= "pca9539",
-	},
-	.probe		= pca9539_probe,
-	.remove		= pca9539_remove,
-	.id_table	= pca9539_id,
-
-	.detect		= pca9539_detect,
-	.address_data	= &addr_data,
-};
-
-static int __init pca9539_init(void)
-{
-	return i2c_add_driver(&pca9539_driver);
-}
-
-static void __exit pca9539_exit(void)
-{
-	i2c_del_driver(&pca9539_driver);
-}
-
-MODULE_AUTHOR("Ben Gardner <bgardner@wabtec.com>");
-MODULE_DESCRIPTION("PCA9539 driver");
-MODULE_LICENSE("GPL");
-
-module_init(pca9539_init);
-module_exit(pca9539_exit);
-
-- 
cgit v0.10.2


From e7c5c49ecdac6dc5a6b67a27838b1b562eeec1b9 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:45:49 +0200
Subject: i2c/chips: Remove deprecated pcf8574 driver

The pcf8574 driver in drivers/i2c/chips which just exports its register to
sysfs is superseded by drivers/gpio/pcf857x.c which properly uses the gpiolib.
As this driver has been deprecated for more than a year, finally remove it.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/Documentation/i2c/chips/pcf8574 b/Documentation/i2c/chips/pcf8574
deleted file mode 100644
index 235815c..0000000
--- a/Documentation/i2c/chips/pcf8574
+++ /dev/null
@@ -1,65 +0,0 @@
-Kernel driver pcf8574
-=====================
-
-Supported chips:
-  * Philips PCF8574
-    Prefix: 'pcf8574'
-    Addresses scanned: none
-    Datasheet: Publicly available at the Philips Semiconductors website
-               http://www.semiconductors.philips.com/pip/PCF8574P.html
-
- * Philips PCF8574A
-    Prefix: 'pcf8574a'
-    Addresses scanned: none
-    Datasheet: Publicly available at the Philips Semiconductors website
-               http://www.semiconductors.philips.com/pip/PCF8574P.html
-
-Authors:
-        Frodo Looijaard <frodol@dds.nl>,
-        Philip Edelbrock <phil@netroedge.com>,
-        Dan Eaton <dan.eaton@rocketlogix.com>,
-        Aurelien Jarno <aurelien@aurel32.net>,
-        Jean Delvare <khali@linux-fr.org>,
-
-
-Description
------------
-The PCF8574(A) is an 8-bit I/O expander for the I2C bus produced by Philips
-Semiconductors. It is designed to provide a byte I2C interface to up to 16
-separate devices (8 x PCF8574 and 8 x PCF8574A).
-
-This device consists of a quasi-bidirectional port. Each of the eight I/Os
-can be independently used as an input or output. To setup an I/O as an
-input, you have to write a 1 to the corresponding output.
-
-For more informations see the datasheet.
-
-
-Accessing PCF8574(A) via /sys interface
--------------------------------------
-
-The PCF8574(A) is plainly impossible to detect ! Stupid chip.
-So, you have to pass the I2C bus and address of the installed PCF857A
-and PCF8574A devices explicitly to the driver at load time via the
-force=... parameter.
-
-On detection (i.e. insmod, modprobe et al.), directories are being
-created for each detected PCF8574(A):
-
-/sys/bus/i2c/devices/<0>-<1>/
-where <0> is the bus the chip was detected on (e. g. i2c-0)
-and <1> the chip address ([20..27] or [38..3f]):
-
-(example: /sys/bus/i2c/devices/1-0020/)
-
-Inside these directories, there are two files each:
-read and write (and one file with chip name).
-
-The read file is read-only. Reading gives you the current I/O input
-if the corresponding output is set as 1, otherwise the current output
-value, that is to say 0.
-
-The write file is read/write. Writing a value outputs it on the I/O
-port. Reading returns the last written value. As it is not possible
-to read this value from the chip, you need to write at least once to
-this file before you can read back from it.
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 8cd1a7f..f9618f4 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -16,23 +16,6 @@ config DS1682
 	  This driver can also be built as a module.  If so, the module
 	  will be called ds1682.
 
-config SENSORS_PCF8574
-	tristate "Philips PCF8574 and PCF8574A (DEPRECATED)"
-	depends on EXPERIMENTAL && GPIO_PCF857X = "n"
-	default n
-	help
-	  If you say yes here you get support for Philips PCF8574 and 
-	  PCF8574A chips. These chips are 8-bit I/O expanders for the I2C bus.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called pcf8574.
-
-	  This driver is deprecated and will be dropped soon. Use
-	  drivers/gpio/pcf857x.c instead.
-
-	  These devices are hard to detect and rarely found on mainstream
-	  hardware.  If unsure, say N.
-
 config SENSORS_TSL2550
 	tristate "Taos TSL2550 ambient light sensor"
 	depends on EXPERIMENTAL
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index 8cd778d..749cf36 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -11,7 +11,6 @@
 #
 
 obj-$(CONFIG_DS1682)		+= ds1682.o
-obj-$(CONFIG_SENSORS_PCF8574)	+= pcf8574.o
 obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
 
 ifeq ($(CONFIG_I2C_DEBUG_CHIP),y)
diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c
deleted file mode 100644
index 6ec3098..0000000
--- a/drivers/i2c/chips/pcf8574.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
-    Copyright (c) 2000  Frodo Looijaard <frodol@dds.nl>, 
-                        Philip Edelbrock <phil@netroedge.com>,
-                        Dan Eaton <dan.eaton@rocketlogix.com>
-    Ported to Linux 2.6 by Aurelien Jarno <aurel32@debian.org> with 
-    the help of Jean Delvare <khali@linux-fr.org>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-    
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/* A few notes about the PCF8574:
-
-* The PCF8574 is an 8-bit I/O expander for the I2C bus produced by
-  Philips Semiconductors.  It is designed to provide a byte I2C
-  interface to up to 8 separate devices.
-  
-* The PCF8574 appears as a very simple SMBus device which can be
-  read from or written to with SMBUS byte read/write accesses.
-
-  --Dan
-
-*/
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-
-/* Addresses to scan: none, device can't be detected */
-static const unsigned short normal_i2c[] = { I2C_CLIENT_END };
-
-/* Insmod parameters */
-I2C_CLIENT_INSMOD_2(pcf8574, pcf8574a);
-
-/* Each client has this additional data */
-struct pcf8574_data {
-	int write;			/* Remember last written value */
-};
-
-static void pcf8574_init_client(struct i2c_client *client);
-
-/* following are the sysfs callback functions */
-static ssize_t show_read(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	return sprintf(buf, "%u\n", i2c_smbus_read_byte(client));
-}
-
-static DEVICE_ATTR(read, S_IRUGO, show_read, NULL);
-
-static ssize_t show_write(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct pcf8574_data *data = i2c_get_clientdata(to_i2c_client(dev));
-
-	if (data->write < 0)
-		return data->write;
-
-	return sprintf(buf, "%d\n", data->write);
-}
-
-static ssize_t set_write(struct device *dev, struct device_attribute *attr, const char *buf,
-			 size_t count)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	struct pcf8574_data *data = i2c_get_clientdata(client);
-	unsigned long val = simple_strtoul(buf, NULL, 10);
-
-	if (val > 0xff)
-		return -EINVAL;
-
-	data->write = val;
-	i2c_smbus_write_byte(client, data->write);
-	return count;
-}
-
-static DEVICE_ATTR(write, S_IWUSR | S_IRUGO, show_write, set_write);
-
-static struct attribute *pcf8574_attributes[] = {
-	&dev_attr_read.attr,
-	&dev_attr_write.attr,
-	NULL
-};
-
-static const struct attribute_group pcf8574_attr_group = {
-	.attrs = pcf8574_attributes,
-};
-
-/*
- * Real code
- */
-
-/* Return 0 if detection is successful, -ENODEV otherwise */
-static int pcf8574_detect(struct i2c_client *client, int kind,
-			  struct i2c_board_info *info)
-{
-	struct i2c_adapter *adapter = client->adapter;
-	const char *client_name;
-
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
-		return -ENODEV;
-
-	/* Now, we would do the remaining detection. But the PCF8574 is plainly
-	   impossible to detect! Stupid chip. */
-
-	/* Determine the chip type */
-	if (kind <= 0) {
-		if (client->addr >= 0x38 && client->addr <= 0x3f)
-			kind = pcf8574a;
-		else
-			kind = pcf8574;
-	}
-
-	if (kind == pcf8574a)
-		client_name = "pcf8574a";
-	else
-		client_name = "pcf8574";
-	strlcpy(info->type, client_name, I2C_NAME_SIZE);
-
-	return 0;
-}
-
-static int pcf8574_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
-{
-	struct pcf8574_data *data;
-	int err;
-
-	data = kzalloc(sizeof(struct pcf8574_data), GFP_KERNEL);
-	if (!data) {
-		err = -ENOMEM;
-		goto exit;
-	}
-
-	i2c_set_clientdata(client, data);
-
-	/* Initialize the PCF8574 chip */
-	pcf8574_init_client(client);
-
-	/* Register sysfs hooks */
-	err = sysfs_create_group(&client->dev.kobj, &pcf8574_attr_group);
-	if (err)
-		goto exit_free;
-	return 0;
-
-      exit_free:
-	kfree(data);
-      exit:
-	return err;
-}
-
-static int pcf8574_remove(struct i2c_client *client)
-{
-	sysfs_remove_group(&client->dev.kobj, &pcf8574_attr_group);
-	kfree(i2c_get_clientdata(client));
-	return 0;
-}
-
-/* Called when we have found a new PCF8574. */
-static void pcf8574_init_client(struct i2c_client *client)
-{
-	struct pcf8574_data *data = i2c_get_clientdata(client);
-	data->write = -EAGAIN;
-}
-
-static const struct i2c_device_id pcf8574_id[] = {
-	{ "pcf8574", 0 },
-	{ "pcf8574a", 0 },
-	{ }
-};
-
-static struct i2c_driver pcf8574_driver = {
-	.driver = {
-		.name	= "pcf8574",
-	},
-	.probe		= pcf8574_probe,
-	.remove		= pcf8574_remove,
-	.id_table	= pcf8574_id,
-
-	.detect		= pcf8574_detect,
-	.address_data	= &addr_data,
-};
-
-static int __init pcf8574_init(void)
-{
-	return i2c_add_driver(&pcf8574_driver);
-}
-
-static void __exit pcf8574_exit(void)
-{
-	i2c_del_driver(&pcf8574_driver);
-}
-
-
-MODULE_AUTHOR
-    ("Frodo Looijaard <frodol@dds.nl>, "
-     "Philip Edelbrock <phil@netroedge.com>, "
-     "Dan Eaton <dan.eaton@rocketlogix.com> "
-     "and Aurelien Jarno <aurelien@aurel32.net>");
-MODULE_DESCRIPTION("PCF8574 driver");
-MODULE_LICENSE("GPL");
-
-module_init(pcf8574_init);
-module_exit(pcf8574_exit);
-- 
cgit v0.10.2


From 76b3e28fa728bb68895cbd8375f5ce233bd891de Mon Sep 17 00:00:00 2001
From: Crane Cai <crane.cai@amd.com>
Date: Fri, 18 Sep 2009 22:45:50 +0200
Subject: i2c-piix4: Add AMD SB900 SMBus device ID

Add new SMBus device ID for AMD SB900.

Signed-off-by: Crane Cai <crane.cai@amd.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
index f889481..c5b37c5 100644
--- a/Documentation/i2c/busses/i2c-piix4
+++ b/Documentation/i2c/busses/i2c-piix4
@@ -8,6 +8,8 @@ Supported adapters:
     Datasheet: Only available via NDA from ServerWorks
   * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
     Datasheet: Not publicly available
+  * AMD SB900
+    Datasheet: Not publicly available
   * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
     Datasheet: Publicly available at the SMSC website http://www.smsc.com
 
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 8206442..bb216c5 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -113,7 +113,7 @@ config I2C_ISCH
 	  will be called i2c-isch.
 
 config I2C_PIIX4
-	tristate "Intel PIIX4 and compatible (ATI/Serverworks/Broadcom/SMSC)"
+	tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)"
 	depends on PCI
 	help
 	  If you say yes to this option, support will be included for the Intel
@@ -128,6 +128,7 @@ config I2C_PIIX4
 	    ATI SB600
 	    ATI SB700
 	    ATI SB800
+	    AMD SB900
 	    Serverworks OSB4
 	    Serverworks CSB5
 	    Serverworks CSB6
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 0249a7d..a782c7a 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -22,6 +22,7 @@
 	Intel PIIX4, 440MX
 	Serverworks OSB4, CSB5, CSB6, HT-1000, HT-1100
 	ATI IXP200, IXP300, IXP400, SB600, SB700, SB800
+	AMD SB900
 	SMSC Victory66
 
    Note: we assume there can only be one device, with one SMBus interface.
@@ -479,6 +480,7 @@ static struct pci_device_id piix4_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP300_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_SB900_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS,
 		     PCI_DEVICE_ID_SERVERWORKS_OSB4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS,
@@ -499,9 +501,10 @@ static int __devinit piix4_probe(struct pci_dev *dev,
 {
 	int retval;
 
-	if ((dev->vendor == PCI_VENDOR_ID_ATI) &&
-	    (dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS) &&
-	    (dev->revision >= 0x40))
+	if ((dev->vendor == PCI_VENDOR_ID_ATI &&
+	     dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
+	     dev->revision >= 0x40) ||
+	    dev->vendor == PCI_VENDOR_ID_AMD)
 		/* base address location etc changed in SB800 */
 		retval = piix4_setup_sb800(dev, id);
 	else
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3b6b788..7803565 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -543,6 +543,7 @@
 #define PCI_DEVICE_ID_AMD_8131_BRIDGE	0x7450
 #define PCI_DEVICE_ID_AMD_8131_APIC	0x7451
 #define PCI_DEVICE_ID_AMD_8132_BRIDGE	0x7458
+#define PCI_DEVICE_ID_AMD_SB900_SMBUS	0x780b
 #define PCI_DEVICE_ID_AMD_CS5535_IDE    0x208F
 #define PCI_DEVICE_ID_AMD_CS5536_ISA    0x2090
 #define PCI_DEVICE_ID_AMD_CS5536_FLASH  0x2091
-- 
cgit v0.10.2


From a1867d36b3bda28314fdd832a510dc9e55821c4c Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:45:51 +0200
Subject: MAINTAINERS: Add maintainer for AT24 and PCA9564/PCA9665

Add me as the maintainer for the i2c drivers I feel responsible for as I
found patches going mainline I missed due to no Cc.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index e613c6dd..9a1cd2f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -894,6 +894,13 @@ F:	drivers/dma/
 F:	include/linux/dmaengine.h
 F:	include/linux/async_tx.h
 
+AT24 EEPROM DRIVER
+M:	Wolfram Sang <w.sang@pengutronix.de>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	drivers/misc/eeprom/at24.c
+F:	include/linux/i2c/at24.h
+
 ATA OVER ETHERNET (AOE) DRIVER
 M:	"Ed L. Cashin" <ecashin@coraid.com>
 W:	http://www.coraid.com/support/linux
@@ -3957,6 +3964,15 @@ S:	Maintained
 F:	drivers/leds/leds-pca9532.c
 F:	include/linux/leds-pca9532.h
 
+PCA9564/PCA9665 I2C BUS DRIVER
+M:	Wolfram Sang <w.sang@pengutronix.de>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	drivers/i2c/algos/i2c-algo-pca.c
+F:	drivers/i2c/busses/i2c-pca-*
+F:	include/linux/i2c-algo-pca.h
+F:	include/linux/i2c-pca-platform.h
+
 PCI ERROR RECOVERY
 M:	Linas Vepstas <linas@austin.ibm.com>
 L:	linux-pci@vger.kernel.org
-- 
cgit v0.10.2


From 449d2c759ddba46a89b698bdc64bfc2f7cc5bb66 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 18 Sep 2009 22:45:51 +0200
Subject: i2c-pnx: Correct use of request_region/request_mem_region

request_mem_region should be used when ioremap is used subsequently.
release_region is then correspondingly replaced by release_mem_region.

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@r@
expression start,E;
@@

- request_region
+ request_mem_region
  (start,...)
... when != request_mem_region(start,...)
    when != start = E
ioremap(start,...)

@@
expression r.start;
@@

- release_region
+ release_mem_region
  (start,...)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: Vitaly Wool <vitalywool@gmail.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index ec15cff..6ff6c20 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -586,7 +586,8 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	alg_data->mif.timer.data = (unsigned long)i2c_pnx->adapter;
 
 	/* Register I/O resource */
-	if (!request_region(alg_data->base, I2C_PNX_REGION_SIZE, pdev->name)) {
+	if (!request_mem_region(alg_data->base, I2C_PNX_REGION_SIZE,
+				pdev->name)) {
 		dev_err(&pdev->dev,
 		       "I/O region 0x%08x for I2C already in use.\n",
 		       alg_data->base);
@@ -650,7 +651,7 @@ out_clock:
 out_unmap:
 	iounmap((void *)alg_data->ioaddr);
 out_release:
-	release_region(alg_data->base, I2C_PNX_REGION_SIZE);
+	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
 out_drvdata:
 	platform_set_drvdata(pdev, NULL);
 out:
@@ -667,7 +668,7 @@ static int __devexit i2c_pnx_remove(struct platform_device *pdev)
 	i2c_del_adapter(adap);
 	i2c_pnx->set_clock_stop(pdev);
 	iounmap((void *)alg_data->ioaddr);
-	release_region(alg_data->base, I2C_PNX_REGION_SIZE);
+	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
-- 
cgit v0.10.2


From dc9854212e0d7318d7133697906d98b78f3088b6 Mon Sep 17 00:00:00 2001
From: Crane Cai <crane.cai@amd.com>
Date: Fri, 18 Sep 2009 22:45:51 +0200
Subject: i2c: Add driver for SMBus Control Method Interface

This driver supports the SMBus Control Method Interface. It needs BIOS declare
ACPI control methods which described in SMBus Control Method Interface Spec.
http://smbus.org/specs/smbus_cmi10.pdf

Signed-off-by: Crane Cai <crane.cai@amd.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index bb216c5..838bd1e 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -762,4 +762,15 @@ config SCx200_ACB
 	  This support is also available as a module.  If so, the module
 	  will be called scx200_acb.
 
+config I2C_SCMI
+	tristate "SMBus Control Method Interface"
+	depends on ACPI
+	help
+	  This driver supports the SMBus Control Method Interface. It needs the
+	  BIOS to declare ACPI control methods as described in the SMBus Control
+	  Method Interface specification.
+
+	  To compile this driver as a module, choose M here:
+	  the module will be called i2c-scmi.
+
 endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index e654263b..060da43 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -2,6 +2,9 @@
 # Makefile for the i2c bus drivers.
 #
 
+# SMBus CMI driver
+obj-$(CONFIG_I2C_SCMI)		+= i2c-scmi.o
+
 # PC SMBus host controller drivers
 obj-$(CONFIG_I2C_ALI1535)	+= i2c-ali1535.o
 obj-$(CONFIG_I2C_ALI1563)	+= i2c-ali1563.o
diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c
new file mode 100644
index 0000000..276a046
--- /dev/null
+++ b/drivers/i2c/busses/i2c-scmi.c
@@ -0,0 +1,430 @@
+/*
+ * SMBus driver for ACPI SMBus CMI
+ *
+ * Copyright (C) 2009 Crane Cai <crane.cai@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation version 2.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/acpi.h>
+
+#define ACPI_SMBUS_HC_CLASS		"smbus"
+#define ACPI_SMBUS_HC_DEVICE_NAME	"cmi"
+
+ACPI_MODULE_NAME("smbus_cmi");
+
+struct smbus_methods_t {
+	char *mt_info;
+	char *mt_sbr;
+	char *mt_sbw;
+};
+
+struct acpi_smbus_cmi {
+	acpi_handle handle;
+	struct i2c_adapter adapter;
+	u8 cap_info:1;
+	u8 cap_read:1;
+	u8 cap_write:1;
+};
+
+static const struct smbus_methods_t smbus_methods = {
+	.mt_info = "_SBI",
+	.mt_sbr  = "_SBR",
+	.mt_sbw  = "_SBW",
+};
+
+static const struct acpi_device_id acpi_smbus_cmi_ids[] = {
+	{"SMBUS01", 0},
+	{"", 0}
+};
+
+#define ACPI_SMBUS_STATUS_OK			0x00
+#define ACPI_SMBUS_STATUS_FAIL			0x07
+#define ACPI_SMBUS_STATUS_DNAK			0x10
+#define ACPI_SMBUS_STATUS_DERR			0x11
+#define ACPI_SMBUS_STATUS_CMD_DENY		0x12
+#define ACPI_SMBUS_STATUS_UNKNOWN		0x13
+#define ACPI_SMBUS_STATUS_ACC_DENY		0x17
+#define ACPI_SMBUS_STATUS_TIMEOUT		0x18
+#define ACPI_SMBUS_STATUS_NOTSUP		0x19
+#define ACPI_SMBUS_STATUS_BUSY			0x1a
+#define ACPI_SMBUS_STATUS_PEC			0x1f
+
+#define ACPI_SMBUS_PRTCL_WRITE			0x00
+#define ACPI_SMBUS_PRTCL_READ			0x01
+#define ACPI_SMBUS_PRTCL_QUICK			0x02
+#define ACPI_SMBUS_PRTCL_BYTE			0x04
+#define ACPI_SMBUS_PRTCL_BYTE_DATA		0x06
+#define ACPI_SMBUS_PRTCL_WORD_DATA		0x08
+#define ACPI_SMBUS_PRTCL_BLOCK_DATA		0x0a
+
+
+static int
+acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
+		   char read_write, u8 command, int size,
+		   union i2c_smbus_data *data)
+{
+	int result = 0;
+	struct acpi_smbus_cmi *smbus_cmi = adap->algo_data;
+	unsigned char protocol;
+	acpi_status status = 0;
+	struct acpi_object_list input;
+	union acpi_object mt_params[5];
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	union acpi_object *pkg;
+	char *method;
+	int len = 0;
+
+	dev_dbg(&adap->dev, "access size: %d %s\n", size,
+		(read_write) ? "READ" : "WRITE");
+	switch (size) {
+	case I2C_SMBUS_QUICK:
+		protocol = ACPI_SMBUS_PRTCL_QUICK;
+		command = 0;
+		if (read_write == I2C_SMBUS_WRITE) {
+			mt_params[3].type = ACPI_TYPE_INTEGER;
+			mt_params[3].integer.value = 0;
+			mt_params[4].type = ACPI_TYPE_INTEGER;
+			mt_params[4].integer.value = 0;
+		}
+		break;
+
+	case I2C_SMBUS_BYTE:
+		protocol = ACPI_SMBUS_PRTCL_BYTE;
+		if (read_write == I2C_SMBUS_WRITE) {
+			mt_params[3].type = ACPI_TYPE_INTEGER;
+			mt_params[3].integer.value = 0;
+			mt_params[4].type = ACPI_TYPE_INTEGER;
+			mt_params[4].integer.value = 0;
+		} else {
+			command = 0;
+		}
+		break;
+
+	case I2C_SMBUS_BYTE_DATA:
+		protocol = ACPI_SMBUS_PRTCL_BYTE_DATA;
+		if (read_write == I2C_SMBUS_WRITE) {
+			mt_params[3].type = ACPI_TYPE_INTEGER;
+			mt_params[3].integer.value = 1;
+			mt_params[4].type = ACPI_TYPE_INTEGER;
+			mt_params[4].integer.value = data->byte;
+		}
+		break;
+
+	case I2C_SMBUS_WORD_DATA:
+		protocol = ACPI_SMBUS_PRTCL_WORD_DATA;
+		if (read_write == I2C_SMBUS_WRITE) {
+			mt_params[3].type = ACPI_TYPE_INTEGER;
+			mt_params[3].integer.value = 2;
+			mt_params[4].type = ACPI_TYPE_INTEGER;
+			mt_params[4].integer.value = data->word;
+		}
+		break;
+
+	case I2C_SMBUS_BLOCK_DATA:
+		protocol = ACPI_SMBUS_PRTCL_BLOCK_DATA;
+		if (read_write == I2C_SMBUS_WRITE) {
+			len = data->block[0];
+			if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
+				return -EINVAL;
+			mt_params[3].type = ACPI_TYPE_INTEGER;
+			mt_params[3].integer.value = len;
+			mt_params[4].type = ACPI_TYPE_BUFFER;
+			mt_params[4].buffer.pointer = data->block + 1;
+		}
+		break;
+
+	default:
+		dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
+		return -EOPNOTSUPP;
+	}
+
+	if (read_write == I2C_SMBUS_READ) {
+		protocol |= ACPI_SMBUS_PRTCL_READ;
+		method = smbus_methods.mt_sbr;
+		input.count = 3;
+	} else {
+		protocol |= ACPI_SMBUS_PRTCL_WRITE;
+		method = smbus_methods.mt_sbw;
+		input.count = 5;
+	}
+
+	input.pointer = mt_params;
+	mt_params[0].type = ACPI_TYPE_INTEGER;
+	mt_params[0].integer.value = protocol;
+	mt_params[1].type = ACPI_TYPE_INTEGER;
+	mt_params[1].integer.value = addr;
+	mt_params[2].type = ACPI_TYPE_INTEGER;
+	mt_params[2].integer.value = command;
+
+	status = acpi_evaluate_object(smbus_cmi->handle, method, &input,
+				      &buffer);
+	if (ACPI_FAILURE(status)) {
+		ACPI_ERROR((AE_INFO, "Evaluating %s: %i", method, status));
+		return -EIO;
+	}
+
+	pkg = buffer.pointer;
+	if (pkg && pkg->type == ACPI_TYPE_PACKAGE)
+		obj = pkg->package.elements;
+	else {
+		ACPI_ERROR((AE_INFO, "Invalid argument type"));
+		result = -EIO;
+		goto out;
+	}
+	if (obj == NULL || obj->type != ACPI_TYPE_INTEGER) {
+		ACPI_ERROR((AE_INFO, "Invalid argument type"));
+		result = -EIO;
+		goto out;
+	}
+
+	result = obj->integer.value;
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s return status: %i\n",
+			  method, result));
+
+	switch (result) {
+	case ACPI_SMBUS_STATUS_OK:
+		result = 0;
+		break;
+	case ACPI_SMBUS_STATUS_BUSY:
+		result = -EBUSY;
+		goto out;
+	case ACPI_SMBUS_STATUS_TIMEOUT:
+		result = -ETIMEDOUT;
+		goto out;
+	case ACPI_SMBUS_STATUS_DNAK:
+		result = -ENXIO;
+		goto out;
+	default:
+		result = -EIO;
+		goto out;
+	}
+
+	if (read_write == I2C_SMBUS_WRITE || size == I2C_SMBUS_QUICK)
+		goto out;
+
+	obj = pkg->package.elements + 1;
+	if (obj == NULL || obj->type != ACPI_TYPE_INTEGER) {
+		ACPI_ERROR((AE_INFO, "Invalid argument type"));
+		result = -EIO;
+		goto out;
+	}
+
+	len = obj->integer.value;
+	obj = pkg->package.elements + 2;
+	switch (size) {
+	case I2C_SMBUS_BYTE:
+	case I2C_SMBUS_BYTE_DATA:
+	case I2C_SMBUS_WORD_DATA:
+		if (obj == NULL || obj->type != ACPI_TYPE_INTEGER) {
+			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			result = -EIO;
+			goto out;
+		}
+		if (len == 2)
+			data->word = obj->integer.value;
+		else
+			data->byte = obj->integer.value;
+		break;
+	case I2C_SMBUS_BLOCK_DATA:
+		if (obj == NULL || obj->type != ACPI_TYPE_BUFFER) {
+			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			result = -EIO;
+			goto out;
+		}
+		if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
+			return -EPROTO;
+		data->block[0] = len;
+		memcpy(data->block + 1, obj->buffer.pointer, len);
+		break;
+	}
+
+out:
+	kfree(buffer.pointer);
+	dev_dbg(&adap->dev, "Transaction status: %i\n", result);
+	return result;
+}
+
+static u32 acpi_smbus_cmi_func(struct i2c_adapter *adapter)
+{
+	struct acpi_smbus_cmi *smbus_cmi = adapter->algo_data;
+	u32 ret;
+
+	ret = smbus_cmi->cap_read | smbus_cmi->cap_write ?
+		I2C_FUNC_SMBUS_QUICK : 0;
+
+	ret |= smbus_cmi->cap_read ?
+		(I2C_FUNC_SMBUS_READ_BYTE |
+		I2C_FUNC_SMBUS_READ_BYTE_DATA |
+		I2C_FUNC_SMBUS_READ_WORD_DATA |
+		I2C_FUNC_SMBUS_READ_BLOCK_DATA) : 0;
+
+	ret |= smbus_cmi->cap_write ?
+		(I2C_FUNC_SMBUS_WRITE_BYTE |
+		I2C_FUNC_SMBUS_WRITE_BYTE_DATA |
+		I2C_FUNC_SMBUS_WRITE_WORD_DATA |
+		I2C_FUNC_SMBUS_WRITE_BLOCK_DATA) : 0;
+
+	return ret;
+}
+
+static const struct i2c_algorithm acpi_smbus_cmi_algorithm = {
+	.smbus_xfer = acpi_smbus_cmi_access,
+	.functionality = acpi_smbus_cmi_func,
+};
+
+
+static int acpi_smbus_cmi_add_cap(struct acpi_smbus_cmi *smbus_cmi,
+				  const char *name)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	acpi_status status;
+
+	if (!strcmp(name, smbus_methods.mt_info)) {
+		status = acpi_evaluate_object(smbus_cmi->handle,
+					smbus_methods.mt_info,
+					NULL, &buffer);
+		if (ACPI_FAILURE(status)) {
+			ACPI_ERROR((AE_INFO, "Evaluating %s: %i",
+				   smbus_methods.mt_info, status));
+			return -EIO;
+		}
+
+		obj = buffer.pointer;
+		if (obj && obj->type == ACPI_TYPE_PACKAGE)
+			obj = obj->package.elements;
+		else {
+			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			kfree(buffer.pointer);
+			return -EIO;
+		}
+
+		if (obj->type != ACPI_TYPE_INTEGER) {
+			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			kfree(buffer.pointer);
+			return -EIO;
+		} else
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "SMBus CMI Version %x"
+					  "\n", (int)obj->integer.value));
+
+		kfree(buffer.pointer);
+		smbus_cmi->cap_info = 1;
+	} else if (!strcmp(name, smbus_methods.mt_sbr))
+		smbus_cmi->cap_read = 1;
+	else if (!strcmp(name, smbus_methods.mt_sbw))
+		smbus_cmi->cap_write = 1;
+	else
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unsupported CMI method: %s\n",
+				 name));
+
+	return 0;
+}
+
+static acpi_status acpi_smbus_cmi_query_methods(acpi_handle handle, u32 level,
+			void *context, void **return_value)
+{
+	char node_name[5];
+	struct acpi_buffer buffer = { sizeof(node_name), node_name };
+	struct acpi_smbus_cmi *smbus_cmi = context;
+	acpi_status status;
+
+	status = acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer);
+
+	if (ACPI_SUCCESS(status))
+		acpi_smbus_cmi_add_cap(smbus_cmi, node_name);
+
+	return AE_OK;
+}
+
+static int acpi_smbus_cmi_add(struct acpi_device *device)
+{
+	struct acpi_smbus_cmi *smbus_cmi;
+
+	smbus_cmi = kzalloc(sizeof(struct acpi_smbus_cmi), GFP_KERNEL);
+	if (!smbus_cmi)
+		return -ENOMEM;
+
+	smbus_cmi->handle = device->handle;
+	strcpy(acpi_device_name(device), ACPI_SMBUS_HC_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_SMBUS_HC_CLASS);
+	device->driver_data = smbus_cmi;
+	smbus_cmi->cap_info = 0;
+	smbus_cmi->cap_read = 0;
+	smbus_cmi->cap_write = 0;
+
+	acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1,
+			    acpi_smbus_cmi_query_methods, smbus_cmi, NULL);
+
+	if (smbus_cmi->cap_info == 0)
+		goto err;
+
+	snprintf(smbus_cmi->adapter.name, sizeof(smbus_cmi->adapter.name),
+		"SMBus CMI adapter %s (%s)",
+		acpi_device_name(device),
+		acpi_device_uid(device));
+	smbus_cmi->adapter.owner = THIS_MODULE;
+	smbus_cmi->adapter.algo = &acpi_smbus_cmi_algorithm;
+	smbus_cmi->adapter.algo_data = smbus_cmi;
+	smbus_cmi->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
+	smbus_cmi->adapter.dev.parent = &device->dev;
+
+	if (i2c_add_adapter(&smbus_cmi->adapter)) {
+		dev_err(&device->dev, "Couldn't register adapter!\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	kfree(smbus_cmi);
+	device->driver_data = NULL;
+	return -EIO;
+}
+
+static int acpi_smbus_cmi_remove(struct acpi_device *device, int type)
+{
+	struct acpi_smbus_cmi *smbus_cmi = acpi_driver_data(device);
+
+	i2c_del_adapter(&smbus_cmi->adapter);
+	kfree(smbus_cmi);
+	device->driver_data = NULL;
+
+	return 0;
+}
+
+static struct acpi_driver acpi_smbus_cmi_driver = {
+	.name = ACPI_SMBUS_HC_DEVICE_NAME,
+	.class = ACPI_SMBUS_HC_CLASS,
+	.ids = acpi_smbus_cmi_ids,
+	.ops = {
+		.add = acpi_smbus_cmi_add,
+		.remove = acpi_smbus_cmi_remove,
+	},
+};
+
+static int __init acpi_smbus_cmi_init(void)
+{
+	return acpi_bus_register_driver(&acpi_smbus_cmi_driver);
+}
+
+static void __exit acpi_smbus_cmi_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_smbus_cmi_driver);
+}
+
+module_init(acpi_smbus_cmi_init);
+module_exit(acpi_smbus_cmi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Crane Cai <crane.cai@amd.com>");
+MODULE_DESCRIPTION("ACPI SMBus CMI driver");
-- 
cgit v0.10.2


From cfd550ed3d3bd509b475c7a9d425fc63bf843a7c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 18 Sep 2009 22:45:52 +0200
Subject: i2c: Clearly mark ACPI drivers as such

Now that we have ACPI-based SMBus controller drivers, and we will start
telling users to use them instead of native drivers when I/O resources
conflict, I think it would be good to clearly mark ACPI drivers as such
in Kconfig.

This is exactly the same as we just did for hwmon drivers.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Crane Cai <crane.cai@amd.com>

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 838bd1e..6bedd2f 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -232,6 +232,22 @@ config I2C_VIAPRO
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-viapro.
 
+if ACPI
+
+comment "ACPI drivers"
+
+config I2C_SCMI
+	tristate "SMBus Control Method Interface"
+	help
+	  This driver supports the SMBus Control Method Interface. It needs the
+	  BIOS to declare ACPI control methods as described in the SMBus Control
+	  Method Interface specification.
+
+	  To compile this driver as a module, choose M here:
+	  the module will be called i2c-scmi.
+
+endif # ACPI
+
 comment "Mac SMBus host controller drivers"
 	depends on PPC_CHRP || PPC_PMAC
 
@@ -762,15 +778,4 @@ config SCx200_ACB
 	  This support is also available as a module.  If so, the module
 	  will be called scx200_acb.
 
-config I2C_SCMI
-	tristate "SMBus Control Method Interface"
-	depends on ACPI
-	help
-	  This driver supports the SMBus Control Method Interface. It needs the
-	  BIOS to declare ACPI control methods as described in the SMBus Control
-	  Method Interface specification.
-
-	  To compile this driver as a module, choose M here:
-	  the module will be called i2c-scmi.
-
 endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 060da43..ff937ac 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the i2c bus drivers.
 #
 
-# SMBus CMI driver
+# ACPI drivers
 obj-$(CONFIG_I2C_SCMI)		+= i2c-scmi.o
 
 # PC SMBus host controller drivers
-- 
cgit v0.10.2


From cd68c374ea9ce202ae7c6346777d10078e243d49 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 9 Sep 2009 16:32:54 +1000
Subject: sunrpc/cache: avoid variable over-loading in cache_defer_req

In cache_defer_req, 'dreq' is used for two significantly different
values that happen to be of the same type.

This is both confusing, and makes it hard to extend the range of one of
the values as we will in the next patch.
So introduce 'discard' to take one of the values.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a32a30..d6eee29 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -500,7 +500,7 @@ static int cache_defer_cnt;
 
 static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 {
-	struct cache_deferred_req *dreq;
+	struct cache_deferred_req *dreq, *discard;
 	int hash = DFR_HASH(item);
 
 	if (cache_defer_cnt >= DFR_MAX) {
@@ -525,20 +525,20 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 	list_add(&dreq->hash, &cache_defer_hash[hash]);
 
 	/* it is in, now maybe clean up */
-	dreq = NULL;
+	discard = NULL;
 	if (++cache_defer_cnt > DFR_MAX) {
-		dreq = list_entry(cache_defer_list.prev,
-				  struct cache_deferred_req, recent);
-		list_del_init(&dreq->recent);
-		list_del_init(&dreq->hash);
+		discard = list_entry(cache_defer_list.prev,
+				     struct cache_deferred_req, recent);
+		list_del_init(&discard->recent);
+		list_del_init(&discard->hash);
 		cache_defer_cnt--;
 	}
 	spin_unlock(&cache_defer_lock);
 
-	if (dreq) {
+	if (discard)
 		/* there was one too many */
-		dreq->revisit(dreq, 1);
-	}
+		discard->revisit(discard, 1);
+
 	if (!test_bit(CACHE_PENDING, &item->flags)) {
 		/* must have just been validated... */
 		cache_revisit_request(item);
-- 
cgit v0.10.2


From 6da25bf51689a5cc60370d30275dbb9e6852e0cb Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:11 -0300
Subject: thinkpad-acpi: don't ask about brightness_mode for fw. 1V and 1R

X40 (firmware 1V) and T41 (firmware 1R) have been confirmed to work
well with the new defaults, so we can stop pestering people to confirm
that fact.

For now, whitelist just these two firmware types.  It is best to have
at least one more firmware type confirmed for Radeon 9xxx and Intel
GMA-2 ThinkPads before removing the confirmation requests entirely.

Reported-by: Robert de Rooy <robert.de.rooy@gmail.com>
Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: stable@kernel.org
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index e856008..d287283 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -5655,16 +5655,16 @@ static const struct tpacpi_quirk brightness_quirk_table[] __initconst = {
 	/* Models with ATI GPUs known to require ECNVRAM mode */
 	TPACPI_Q_IBM('1', 'Y', TPACPI_BRGHT_Q_EC),	/* T43/p ATI */
 
-	/* Models with ATI GPUs (waiting confirmation) */
-	TPACPI_Q_IBM('1', 'R', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC),
+	/* Models with ATI GPUs that can use ECNVRAM */
+	TPACPI_Q_IBM('1', 'R', TPACPI_BRGHT_Q_EC),
 	TPACPI_Q_IBM('1', 'Q', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC),
 	TPACPI_Q_IBM('7', '6', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC),
 	TPACPI_Q_IBM('7', '8', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC),
 
-	/* Models with Intel Extreme Graphics 2 (waiting confirmation) */
+	/* Models with Intel Extreme Graphics 2 */
+	TPACPI_Q_IBM('1', 'U', TPACPI_BRGHT_Q_NOEC),
 	TPACPI_Q_IBM('1', 'V', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC),
 	TPACPI_Q_IBM('1', 'W', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC),
-	TPACPI_Q_IBM('1', 'U', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC),
 
 	/* Models with Intel GMA900 */
 	TPACPI_Q_IBM('7', '0', TPACPI_BRGHT_Q_NOEC),	/* T43, R52 */
-- 
cgit v0.10.2


From 600a99fa3b4ce4a54375fb089e5ce0f3a1c9a7e1 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:12 -0300
Subject: thinkpad-acpi: firmware version checks

Use the quirk infrastructure to warn of outdated firmware and also of
firmware versions that are known to cause problems.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index d287283..cc4155c 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1601,6 +1601,187 @@ static void tpacpi_remove_driver_attributes(struct device_driver *drv)
 #endif
 }
 
+/*************************************************************************
+ * Firmware Data
+ */
+
+/*
+ * Table of recommended minimum BIOS versions
+ *
+ * Reasons for listing:
+ *    1. Stable BIOS, listed because the unknown ammount of
+ *       bugs and bad ACPI behaviour on older versions
+ *
+ *    2. BIOS or EC fw with known bugs that trigger on Linux
+ *
+ *    3. BIOS with known reduced functionality in older versions
+ *
+ *  We recommend the latest BIOS and EC version.
+ *  We only support the latest BIOS and EC fw version as a rule.
+ *
+ *  Sources: IBM ThinkPad Public Web Documents (update changelogs),
+ *  Information from users in ThinkWiki
+ */
+
+#define TPV_Q(__v, __id1, __id2, __bv1, __bv2)		\
+	{ .vendor	= (__v),			\
+	  .bios		= TPID(__id1, __id2),		\
+	  .ec		= TPACPI_MATCH_ANY,		\
+	  .quirks	= TPACPI_MATCH_ANY << 16	\
+			  | (__bv1) << 8 | (__bv2) }
+
+#define TPV_Q_X(__v, __bid1, __bid2, __bv1, __bv2,	\
+		__eid1, __eid2, __ev1, __ev2)		\
+	{ .vendor	= (__v),			\
+	  .bios		= TPID(__bid1, __bid2),		\
+	  .ec		= TPID(__eid1, __eid2),		\
+	  .quirks	= (__ev1) << 24 | (__ev2) << 16 \
+			  | (__bv1) << 8 | (__bv2) }
+
+#define TPV_QI0(__id1, __id2, __bv1, __bv2) \
+	TPV_Q(PCI_VENDOR_ID_IBM, __id1, __id2, __bv1, __bv2)
+
+#define TPV_QI1(__id1, __id2, __bv1, __bv2, __ev1, __ev2) \
+	TPV_Q_X(PCI_VENDOR_ID_IBM, __id1, __id2, 	\
+		__bv1, __bv2, __id1, __id2, __ev1, __ev2)
+
+#define TPV_QI2(__bid1, __bid2, __bv1, __bv2,		\
+		__eid1, __eid2, __ev1, __ev2) 		\
+	TPV_Q_X(PCI_VENDOR_ID_IBM, __bid1, __bid2, 	\
+		__bv1, __bv2, __eid1, __eid2, __ev1, __ev2)
+
+#define TPV_QL0(__id1, __id2, __bv1, __bv2) \
+	TPV_Q(PCI_VENDOR_ID_LENOVO, __id1, __id2, __bv1, __bv2)
+
+#define TPV_QL1(__id1, __id2, __bv1, __bv2, __ev1, __ev2) \
+	TPV_Q_X(PCI_VENDOR_ID_LENOVO, __id1, __id2, 	\
+		__bv1, __bv2, __id1, __id2, __ev1, __ev2)
+
+#define TPV_QL2(__bid1, __bid2, __bv1, __bv2,		\
+		__eid1, __eid2, __ev1, __ev2) 		\
+	TPV_Q_X(PCI_VENDOR_ID_LENOVO, __bid1, __bid2, 	\
+		__bv1, __bv2, __eid1, __eid2, __ev1, __ev2)
+
+static const struct tpacpi_quirk tpacpi_bios_version_qtable[] __initconst = {
+	/*  Numeric models ------------------ */
+	/*      FW MODEL   BIOS VERS	      */
+	TPV_QI0('I', 'M',  '6', '5'),		 /* 570 */
+	TPV_QI0('I', 'U',  '2', '6'),		 /* 570E */
+	TPV_QI0('I', 'B',  '5', '4'),		 /* 600 */
+	TPV_QI0('I', 'H',  '4', '7'),		 /* 600E */
+	TPV_QI0('I', 'N',  '3', '6'),		 /* 600E */
+	TPV_QI0('I', 'T',  '5', '5'),		 /* 600X */
+	TPV_QI0('I', 'D',  '4', '8'),		 /* 770, 770E, 770ED */
+	TPV_QI0('I', 'I',  '4', '2'),		 /* 770X */
+	TPV_QI0('I', 'O',  '2', '3'),		 /* 770Z */
+
+	/* A-series ------------------------- */
+	/*      FW MODEL   BIOS VERS  EC VERS */
+	TPV_QI0('I', 'W',  '5', '9'),		 /* A20m */
+	TPV_QI0('I', 'V',  '6', '9'),		 /* A20p */
+	TPV_QI0('1', '0',  '2', '6'),		 /* A21e, A22e */
+	TPV_QI0('K', 'U',  '3', '6'),		 /* A21e */
+	TPV_QI0('K', 'X',  '3', '6'),		 /* A21m, A22m */
+	TPV_QI0('K', 'Y',  '3', '8'),		 /* A21p, A22p */
+	TPV_QI0('1', 'B',  '1', '7'),		 /* A22e */
+	TPV_QI0('1', '3',  '2', '0'),		 /* A22m */
+	TPV_QI0('1', 'E',  '7', '3'),		 /* A30/p (0) */
+	TPV_QI1('1', 'G',  '4', '1',  '1', '7'), /* A31/p (0) */
+	TPV_QI1('1', 'N',  '1', '6',  '0', '7'), /* A31/p (0) */
+
+	/* G-series ------------------------- */
+	/*      FW MODEL   BIOS VERS	      */
+	TPV_QI0('1', 'T',  'A', '6'),		 /* G40 */
+	TPV_QI0('1', 'X',  '5', '7'),		 /* G41 */
+
+	/* R-series, T-series --------------- */
+	/*      FW MODEL   BIOS VERS  EC VERS */
+	TPV_QI0('1', 'C',  'F', '0'),		 /* R30 */
+	TPV_QI0('1', 'F',  'F', '1'),		 /* R31 */
+	TPV_QI0('1', 'M',  '9', '7'),		 /* R32 */
+	TPV_QI0('1', 'O',  '6', '1'),		 /* R40 */
+	TPV_QI0('1', 'P',  '6', '5'),		 /* R40 */
+	TPV_QI0('1', 'S',  '7', '0'),		 /* R40e */
+	TPV_QI1('1', 'R',  'D', 'R',  '7', '1'), /* R50/p, R51,
+						    T40/p, T41/p, T42/p (1) */
+	TPV_QI1('1', 'V',  '7', '1',  '2', '8'), /* R50e, R51 (1) */
+	TPV_QI1('7', '8',  '7', '1',  '0', '6'), /* R51e (1) */
+	TPV_QI1('7', '6',  '6', '9',  '1', '6'), /* R52 (1) */
+	TPV_QI1('7', '0',  '6', '9',  '2', '8'), /* R52, T43 (1) */
+
+	TPV_QI0('I', 'Y',  '6', '1'),		 /* T20 */
+	TPV_QI0('K', 'Z',  '3', '4'),		 /* T21 */
+	TPV_QI0('1', '6',  '3', '2'),		 /* T22 */
+	TPV_QI1('1', 'A',  '6', '4',  '2', '3'), /* T23 (0) */
+	TPV_QI1('1', 'I',  '7', '1',  '2', '0'), /* T30 (0) */
+	TPV_QI1('1', 'Y',  '6', '5',  '2', '9'), /* T43/p (1) */
+
+	TPV_QL1('7', '9',  'E', '3',  '5', '0'), /* T60/p */
+	TPV_QL1('7', 'C',  'D', '2',  '2', '2'), /* R60, R60i */
+	TPV_QL0('7', 'E',  'D', '0'),		 /* R60e, R60i */
+
+	/*      BIOS FW    BIOS VERS  EC FW     EC VERS */
+	TPV_QI2('1', 'W',  '9', '0',  '1', 'V', '2', '8'), /* R50e (1) */
+	TPV_QL2('7', 'I',  '3', '4',  '7', '9', '5', '0'), /* T60/p wide */
+
+	/* X-series ------------------------- */
+	/*      FW MODEL   BIOS VERS  EC VERS */
+	TPV_QI0('I', 'Z',  '9', 'D'),		 /* X20, X21 */
+	TPV_QI0('1', 'D',  '7', '0'),		 /* X22, X23, X24 */
+	TPV_QI1('1', 'K',  '4', '8',  '1', '8'), /* X30 (0) */
+	TPV_QI1('1', 'Q',  '9', '7',  '2', '3'), /* X31, X32 (0) */
+	TPV_QI1('1', 'U',  'D', '3',  'B', '2'), /* X40 (0) */
+	TPV_QI1('7', '4',  '6', '4',  '2', '7'), /* X41 (0) */
+	TPV_QI1('7', '5',  '6', '0',  '2', '0'), /* X41t (0) */
+
+	TPV_QL0('7', 'B',  'D', '7'),		 /* X60/s */
+	TPV_QL0('7', 'J',  '3', '0'),		 /* X60t */
+
+	/* (0) - older versions lack DMI EC fw string and functionality */
+	/* (1) - older versions known to lack functionality */
+};
+
+#undef TPV_QL1
+#undef TPV_QL0
+#undef TPV_QI2
+#undef TPV_QI1
+#undef TPV_QI0
+#undef TPV_Q_X
+#undef TPV_Q
+
+static void __init tpacpi_check_outdated_fw(void)
+{
+	unsigned long fwvers;
+	u16 ec_version, bios_version;
+
+	fwvers = tpacpi_check_quirks(tpacpi_bios_version_qtable,
+				ARRAY_SIZE(tpacpi_bios_version_qtable));
+
+	if (!fwvers)
+		return;
+
+	bios_version = fwvers & 0xffffU;
+	ec_version = (fwvers >> 16) & 0xffffU;
+
+	/* note that unknown versions are set to 0x0000 and we use that */
+	if ((bios_version > thinkpad_id.bios_release) ||
+	    (ec_version > thinkpad_id.ec_release &&
+				ec_version != TPACPI_MATCH_ANY)) {
+		/*
+		 * The changelogs would let us track down the exact
+		 * reason, but it is just too much of a pain to track
+		 * it.  We only list BIOSes that are either really
+		 * broken, or really stable to begin with, so it is
+		 * best if the user upgrades the firmware anyway.
+		 */
+		printk(TPACPI_WARN
+			"WARNING: Outdated ThinkPad BIOS/EC firmware\n");
+		printk(TPACPI_WARN
+			"WARNING: This firmware may be missing critical bug "
+			"fixes and/or important features\n");
+	}
+}
+
 /****************************************************************************
  ****************************************************************************
  *
@@ -1634,6 +1815,7 @@ static int __init thinkpad_acpi_driver_init(struct ibm_init_struct *iibm)
 			(thinkpad_id.nummodel_str) ?
 				thinkpad_id.nummodel_str : "unknown");
 
+	tpacpi_check_outdated_fw();
 	return 0;
 }
 
-- 
cgit v0.10.2


From e675abafcc0df38125e6e94a9ba91c92fe774f52 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:13 -0300
Subject: thinkpad-acpi: be more strict when detecting a ThinkPad

Use stricter checks to decide that we're running on a supported ThinkPad.
This should remove some possible false positives, although nobody ever
bothered to report any.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index cc4155c..d69ab3f 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1621,6 +1621,9 @@ static void tpacpi_remove_driver_attributes(struct device_driver *drv)
  *
  *  Sources: IBM ThinkPad Public Web Documents (update changelogs),
  *  Information from users in ThinkWiki
+ *
+ *  WARNING: we use this table also to detect that the machine is
+ *  a ThinkPad in some cases, so don't remove entries lightly.
  */
 
 #define TPV_Q(__v, __id1, __id2, __bv1, __bv2)		\
@@ -1782,6 +1785,12 @@ static void __init tpacpi_check_outdated_fw(void)
 	}
 }
 
+static bool __init tpacpi_is_fw_known(void)
+{
+	return tpacpi_check_quirks(tpacpi_bios_version_qtable,
+			ARRAY_SIZE(tpacpi_bios_version_qtable)) != 0;
+}
+
 /****************************************************************************
  ****************************************************************************
  *
@@ -7706,9 +7715,11 @@ static int __init probe_for_thinkpad(void)
 
 	/*
 	 * Non-ancient models have better DMI tagging, but very old models
-	 * don't.
+	 * don't.  tpacpi_is_fw_known() is a cheat to help in that case.
 	 */
-	is_thinkpad = (thinkpad_id.model_str != NULL);
+	is_thinkpad = (thinkpad_id.model_str != NULL) ||
+		      (thinkpad_id.ec_model != 0) ||
+		      tpacpi_is_fw_known();
 
 	/* ec is required because many other handles are relative to it */
 	TPACPI_ACPIHANDLE_INIT(ec);
@@ -7719,13 +7730,6 @@ static int __init probe_for_thinkpad(void)
 		return -ENODEV;
 	}
 
-	/*
-	 * Risks a regression on very old machines, but reduces potential
-	 * false positives a damn great deal
-	 */
-	if (!is_thinkpad)
-		is_thinkpad = (thinkpad_id.vendor == PCI_VENDOR_ID_IBM);
-
 	if (!is_thinkpad && !force_load)
 		return -ENODEV;
 
-- 
cgit v0.10.2


From db25f16d1dcce8de12f2f5daf884cda02196b28c Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:14 -0300
Subject: thinkpad-acpi: hotkey poll fixes

Fix some locking, avoid exiting the kthread before kthread_stop() is
called on it, and clean up the hotkey poll routines a little bit.

Also, restore bits in the firmware mask after hotkey_source_mask is
changed.  Without this, we leave events disabled...

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index d69ab3f..679a73b 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1922,16 +1922,42 @@ struct tp_nvram_state {
        u8 volume_level;
 };
 
+/* kthread for the hotkey poller */
 static struct task_struct *tpacpi_hotkey_task;
-static u32 hotkey_source_mask;		/* bit mask 0=ACPI,1=NVRAM */
-static int hotkey_poll_freq = 10;	/* Hz */
+
+/* Acquired while the poller kthread is running, use to sync start/stop */
 static struct mutex hotkey_thread_mutex;
+
+/*
+ * Acquire mutex to write poller control variables.
+ * Increment hotkey_config_change when changing them.
+ *
+ * See HOTKEY_CONFIG_CRITICAL_START/HOTKEY_CONFIG_CRITICAL_END
+ */
 static struct mutex hotkey_thread_data_mutex;
 static unsigned int hotkey_config_change;
 
+/*
+ * hotkey poller control variables
+ *
+ * Must be atomic or readers will also need to acquire mutex
+ */
+static u32 hotkey_source_mask;		/* bit mask 0=ACPI,1=NVRAM */
+static unsigned int hotkey_poll_freq = 10; /* Hz */
+
+#define HOTKEY_CONFIG_CRITICAL_START \
+	do { \
+		mutex_lock(&hotkey_thread_data_mutex); \
+		hotkey_config_change++; \
+	} while (0);
+#define HOTKEY_CONFIG_CRITICAL_END \
+	mutex_unlock(&hotkey_thread_data_mutex);
+
 #else /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
 
 #define hotkey_source_mask 0U
+#define HOTKEY_CONFIG_CRITICAL_START
+#define HOTKEY_CONFIG_CRITICAL_END
 
 #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
 
@@ -1956,19 +1982,6 @@ static u16 *hotkey_keycode_map;
 
 static struct attribute_set *hotkey_dev_attributes;
 
-#ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
-#define HOTKEY_CONFIG_CRITICAL_START \
-	do { \
-		mutex_lock(&hotkey_thread_data_mutex); \
-		hotkey_config_change++; \
-	} while (0);
-#define HOTKEY_CONFIG_CRITICAL_END \
-	mutex_unlock(&hotkey_thread_data_mutex);
-#else
-#define HOTKEY_CONFIG_CRITICAL_START
-#define HOTKEY_CONFIG_CRITICAL_END
-#endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
-
 /* HKEY.MHKG() return bits */
 #define TP_HOTKEY_TABLET_MASK (1 << 3)
 
@@ -2013,7 +2026,9 @@ static int hotkey_mask_get(void)
 		if (!acpi_evalf(hkey_handle, &m, "DHKN", "d"))
 			return -EIO;
 	}
+	HOTKEY_CONFIG_CRITICAL_START
 	hotkey_mask = m | (hotkey_source_mask & hotkey_mask);
+	HOTKEY_CONFIG_CRITICAL_END
 
 	return 0;
 }
@@ -2266,6 +2281,7 @@ static int hotkey_kthread(void *data)
 	unsigned int si, so;
 	unsigned long t;
 	unsigned int change_detector, must_reset;
+	unsigned int poll_freq;
 
 	mutex_lock(&hotkey_thread_mutex);
 
@@ -2282,12 +2298,17 @@ static int hotkey_kthread(void *data)
 	mutex_lock(&hotkey_thread_data_mutex);
 	change_detector = hotkey_config_change;
 	mask = hotkey_source_mask & hotkey_mask;
+	poll_freq = hotkey_poll_freq;
 	mutex_unlock(&hotkey_thread_data_mutex);
 	hotkey_read_nvram(&s[so], mask);
 
-	while (!kthread_should_stop() && hotkey_poll_freq) {
-		if (t == 0)
-			t = 1000/hotkey_poll_freq;
+	while (!kthread_should_stop()) {
+		if (t == 0) {
+			if (likely(poll_freq))
+				t = 1000/poll_freq;
+			else
+				t = 100;	/* should never happen... */
+		}
 		t = msleep_interruptible(t);
 		if (unlikely(kthread_should_stop()))
 			break;
@@ -2303,6 +2324,7 @@ static int hotkey_kthread(void *data)
 			change_detector = hotkey_config_change;
 		}
 		mask = hotkey_source_mask & hotkey_mask;
+		poll_freq = hotkey_poll_freq;
 		mutex_unlock(&hotkey_thread_data_mutex);
 
 		if (likely(mask)) {
@@ -2322,6 +2344,7 @@ exit:
 	return 0;
 }
 
+/* call with hotkey_mutex held */
 static void hotkey_poll_stop_sync(void)
 {
 	if (tpacpi_hotkey_task) {
@@ -2338,10 +2361,11 @@ static void hotkey_poll_stop_sync(void)
 }
 
 /* call with hotkey_mutex held */
-static void hotkey_poll_setup(int may_warn)
+static void hotkey_poll_setup(bool may_warn)
 {
-	if ((hotkey_source_mask & hotkey_mask) != 0 &&
-	    hotkey_poll_freq > 0 &&
+	u32 hotkeys_to_poll = hotkey_source_mask & hotkey_mask;
+
+	if (hotkeys_to_poll != 0 && hotkey_poll_freq > 0 &&
 	    (tpacpi_inputdev->users > 0 || hotkey_report_mode < 2)) {
 		if (!tpacpi_hotkey_task) {
 			tpacpi_hotkey_task = kthread_run(hotkey_kthread,
@@ -2355,26 +2379,37 @@ static void hotkey_poll_setup(int may_warn)
 		}
 	} else {
 		hotkey_poll_stop_sync();
-		if (may_warn &&
-		    hotkey_source_mask != 0 && hotkey_poll_freq == 0) {
+		if (may_warn && hotkeys_to_poll != 0 &&
+		    hotkey_poll_freq == 0) {
 			printk(TPACPI_NOTICE
 				"hot keys 0x%08x require polling, "
 				"which is currently disabled\n",
-				hotkey_source_mask);
+				hotkeys_to_poll);
 		}
 	}
 }
 
-static void hotkey_poll_setup_safe(int may_warn)
+static void hotkey_poll_setup_safe(bool may_warn)
 {
 	mutex_lock(&hotkey_mutex);
 	hotkey_poll_setup(may_warn);
 	mutex_unlock(&hotkey_mutex);
 }
 
+/* call with hotkey_mutex held */
+static void hotkey_poll_set_freq(unsigned int freq)
+{
+	if (!freq)
+		hotkey_poll_stop_sync();
+
+	HOTKEY_CONFIG_CRITICAL_START
+	hotkey_poll_freq = freq;
+	HOTKEY_CONFIG_CRITICAL_END
+}
+
 #else /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
 
-static void hotkey_poll_setup_safe(int __unused)
+static void hotkey_poll_setup_safe(bool __unused)
 {
 }
 
@@ -2392,7 +2427,7 @@ static int hotkey_inputdev_open(struct input_dev *dev)
 	case TPACPI_LIFE_EXITING:
 		return -EBUSY;
 	case TPACPI_LIFE_RUNNING:
-		hotkey_poll_setup_safe(0);
+		hotkey_poll_setup_safe(false);
 		return 0;
 	}
 
@@ -2405,7 +2440,7 @@ static void hotkey_inputdev_close(struct input_dev *dev)
 {
 	/* disable hotkey polling when possible */
 	if (tpacpi_lifecycle == TPACPI_LIFE_RUNNING)
-		hotkey_poll_setup_safe(0);
+		hotkey_poll_setup_safe(false);
 }
 
 /* sysfs hotkey enable ------------------------------------------------- */
@@ -2479,7 +2514,7 @@ static ssize_t hotkey_mask_store(struct device *dev,
 	res = hotkey_mask_set(t);
 
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
-	hotkey_poll_setup(1);
+	hotkey_poll_setup(true);
 #endif
 
 	mutex_unlock(&hotkey_mutex);
@@ -2568,7 +2603,8 @@ static ssize_t hotkey_source_mask_store(struct device *dev,
 	hotkey_source_mask = t;
 	HOTKEY_CONFIG_CRITICAL_END
 
-	hotkey_poll_setup(1);
+	hotkey_poll_setup(true);
+	hotkey_mask_set(hotkey_mask);
 
 	mutex_unlock(&hotkey_mutex);
 
@@ -2601,9 +2637,9 @@ static ssize_t hotkey_poll_freq_store(struct device *dev,
 	if (mutex_lock_killable(&hotkey_mutex))
 		return -ERESTARTSYS;
 
-	hotkey_poll_freq = t;
+	hotkey_poll_set_freq(t);
+	hotkey_poll_setup(true);
 
-	hotkey_poll_setup(1);
 	mutex_unlock(&hotkey_mutex);
 
 	tpacpi_disclose_usertask("hotkey_poll_freq", "set to %lu\n", t);
@@ -2794,7 +2830,9 @@ static void tpacpi_send_radiosw_update(void)
 static void hotkey_exit(void)
 {
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+	mutex_lock(&hotkey_mutex);
 	hotkey_poll_stop_sync();
+	mutex_unlock(&hotkey_mutex);
 #endif
 
 	if (hotkey_dev_attributes)
@@ -3031,7 +3069,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	}
 
 	vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
-		    "hotkey source mask 0x%08x, polling freq %d\n",
+		    "hotkey source mask 0x%08x, polling freq %u\n",
 		    hotkey_source_mask, hotkey_poll_freq);
 #endif
 
@@ -3169,7 +3207,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	tpacpi_inputdev->open = &hotkey_inputdev_open;
 	tpacpi_inputdev->close = &hotkey_inputdev_close;
 
-	hotkey_poll_setup_safe(1);
+	hotkey_poll_setup_safe(true);
 	tpacpi_send_radiosw_update();
 	tpacpi_input_send_tabletsw();
 
@@ -3457,7 +3495,7 @@ static void hotkey_resume(void)
 	hotkey_tablet_mode_notify_change();
 	hotkey_wakeup_reason_notify_change();
 	hotkey_wakeup_hotunplug_complete_notify_change();
-	hotkey_poll_setup_safe(0);
+	hotkey_poll_setup_safe(false);
 }
 
 /* procfs -------------------------------------------------------------- */
-- 
cgit v0.10.2


From 06777be6d8688ba93103fffbbe9e64a5e6fab3c8 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:15 -0300
Subject: thinkpad-acpi: deprecate hotkey_bios_mask

Some analysis of the ACPI DSDTs shows that the HKEY pre-enabled mask
is always 0x80c (FN+F3,FN+F4 and FN+F12), which are the hotkeys that
the second gen of HKEY firmware supported (the first gen didn't report
any hotkeys, the second reported these tree hotkeys but had no mask
support, and the third added mask support).

So, this is probably some sort of backwards compatibility with older
versions of the IBM ThinkVantage suite.  We have no use for that, and
I know of exactly ZERO users of that attribute, anyway.  Start the
process of getting rid of it.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index e2ddcde..ab4b58e 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -240,9 +240,13 @@ sysfs notes:
 		Returns 0.
 
 	hotkey_bios_mask:
+		DEPRECATED, DON'T USE, WILL BE REMOVED IN THE FUTURE.
+
 		Returns the hot keys mask when thinkpad-acpi was loaded.
 		Upon module unload, the hot keys mask will be restored
-		to this value.
+		to this value.   This is always 0x80c, because those are
+		the hotkeys that were supported by ancient firmware
+		without mask support.
 
 	hotkey_enable:
 		DEPRECATED, WILL BE REMOVED SOON.
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 679a73b..6b66789 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2544,6 +2544,8 @@ static ssize_t hotkey_bios_mask_show(struct device *dev,
 			   struct device_attribute *attr,
 			   char *buf)
 {
+	printk_deprecated_attribute("hotkey_bios_mask",
+			"This attribute is useless.");
 	return snprintf(buf, PAGE_SIZE, "0x%08x\n", hotkey_orig_mask);
 }
 
-- 
cgit v0.10.2


From 20c9aa46f644b3ddb161a819d1b0c2b07097c4ee Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:16 -0300
Subject: thinkpad-acpi: Fix procfs hotkey reset command

echo "reset" > /proc/acpi/ibm/hotkey should do something non-useless,
so instead of setting it to Fn+F2, Fn+F3, Fn+F5, set it to
hotkey_recommended_mask.

It is not like it will survive for much longer, anyway.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index ab4b58e..6d03487 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -219,7 +219,7 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file:
 	echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
 	echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
 	... any other 8-hex-digit mask ...
-	echo reset > /proc/acpi/ibm/hotkey -- restore the original mask
+	echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
 
 The following commands have been deprecated and will cause the kernel
 to log a warning:
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 6b66789..dd779e5 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -3569,7 +3569,8 @@ static int hotkey_write(char *buf)
 			hotkey_enabledisable_warn(0);
 			res = -EPERM;
 		} else if (strlencmp(cmd, "reset") == 0) {
-			mask = hotkey_orig_mask;
+			mask = (hotkey_all_mask | hotkey_source_mask)
+				& ~hotkey_reserved_mask;
 		} else if (sscanf(cmd, "0x%x", &mask) == 1) {
 			/* mask set */
 		} else if (sscanf(cmd, "%x", &mask) == 1) {
-- 
cgit v0.10.2


From 230d8cf25ac32c7d2fdb4dda861ec5d954000ffb Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:17 -0300
Subject: thinkpad-acpi: don't poll by default any of the reserved hotkeys

Init hotkey_source_mask late, so that we can make use of
hotkey_reserved_mask to avoid polling any of the reserved
hotkeys by default.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index dd779e5..9c3bb0c 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -3062,19 +3062,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 			goto err_exit;
 	}
 
-#ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
-	if (tp_features.hotkey_mask) {
-		hotkey_source_mask = TPACPI_HKEY_NVRAM_GOOD_MASK
-					& ~hotkey_all_mask;
-	} else {
-		hotkey_source_mask = TPACPI_HKEY_NVRAM_GOOD_MASK;
-	}
-
-	vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
-		    "hotkey source mask 0x%08x, polling freq %u\n",
-		    hotkey_source_mask, hotkey_poll_freq);
-#endif
-
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_wlswemul) {
 		tp_features.hotkey_wlsw = 1;
@@ -3186,6 +3173,21 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 			| (1 << TP_ACPI_HOTKEYSCAN_FNEND);
 	}
 
+#ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
+	if (tp_features.hotkey_mask) {
+		hotkey_source_mask = TPACPI_HKEY_NVRAM_GOOD_MASK
+					& ~hotkey_all_mask
+					& ~hotkey_reserved_mask;
+	} else {
+		hotkey_source_mask = TPACPI_HKEY_NVRAM_GOOD_MASK
+					& ~hotkey_reserved_mask;
+	}
+
+	vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
+		    "hotkey source mask 0x%08x, polling freq %u\n",
+		    hotkey_source_mask, hotkey_poll_freq);
+#endif
+
 	dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
 			"enabling firmware HKEY event interface...\n");
 	res = hotkey_status_set(true);
-- 
cgit v0.10.2


From de4c8cc7bddd9c43dc1b85517ab445ffa8163058 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 12 Sep 2009 15:22:18 -0300
Subject: thinkpad-acpi: report brightness events when required

Report KEY_BRIGHTNESSUP and KEY_BRIGHTNESSDOWN input events when the
ThinkPad is in "passive brightness control" mode (because either we or
ACPI video touched _BCL), and ACPI video is not processing these
events by itself.

This happens only on Lenovo ThinkPads with ACPI video support, when
operating with the ACPI video driver in acpi_backlight=vendor mode.

Issuing these events is the right thing to do, and will work around
bugzilla #13368, if userspace is properly configured and actively
handles these events.

For other ThinkPads, and when ACPI video is handling brightness
changes, thinkpad-acpi will continue NOT sending KEY_BRIGHTNESS*
events by default.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 9c3bb0c..955adf6 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2854,6 +2854,15 @@ static void hotkey_exit(void)
 	}
 }
 
+static void __init hotkey_unmap(const unsigned int scancode)
+{
+	if (hotkey_keycode_map[scancode] != KEY_RESERVED) {
+		clear_bit(hotkey_keycode_map[scancode],
+			  tpacpi_inputdev->keybit);
+		hotkey_keycode_map[scancode] = KEY_RESERVED;
+	}
+}
+
 static int __init hotkey_init(struct ibm_init_struct *iibm)
 {
 	/* Requirements for changing the default keymaps:
@@ -2932,11 +2941,11 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 		KEY_UNKNOWN,	/* 0x0D: FN+INSERT */
 		KEY_UNKNOWN,	/* 0x0E: FN+DELETE */
 
-		/* These either have to go through ACPI video, or
-		 * act like in the IBM ThinkPads, so don't ever
-		 * enable them by default */
-		KEY_RESERVED,	/* 0x0F: FN+HOME (brightness up) */
-		KEY_RESERVED,	/* 0x10: FN+END (brightness down) */
+		/* These should be enabled --only-- when ACPI video
+		 * is disabled (i.e. in "vendor" mode), and are handled
+		 * in a special way by the init code */
+		KEY_BRIGHTNESSUP,	/* 0x0F: FN+HOME (brightness up) */
+		KEY_BRIGHTNESSDOWN,	/* 0x10: FN+END (brightness down) */
 
 		KEY_RESERVED,	/* 0x11: FN+PGUP (thinklight toggle) */
 
@@ -3162,15 +3171,14 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 		       "Disabling thinkpad-acpi brightness events "
 		       "by default...\n");
 
-		/* The hotkey_reserved_mask change below is not
-		 * necessary while the keys are at KEY_RESERVED in the
-		 * default map, but better safe than sorry, leave it
-		 * here as a marker of what we have to do, especially
-		 * when we finally become able to set this at runtime
-		 * on response to X.org requests */
+		/* Disable brightness up/down on Lenovo thinkpads when
+		 * ACPI is handling them, otherwise it is plain impossible
+		 * for userspace to do something even remotely sane */
 		hotkey_reserved_mask |=
 			(1 << TP_ACPI_HOTKEYSCAN_FNHOME)
 			| (1 << TP_ACPI_HOTKEYSCAN_FNEND);
+		hotkey_unmap(TP_ACPI_HOTKEYSCAN_FNHOME);
+		hotkey_unmap(TP_ACPI_HOTKEYSCAN_FNEND);
 	}
 
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
-- 
cgit v0.10.2


From 5f0dadb4bd259c3b832e19702f552947244edfb9 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Mon, 14 Sep 2009 12:43:52 +0200
Subject: thinkpad_acpi: fix rfkill memory leak on unload

rfkill_unregister() should always be followed by rfkill_destroy()

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 955adf6..f78d275 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1278,6 +1278,7 @@ static void tpacpi_destroy_rfkill(const enum tpacpi_rfk_id id)
 	tp_rfk = tpacpi_rfkill_switches[id];
 	if (tp_rfk) {
 		rfkill_unregister(tp_rfk->rfkill);
+		rfkill_destroy(tp_rfk->rfkill);
 		tpacpi_rfkill_switches[id] = NULL;
 		kfree(tp_rfk);
 	}
-- 
cgit v0.10.2


From 9caeb5324427990db7bc97e674794d201c1f0797 Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Mon, 14 Sep 2009 21:11:21 -0300
Subject: topstar-laptop: add new driver for hotkeys support on Topstar N01

This adds Topstar Laptop Extras ACPI driver. It enables hotkeys
functionality with Topstar N01 netbook. Besides hotkeys there are
other functions exposed by its ACPI firmware, but for now only
hotkeys reporting on Topstar N01 is supported. Topstar is a chinese
manufacturer, its website can be currently reached at
http://www.topstardigital.cn/

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Reviewed-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Reviewed-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 8dca9d8..d75a7a7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4968,6 +4968,11 @@ T:	quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.2.x/tomoyo-lsm/patches
 S:	Maintained
 F:	security/tomoyo/
 
+TOPSTAR LAPTOP EXTRAS DRIVER
+M:	Herton Ronaldo Krzesinski <herton@mandriva.com.br>
+S:	Maintained
+F:	drivers/platform/x86/topstar-laptop.c
+
 TOSHIBA ACPI EXTRAS DRIVER
 S:	Orphan
 F:	drivers/platform/x86/toshiba_acpi.c
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 77c6097..4831562 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -396,6 +396,15 @@ config ACPI_ASUS
 	  NOTE: This driver is deprecated and will probably be removed soon,
 	  use asus-laptop instead.
 
+config TOPSTAR_LAPTOP
+	tristate "Topstar Laptop Extras"
+	depends on ACPI
+	depends on INPUT
+	---help---
+	  This driver adds support for hotkeys found on Topstar laptops.
+
+	  If you have a Topstar laptop, say Y or M here.
+
 config ACPI_TOSHIBA
 	tristate "Toshiba Laptop Extras"
 	depends on ACPI
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 641b8bf..d1c1621 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -19,4 +19,5 @@ obj-$(CONFIG_PANASONIC_LAPTOP)	+= panasonic-laptop.o
 obj-$(CONFIG_INTEL_MENLOW)	+= intel_menlow.o
 obj-$(CONFIG_ACPI_WMI)		+= wmi.o
 obj-$(CONFIG_ACPI_ASUS)		+= asus_acpi.o
+obj-$(CONFIG_TOPSTAR_LAPTOP)	+= topstar-laptop.o
 obj-$(CONFIG_ACPI_TOSHIBA)	+= toshiba_acpi.o
diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c
new file mode 100644
index 0000000..02f3d4e
--- /dev/null
+++ b/drivers/platform/x86/topstar-laptop.c
@@ -0,0 +1,265 @@
+/*
+ * ACPI driver for Topstar notebooks (hotkeys support only)
+ *
+ * Copyright (c) 2009 Herton Ronaldo Krzesinski <herton@mandriva.com.br>
+ *
+ * Implementation inspired by existing x86 platform drivers, in special
+ * asus/eepc/fujitsu-laptop, thanks to their authors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/input.h>
+
+#define ACPI_TOPSTAR_CLASS "topstar"
+
+struct topstar_hkey {
+	struct input_dev *inputdev;
+};
+
+struct tps_key_entry {
+	u8 code;
+	u16 keycode;
+};
+
+static struct tps_key_entry topstar_keymap[] = {
+	{ 0x80, KEY_BRIGHTNESSUP },
+	{ 0x81, KEY_BRIGHTNESSDOWN },
+	{ 0x83, KEY_VOLUMEUP },
+	{ 0x84, KEY_VOLUMEDOWN },
+	{ 0x85, KEY_MUTE },
+	{ 0x86, KEY_SWITCHVIDEOMODE },
+	{ 0x87, KEY_F13 }, /* touchpad enable/disable key */
+	{ 0x88, KEY_WLAN },
+	{ 0x8a, KEY_WWW },
+	{ 0x8b, KEY_MAIL },
+	{ 0x8c, KEY_MEDIA },
+	{ 0x96, KEY_F14 }, /* G key? */
+	{ }
+};
+
+static struct tps_key_entry *tps_get_key_by_scancode(int code)
+{
+	struct tps_key_entry *key;
+
+	for (key = topstar_keymap; key->code; key++)
+		if (code == key->code)
+			return key;
+
+	return NULL;
+}
+
+static struct tps_key_entry *tps_get_key_by_keycode(int code)
+{
+	struct tps_key_entry *key;
+
+	for (key = topstar_keymap; key->code; key++)
+		if (code == key->keycode)
+			return key;
+
+	return NULL;
+}
+
+static void acpi_topstar_notify(struct acpi_device *device, u32 event)
+{
+	struct tps_key_entry *key;
+	static bool dup_evnt[2];
+	bool *dup;
+	struct topstar_hkey *hkey = acpi_driver_data(device);
+
+	/* 0x83 and 0x84 key events comes duplicated... */
+	if (event == 0x83 || event == 0x84) {
+		dup = &dup_evnt[event - 0x83];
+		if (*dup) {
+			*dup = false;
+			return;
+		}
+		*dup = true;
+	}
+
+	/*
+	 * 'G key' generate two event codes, convert to only
+	 * one event/key code for now (3G switch?)
+	 */
+	if (event == 0x97)
+		event = 0x96;
+
+	key = tps_get_key_by_scancode(event);
+	if (key) {
+		input_report_key(hkey->inputdev, key->keycode, 1);
+		input_sync(hkey->inputdev);
+		input_report_key(hkey->inputdev, key->keycode, 0);
+		input_sync(hkey->inputdev);
+		return;
+	}
+
+	/* Known non hotkey events don't handled or that we don't care yet */
+	if (event == 0x8e || event == 0x8f || event == 0x90)
+		return;
+
+	pr_info("unknown event = 0x%02x\n", event);
+}
+
+static int acpi_topstar_fncx_switch(struct acpi_device *device, bool state)
+{
+	acpi_status status;
+	union acpi_object fncx_params[1] = {
+		{ .type = ACPI_TYPE_INTEGER }
+	};
+	struct acpi_object_list fncx_arg_list = { 1, &fncx_params[0] };
+
+	fncx_params[0].integer.value = state ? 0x86 : 0x87;
+	status = acpi_evaluate_object(device->handle, "FNCX", &fncx_arg_list, NULL);
+	if (ACPI_FAILURE(status)) {
+		pr_err("Unable to switch FNCX notifications\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int topstar_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+{
+	struct tps_key_entry *key = tps_get_key_by_scancode(scancode);
+
+	if (!key)
+		return -EINVAL;
+
+	*keycode = key->keycode;
+	return 0;
+}
+
+static int topstar_setkeycode(struct input_dev *dev, int scancode, int keycode)
+{
+	struct tps_key_entry *key;
+	int old_keycode;
+
+	if (keycode < 0 || keycode > KEY_MAX)
+		return -EINVAL;
+
+	key = tps_get_key_by_scancode(scancode);
+
+	if (!key)
+		return -EINVAL;
+
+	old_keycode = key->keycode;
+	key->keycode = keycode;
+	set_bit(keycode, dev->keybit);
+	if (!tps_get_key_by_keycode(old_keycode))
+		clear_bit(old_keycode, dev->keybit);
+	return 0;
+}
+
+static int acpi_topstar_init_hkey(struct topstar_hkey *hkey)
+{
+	struct tps_key_entry *key;
+
+	hkey->inputdev = input_allocate_device();
+	if (!hkey->inputdev) {
+		pr_err("Unable to allocate input device\n");
+		return -ENODEV;
+	}
+	hkey->inputdev->name = "Topstar Laptop extra buttons";
+	hkey->inputdev->phys = "topstar/input0";
+	hkey->inputdev->id.bustype = BUS_HOST;
+	hkey->inputdev->getkeycode = topstar_getkeycode;
+	hkey->inputdev->setkeycode = topstar_setkeycode;
+	for (key = topstar_keymap; key->code; key++) {
+		set_bit(EV_KEY, hkey->inputdev->evbit);
+		set_bit(key->keycode, hkey->inputdev->keybit);
+	}
+	if (input_register_device(hkey->inputdev)) {
+		pr_err("Unable to register input device\n");
+		input_free_device(hkey->inputdev);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int acpi_topstar_add(struct acpi_device *device)
+{
+	struct topstar_hkey *tps_hkey;
+
+	tps_hkey = kzalloc(sizeof(struct topstar_hkey), GFP_KERNEL);
+	if (!tps_hkey)
+		return -ENOMEM;
+
+	strcpy(acpi_device_name(device), "Topstar TPSACPI");
+	strcpy(acpi_device_class(device), ACPI_TOPSTAR_CLASS);
+
+	if (acpi_topstar_fncx_switch(device, true))
+		goto add_err;
+
+	if (acpi_topstar_init_hkey(tps_hkey))
+		goto add_err;
+
+	device->driver_data = tps_hkey;
+	return 0;
+
+add_err:
+	kfree(tps_hkey);
+	return -ENODEV;
+}
+
+static int acpi_topstar_remove(struct acpi_device *device, int type)
+{
+	struct topstar_hkey *tps_hkey = acpi_driver_data(device);
+
+	acpi_topstar_fncx_switch(device, false);
+
+	input_unregister_device(tps_hkey->inputdev);
+	kfree(tps_hkey);
+
+	return 0;
+}
+
+static const struct acpi_device_id topstar_device_ids[] = {
+	{ "TPSACPI01", 0 },
+	{ "", 0 },
+};
+MODULE_DEVICE_TABLE(acpi, topstar_device_ids);
+
+static struct acpi_driver acpi_topstar_driver = {
+	.name = "Topstar laptop ACPI driver",
+	.class = ACPI_TOPSTAR_CLASS,
+	.ids = topstar_device_ids,
+	.ops = {
+		.add = acpi_topstar_add,
+		.remove = acpi_topstar_remove,
+		.notify = acpi_topstar_notify,
+	},
+};
+
+static int __init topstar_laptop_init(void)
+{
+	int ret;
+
+	ret = acpi_bus_register_driver(&acpi_topstar_driver);
+	if (ret < 0)
+		return ret;
+
+	printk(KERN_INFO "Topstar Laptop ACPI extras driver loaded\n");
+
+	return 0;
+}
+
+static void __exit topstar_laptop_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_topstar_driver);
+}
+
+module_init(topstar_laptop_init);
+module_exit(topstar_laptop_exit);
+
+MODULE_AUTHOR("Herton Ronaldo Krzesinski");
+MODULE_DESCRIPTION("Topstar Laptop ACPI Extras driver");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From de584afa5e188a2da484bb5373d449598cdb9f5e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Fri, 18 Sep 2009 12:41:09 -0700
Subject: hwmon driver for ACPI 4.0 power meters

This driver exposes ACPI 4.0 compliant power meters as hardware monitoring
devices.  This second revision of the driver also exports the ACPI string
info as sysfs attributes, a list of the devices that the meter measures,
and will send ACPI notifications over the ACPI netlink socket.  This
latest revision only enables the power capping controls if it can be
confirmed that the power cap can be enforced by the hardware and explains
how the notification interfaces work.

[akpm@linux-foundation.org: remove default-y]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/Documentation/hwmon/acpi_power_meter b/Documentation/hwmon/acpi_power_meter
new file mode 100644
index 0000000..c80399a
--- /dev/null
+++ b/Documentation/hwmon/acpi_power_meter
@@ -0,0 +1,51 @@
+Kernel driver power_meter
+=========================
+
+This driver talks to ACPI 4.0 power meters.
+
+Supported systems:
+  * Any recent system with ACPI 4.0.
+    Prefix: 'power_meter'
+    Datasheet: http://acpi.info/, section 10.4.
+
+Author: Darrick J. Wong
+
+Description
+-----------
+
+This driver implements sensor reading support for the power meters exposed in
+the ACPI 4.0 spec (Chapter 10.4).  These devices have a simple set of
+features--a power meter that returns average power use over a configurable
+interval, an optional capping mechanism, and a couple of trip points.  The
+sysfs interface conforms with the specification outlined in the "Power" section
+of Documentation/hwmon/sysfs-interface.
+
+Special Features
+----------------
+
+The power[1-*]_is_battery knob indicates if the power supply is a battery.
+Both power[1-*]_average_{min,max} must be set before the trip points will work.
+When both of them are set, an ACPI event will be broadcast on the ACPI netlink
+socket and a poll notification will be sent to the appropriate
+power[1-*]_average sysfs file.
+
+The power[1-*]_{model_number, serial_number, oem_info} fields display arbitrary
+strings that ACPI provides with the meter.  The measures/ directory contains
+symlinks to the devices that this meter measures.
+
+Some computers have the ability to enforce a power cap in hardware.  If this is
+the case, the power[1-*]_cap and related sysfs files will appear.  When the
+average power consumption exceeds the cap, an ACPI event will be broadcast on
+the netlink event socket and a poll notification will be sent to the
+appropriate power[1-*]_alarm file to indicate that capping has begun, and the
+hardware has taken action to reduce power consumption.  Most likely this will
+result in reduced performance.
+
+There are a few other ACPI notifications that can be sent by the firmware.  In
+all cases the ACPI event will be broadcast on the ACPI netlink event socket as
+well as sent as a poll notification to a sysfs file.  The events are as
+follows:
+
+power[1-*]_cap will be notified if the firmware changes the power cap.
+power[1-*]_interval will be notified if the firmware changes the averaging
+interval.
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 7ec7d88..2c1cab5 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -82,6 +82,17 @@ config ACPI_PROCFS_POWER
 
 	  Say N to delete power /proc/acpi/ directories that have moved to /sys/
 
+config ACPI_POWER_METER
+	tristate "ACPI 4.0 power meter"
+	depends on HWMON
+	help
+	  This driver exposes ACPI 4.0 power meters as hardware monitoring
+	  devices.  Say Y (or M) if you have a computer with ACPI 4.0 firmware
+	  and a power meter.
+
+	  To compile this driver as a module, choose M here:
+	  the module will be called power-meter.
+
 config ACPI_SYSFS_POWER
 	bool "Future power /sys interface"
 	select POWER_SUPPLY
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 03a985b..82cd49d 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_ACPI_HOTPLUG_MEMORY) += acpi_memhotplug.o
 obj-$(CONFIG_ACPI_BATTERY)	+= battery.o
 obj-$(CONFIG_ACPI_SBS)		+= sbshc.o
 obj-$(CONFIG_ACPI_SBS)		+= sbs.o
+obj-$(CONFIG_ACPI_POWER_METER)	+= power_meter.o
 
 # processor has its own "processor." module_param namespace
 processor-y			:= processor_core.o processor_throttling.o
diff --git a/drivers/acpi/power_meter.c b/drivers/acpi/power_meter.c
new file mode 100644
index 0000000..e6bfd77
--- /dev/null
+++ b/drivers/acpi/power_meter.c
@@ -0,0 +1,1018 @@
+/*
+ * A hwmon driver for ACPI 4.0 power meters
+ * Copyright (C) 2009 IBM
+ *
+ * Author: Darrick J. Wong <djwong@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/dmi.h>
+#include <linux/kdev_t.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/acpi_bus.h>
+
+#define ACPI_POWER_METER_NAME		"power_meter"
+ACPI_MODULE_NAME(ACPI_POWER_METER_NAME);
+#define ACPI_POWER_METER_DEVICE_NAME	"Power Meter"
+#define ACPI_POWER_METER_CLASS		"power_meter_resource"
+
+#define NUM_SENSORS			17
+
+#define POWER_METER_CAN_MEASURE	(1 << 0)
+#define POWER_METER_CAN_TRIP	(1 << 1)
+#define POWER_METER_CAN_CAP	(1 << 2)
+#define POWER_METER_CAN_NOTIFY	(1 << 3)
+#define POWER_METER_IS_BATTERY	(1 << 8)
+#define UNKNOWN_HYSTERESIS	0xFFFFFFFF
+
+#define METER_NOTIFY_CONFIG	0x80
+#define METER_NOTIFY_TRIP	0x81
+#define METER_NOTIFY_CAP	0x82
+#define METER_NOTIFY_CAPPING	0x83
+#define METER_NOTIFY_INTERVAL	0x84
+
+#define POWER_AVERAGE_NAME	"power1_average"
+#define POWER_CAP_NAME		"power1_cap"
+#define POWER_AVG_INTERVAL_NAME	"power1_average_interval"
+#define POWER_ALARM_NAME	"power1_alarm"
+
+static int cap_in_hardware;
+static int force_cap_on;
+
+static int can_cap_in_hardware(void)
+{
+	return force_cap_on || cap_in_hardware;
+}
+
+static struct acpi_device_id power_meter_ids[] = {
+	{"ACPI000D", 0},
+	{"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, power_meter_ids);
+
+struct acpi_power_meter_capabilities {
+	acpi_integer		flags;
+	acpi_integer		units;
+	acpi_integer		type;
+	acpi_integer		accuracy;
+	acpi_integer		sampling_time;
+	acpi_integer		min_avg_interval;
+	acpi_integer		max_avg_interval;
+	acpi_integer		hysteresis;
+	acpi_integer		configurable_cap;
+	acpi_integer		min_cap;
+	acpi_integer		max_cap;
+};
+
+struct acpi_power_meter_resource {
+	struct acpi_device	*acpi_dev;
+	acpi_bus_id		name;
+	struct mutex		lock;
+	struct device		*hwmon_dev;
+	struct acpi_power_meter_capabilities	caps;
+	acpi_string		model_number;
+	acpi_string		serial_number;
+	acpi_string		oem_info;
+	acpi_integer		power;
+	acpi_integer		cap;
+	acpi_integer		avg_interval;
+	int			sensors_valid;
+	unsigned long		sensors_last_updated;
+	struct sensor_device_attribute	sensors[NUM_SENSORS];
+	int			num_sensors;
+	int			trip[2];
+	int			num_domain_devices;
+	struct acpi_device	**domain_devices;
+	struct kobject		*holders_dir;
+};
+
+struct ro_sensor_template {
+	char *label;
+	ssize_t (*show)(struct device *dev,
+			struct device_attribute *devattr,
+			char *buf);
+	int index;
+};
+
+struct rw_sensor_template {
+	char *label;
+	ssize_t (*show)(struct device *dev,
+			struct device_attribute *devattr,
+			char *buf);
+	ssize_t (*set)(struct device *dev,
+		       struct device_attribute *devattr,
+		       const char *buf, size_t count);
+	int index;
+};
+
+/* Averaging interval */
+static int update_avg_interval(struct acpi_power_meter_resource *resource)
+{
+	unsigned long long data;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_GAI",
+				       NULL, &data);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _GAI"));
+		return -ENODEV;
+	}
+
+	resource->avg_interval = data;
+	return 0;
+}
+
+static ssize_t show_avg_interval(struct device *dev,
+				 struct device_attribute *devattr,
+				 char *buf)
+{
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+
+	mutex_lock(&resource->lock);
+	update_avg_interval(resource);
+	mutex_unlock(&resource->lock);
+
+	return sprintf(buf, "%llu\n", resource->avg_interval);
+}
+
+static ssize_t set_avg_interval(struct device *dev,
+				struct device_attribute *devattr,
+				const char *buf, size_t count)
+{
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+	struct acpi_object_list args = { 1, &arg0 };
+	int res;
+	unsigned long temp;
+	unsigned long long data;
+	acpi_status status;
+
+	res = strict_strtoul(buf, 10, &temp);
+	if (res)
+		return res;
+
+	if (temp > resource->caps.max_avg_interval ||
+	    temp < resource->caps.min_avg_interval)
+		return -EINVAL;
+	arg0.integer.value = temp;
+
+	mutex_lock(&resource->lock);
+	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_PAI",
+				       &args, &data);
+	if (!ACPI_FAILURE(status))
+		resource->avg_interval = temp;
+	mutex_unlock(&resource->lock);
+
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PAI"));
+		return -EINVAL;
+	}
+
+	/* _PAI returns 0 on success, nonzero otherwise */
+	if (data)
+		return -EINVAL;
+
+	return count;
+}
+
+/* Cap functions */
+static int update_cap(struct acpi_power_meter_resource *resource)
+{
+	unsigned long long data;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_GHL",
+				       NULL, &data);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _GHL"));
+		return -ENODEV;
+	}
+
+	resource->cap = data;
+	return 0;
+}
+
+static ssize_t show_cap(struct device *dev,
+			struct device_attribute *devattr,
+			char *buf)
+{
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+
+	mutex_lock(&resource->lock);
+	update_cap(resource);
+	mutex_unlock(&resource->lock);
+
+	return sprintf(buf, "%llu\n", resource->cap * 1000);
+}
+
+static ssize_t set_cap(struct device *dev, struct device_attribute *devattr,
+		       const char *buf, size_t count)
+{
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+	struct acpi_object_list args = { 1, &arg0 };
+	int res;
+	unsigned long temp;
+	unsigned long long data;
+	acpi_status status;
+
+	res = strict_strtoul(buf, 10, &temp);
+	if (res)
+		return res;
+
+	temp /= 1000;
+	if (temp > resource->caps.max_cap || temp < resource->caps.min_cap)
+		return -EINVAL;
+	arg0.integer.value = temp;
+
+	mutex_lock(&resource->lock);
+	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_SHL",
+				       &args, &data);
+	if (!ACPI_FAILURE(status))
+		resource->cap = temp;
+	mutex_unlock(&resource->lock);
+
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _SHL"));
+		return -EINVAL;
+	}
+
+	/* _SHL returns 0 on success, nonzero otherwise */
+	if (data)
+		return -EINVAL;
+
+	return count;
+}
+
+/* Power meter trip points */
+static int set_acpi_trip(struct acpi_power_meter_resource *resource)
+{
+	union acpi_object arg_objs[] = {
+		{ACPI_TYPE_INTEGER},
+		{ACPI_TYPE_INTEGER}
+	};
+	struct acpi_object_list args = { 2, arg_objs };
+	unsigned long long data;
+	acpi_status status;
+
+	/* Both trip levels must be set */
+	if (resource->trip[0] < 0 || resource->trip[1] < 0)
+		return 0;
+
+	/* This driver stores min, max; ACPI wants max, min. */
+	arg_objs[0].integer.value = resource->trip[1];
+	arg_objs[1].integer.value = resource->trip[0];
+
+	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_PTP",
+				       &args, &data);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PTP"));
+		return -EINVAL;
+	}
+
+	return data;
+}
+
+static ssize_t set_trip(struct device *dev, struct device_attribute *devattr,
+			const char *buf, size_t count)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+	int res;
+	unsigned long temp;
+
+	res = strict_strtoul(buf, 10, &temp);
+	if (res)
+		return res;
+
+	temp /= 1000;
+	if (temp < 0)
+		return -EINVAL;
+
+	mutex_lock(&resource->lock);
+	resource->trip[attr->index - 7] = temp;
+	res = set_acpi_trip(resource);
+	mutex_unlock(&resource->lock);
+
+	if (res)
+		return res;
+
+	return count;
+}
+
+/* Power meter */
+static int update_meter(struct acpi_power_meter_resource *resource)
+{
+	unsigned long long data;
+	acpi_status status;
+	unsigned long local_jiffies = jiffies;
+
+	if (time_before(local_jiffies, resource->sensors_last_updated +
+			msecs_to_jiffies(resource->caps.sampling_time)) &&
+			resource->sensors_valid)
+		return 0;
+
+	status = acpi_evaluate_integer(resource->acpi_dev->handle, "_PMM",
+				       NULL, &data);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PMM"));
+		return -ENODEV;
+	}
+
+	resource->power = data;
+	resource->sensors_valid = 1;
+	resource->sensors_last_updated = jiffies;
+	return 0;
+}
+
+static ssize_t show_power(struct device *dev,
+			  struct device_attribute *devattr,
+			  char *buf)
+{
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+
+	mutex_lock(&resource->lock);
+	update_meter(resource);
+	mutex_unlock(&resource->lock);
+
+	return sprintf(buf, "%llu\n", resource->power * 1000);
+}
+
+/* Miscellaneous */
+static ssize_t show_str(struct device *dev,
+			struct device_attribute *devattr,
+			char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+	acpi_string val;
+
+	switch (attr->index) {
+	case 0:
+		val = resource->model_number;
+		break;
+	case 1:
+		val = resource->serial_number;
+		break;
+	case 2:
+		val = resource->oem_info;
+		break;
+	default:
+		BUG();
+	}
+
+	return sprintf(buf, "%s\n", val);
+}
+
+static ssize_t show_val(struct device *dev,
+			struct device_attribute *devattr,
+			char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+	acpi_integer val = 0;
+
+	switch (attr->index) {
+	case 0:
+		val = resource->caps.min_avg_interval;
+		break;
+	case 1:
+		val = resource->caps.max_avg_interval;
+		break;
+	case 2:
+		val = resource->caps.min_cap * 1000;
+		break;
+	case 3:
+		val = resource->caps.max_cap * 1000;
+		break;
+	case 4:
+		if (resource->caps.hysteresis == UNKNOWN_HYSTERESIS)
+			return sprintf(buf, "unknown\n");
+
+		val = resource->caps.hysteresis * 1000;
+		break;
+	case 5:
+		if (resource->caps.flags & POWER_METER_IS_BATTERY)
+			val = 1;
+		else
+			val = 0;
+		break;
+	case 6:
+		if (resource->power > resource->cap)
+			val = 1;
+		else
+			val = 0;
+		break;
+	case 7:
+	case 8:
+		if (resource->trip[attr->index - 7] < 0)
+			return sprintf(buf, "unknown\n");
+
+		val = resource->trip[attr->index - 7] * 1000;
+		break;
+	default:
+		BUG();
+	}
+
+	return sprintf(buf, "%llu\n", val);
+}
+
+static ssize_t show_accuracy(struct device *dev,
+			     struct device_attribute *devattr,
+			     char *buf)
+{
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	struct acpi_power_meter_resource *resource = acpi_dev->driver_data;
+	unsigned int acc = resource->caps.accuracy;
+
+	return sprintf(buf, "%u.%u%%\n", acc / 1000, acc % 1000);
+}
+
+static ssize_t show_name(struct device *dev,
+			 struct device_attribute *devattr,
+			 char *buf)
+{
+	return sprintf(buf, "%s\n", ACPI_POWER_METER_NAME);
+}
+
+/* Sensor descriptions.  If you add a sensor, update NUM_SENSORS above! */
+static struct ro_sensor_template meter_ro_attrs[] = {
+{POWER_AVERAGE_NAME, show_power, 0},
+{"power1_accuracy", show_accuracy, 0},
+{"power1_average_interval_min", show_val, 0},
+{"power1_average_interval_max", show_val, 1},
+{"power1_is_battery", show_val, 5},
+{NULL, NULL, 0},
+};
+
+static struct rw_sensor_template meter_rw_attrs[] = {
+{POWER_AVG_INTERVAL_NAME, show_avg_interval, set_avg_interval, 0},
+{NULL, NULL, NULL, 0},
+};
+
+static struct ro_sensor_template misc_cap_attrs[] = {
+{"power1_cap_min", show_val, 2},
+{"power1_cap_max", show_val, 3},
+{"power1_cap_hyst", show_val, 4},
+{POWER_ALARM_NAME, show_val, 6},
+{NULL, NULL, 0},
+};
+
+static struct ro_sensor_template ro_cap_attrs[] = {
+{POWER_CAP_NAME, show_cap, 0},
+{NULL, NULL, 0},
+};
+
+static struct rw_sensor_template rw_cap_attrs[] = {
+{POWER_CAP_NAME, show_cap, set_cap, 0},
+{NULL, NULL, NULL, 0},
+};
+
+static struct rw_sensor_template trip_attrs[] = {
+{"power1_average_min", show_val, set_trip, 7},
+{"power1_average_max", show_val, set_trip, 8},
+{NULL, NULL, NULL, 0},
+};
+
+static struct ro_sensor_template misc_attrs[] = {
+{"name", show_name, 0},
+{"power1_model_number", show_str, 0},
+{"power1_oem_info", show_str, 2},
+{"power1_serial_number", show_str, 1},
+{NULL, NULL, 0},
+};
+
+/* Read power domain data */
+static void remove_domain_devices(struct acpi_power_meter_resource *resource)
+{
+	int i;
+
+	if (!resource->num_domain_devices)
+		return;
+
+	for (i = 0; i < resource->num_domain_devices; i++) {
+		struct acpi_device *obj = resource->domain_devices[i];
+		if (!obj)
+			continue;
+
+		sysfs_remove_link(resource->holders_dir,
+				  kobject_name(&obj->dev.kobj));
+		put_device(&obj->dev);
+	}
+
+	kfree(resource->domain_devices);
+	kobject_put(resource->holders_dir);
+}
+
+static int read_domain_devices(struct acpi_power_meter_resource *resource)
+{
+	int res = 0;
+	int i;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *pss;
+	acpi_status status;
+
+	status = acpi_evaluate_object(resource->acpi_dev->handle, "_PMD", NULL,
+				      &buffer);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PMD"));
+		return -ENODEV;
+	}
+
+	pss = buffer.pointer;
+	if (!pss ||
+	    pss->type != ACPI_TYPE_PACKAGE) {
+		dev_err(&resource->acpi_dev->dev, ACPI_POWER_METER_NAME
+			"Invalid _PMD data\n");
+		res = -EFAULT;
+		goto end;
+	}
+
+	if (!pss->package.count)
+		goto end;
+
+	resource->domain_devices = kzalloc(sizeof(struct acpi_device *) *
+					   pss->package.count, GFP_KERNEL);
+	if (!resource->domain_devices) {
+		res = -ENOMEM;
+		goto end;
+	}
+
+	resource->holders_dir = kobject_create_and_add("measures",
+					&resource->acpi_dev->dev.kobj);
+	if (!resource->holders_dir) {
+		res = -ENOMEM;
+		goto exit_free;
+	}
+
+	resource->num_domain_devices = pss->package.count;
+
+	for (i = 0; i < pss->package.count; i++) {
+		struct acpi_device *obj;
+		union acpi_object *element = &(pss->package.elements[i]);
+
+		/* Refuse non-references */
+		if (element->type != ACPI_TYPE_LOCAL_REFERENCE)
+			continue;
+
+		/* Create a symlink to domain objects */
+		resource->domain_devices[i] = NULL;
+		status = acpi_bus_get_device(element->reference.handle,
+					     &resource->domain_devices[i]);
+		if (ACPI_FAILURE(status))
+			continue;
+
+		obj = resource->domain_devices[i];
+		get_device(&obj->dev);
+
+		res = sysfs_create_link(resource->holders_dir, &obj->dev.kobj,
+				      kobject_name(&obj->dev.kobj));
+		if (res) {
+			put_device(&obj->dev);
+			resource->domain_devices[i] = NULL;
+		}
+	}
+
+	res = 0;
+	goto end;
+
+exit_free:
+	kfree(resource->domain_devices);
+end:
+	kfree(buffer.pointer);
+	return res;
+}
+
+/* Registration and deregistration */
+static int register_ro_attrs(struct acpi_power_meter_resource *resource,
+			     struct ro_sensor_template *ro)
+{
+	struct device *dev = &resource->acpi_dev->dev;
+	struct sensor_device_attribute *sensors =
+		&resource->sensors[resource->num_sensors];
+	int res = 0;
+
+	while (ro->label) {
+		sensors->dev_attr.attr.name = ro->label;
+		sensors->dev_attr.attr.mode = S_IRUGO;
+		sensors->dev_attr.show = ro->show;
+		sensors->index = ro->index;
+
+		res = device_create_file(dev, &sensors->dev_attr);
+		if (res) {
+			sensors->dev_attr.attr.name = NULL;
+			goto error;
+		}
+		sensors++;
+		resource->num_sensors++;
+		ro++;
+	}
+
+error:
+	return res;
+}
+
+static int register_rw_attrs(struct acpi_power_meter_resource *resource,
+			     struct rw_sensor_template *rw)
+{
+	struct device *dev = &resource->acpi_dev->dev;
+	struct sensor_device_attribute *sensors =
+		&resource->sensors[resource->num_sensors];
+	int res = 0;
+
+	while (rw->label) {
+		sensors->dev_attr.attr.name = rw->label;
+		sensors->dev_attr.attr.mode = S_IRUGO | S_IWUSR;
+		sensors->dev_attr.show = rw->show;
+		sensors->dev_attr.store = rw->set;
+		sensors->index = rw->index;
+
+		res = device_create_file(dev, &sensors->dev_attr);
+		if (res) {
+			sensors->dev_attr.attr.name = NULL;
+			goto error;
+		}
+		sensors++;
+		resource->num_sensors++;
+		rw++;
+	}
+
+error:
+	return res;
+}
+
+static void remove_attrs(struct acpi_power_meter_resource *resource)
+{
+	int i;
+
+	for (i = 0; i < resource->num_sensors; i++) {
+		if (!resource->sensors[i].dev_attr.attr.name)
+			continue;
+		device_remove_file(&resource->acpi_dev->dev,
+				   &resource->sensors[i].dev_attr);
+	}
+
+	remove_domain_devices(resource);
+
+	resource->num_sensors = 0;
+}
+
+static int setup_attrs(struct acpi_power_meter_resource *resource)
+{
+	int res = 0;
+
+	res = read_domain_devices(resource);
+	if (res)
+		return res;
+
+	if (resource->caps.flags & POWER_METER_CAN_MEASURE) {
+		res = register_ro_attrs(resource, meter_ro_attrs);
+		if (res)
+			goto error;
+		res = register_rw_attrs(resource, meter_rw_attrs);
+		if (res)
+			goto error;
+	}
+
+	if (resource->caps.flags & POWER_METER_CAN_CAP) {
+		if (!can_cap_in_hardware()) {
+			dev_err(&resource->acpi_dev->dev,
+				"Ignoring unsafe software power cap!\n");
+			goto skip_unsafe_cap;
+		}
+
+		if (resource->caps.configurable_cap) {
+			res = register_rw_attrs(resource, rw_cap_attrs);
+			if (res)
+				goto error;
+		} else {
+			res = register_ro_attrs(resource, ro_cap_attrs);
+			if (res)
+				goto error;
+		}
+		res = register_ro_attrs(resource, misc_cap_attrs);
+		if (res)
+			goto error;
+	}
+skip_unsafe_cap:
+
+	if (resource->caps.flags & POWER_METER_CAN_TRIP) {
+		res = register_rw_attrs(resource, trip_attrs);
+		if (res)
+			goto error;
+	}
+
+	res = register_ro_attrs(resource, misc_attrs);
+	if (res)
+		goto error;
+
+	return res;
+error:
+	remove_domain_devices(resource);
+	remove_attrs(resource);
+	return res;
+}
+
+static void free_capabilities(struct acpi_power_meter_resource *resource)
+{
+	acpi_string *str;
+	int i;
+
+	str = &resource->model_number;
+	for (i = 0; i < 3; i++, str++)
+		kfree(*str);
+}
+
+static int read_capabilities(struct acpi_power_meter_resource *resource)
+{
+	int res = 0;
+	int i;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer state = { 0, NULL };
+	struct acpi_buffer format = { sizeof("NNNNNNNNNNN"), "NNNNNNNNNNN" };
+	union acpi_object *pss;
+	acpi_string *str;
+	acpi_status status;
+
+	status = acpi_evaluate_object(resource->acpi_dev->handle, "_PMC", NULL,
+				      &buffer);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PMC"));
+		return -ENODEV;
+	}
+
+	pss = buffer.pointer;
+	if (!pss ||
+	    pss->type != ACPI_TYPE_PACKAGE ||
+	    pss->package.count != 14) {
+		dev_err(&resource->acpi_dev->dev, ACPI_POWER_METER_NAME
+			"Invalid _PMC data\n");
+		res = -EFAULT;
+		goto end;
+	}
+
+	/* Grab all the integer data at once */
+	state.length = sizeof(struct acpi_power_meter_capabilities);
+	state.pointer = &resource->caps;
+
+	status = acpi_extract_package(pss, &format, &state);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Invalid data"));
+		res = -EFAULT;
+		goto end;
+	}
+
+	if (resource->caps.units) {
+		dev_err(&resource->acpi_dev->dev, ACPI_POWER_METER_NAME
+			"Unknown units %llu.\n",
+			resource->caps.units);
+		res = -EINVAL;
+		goto end;
+	}
+
+	/* Grab the string data */
+	str = &resource->model_number;
+
+	for (i = 11; i < 14; i++) {
+		union acpi_object *element = &(pss->package.elements[i]);
+
+		if (element->type != ACPI_TYPE_STRING) {
+			res = -EINVAL;
+			goto error;
+		}
+
+		*str = kzalloc(sizeof(u8) * (element->string.length + 1),
+			       GFP_KERNEL);
+		if (!*str) {
+			res = -ENOMEM;
+			goto error;
+		}
+
+		strncpy(*str, element->string.pointer, element->string.length);
+		str++;
+	}
+
+	dev_info(&resource->acpi_dev->dev, "Found ACPI power meter.\n");
+	goto end;
+error:
+	str = &resource->model_number;
+	for (i = 0; i < 3; i++, str++)
+		kfree(*str);
+end:
+	kfree(buffer.pointer);
+	return res;
+}
+
+/* Handle ACPI event notifications */
+static void acpi_power_meter_notify(struct acpi_device *device, u32 event)
+{
+	struct acpi_power_meter_resource *resource;
+	int res;
+
+	if (!device || !acpi_driver_data(device))
+		return;
+
+	resource = acpi_driver_data(device);
+
+	mutex_lock(&resource->lock);
+	switch (event) {
+	case METER_NOTIFY_CONFIG:
+		free_capabilities(resource);
+		res = read_capabilities(resource);
+		if (res)
+			break;
+
+		remove_attrs(resource);
+		setup_attrs(resource);
+		break;
+	case METER_NOTIFY_TRIP:
+		sysfs_notify(&device->dev.kobj, NULL, POWER_AVERAGE_NAME);
+		update_meter(resource);
+		break;
+	case METER_NOTIFY_CAP:
+		sysfs_notify(&device->dev.kobj, NULL, POWER_CAP_NAME);
+		update_cap(resource);
+		break;
+	case METER_NOTIFY_INTERVAL:
+		sysfs_notify(&device->dev.kobj, NULL, POWER_AVG_INTERVAL_NAME);
+		update_avg_interval(resource);
+		break;
+	case METER_NOTIFY_CAPPING:
+		sysfs_notify(&device->dev.kobj, NULL, POWER_ALARM_NAME);
+		dev_info(&device->dev, "Capping in progress.\n");
+		break;
+	default:
+		BUG();
+	}
+	mutex_unlock(&resource->lock);
+
+	acpi_bus_generate_netlink_event(ACPI_POWER_METER_CLASS,
+					dev_name(&device->dev), event, 0);
+}
+
+static int acpi_power_meter_add(struct acpi_device *device)
+{
+	int res;
+	struct acpi_power_meter_resource *resource;
+
+	if (!device)
+		return -EINVAL;
+
+	resource = kzalloc(sizeof(struct acpi_power_meter_resource),
+			   GFP_KERNEL);
+	if (!resource)
+		return -ENOMEM;
+
+	resource->sensors_valid = 0;
+	resource->acpi_dev = device;
+	mutex_init(&resource->lock);
+	strcpy(acpi_device_name(device), ACPI_POWER_METER_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_POWER_METER_CLASS);
+	device->driver_data = resource;
+
+	free_capabilities(resource);
+	res = read_capabilities(resource);
+	if (res)
+		goto exit_free;
+
+	resource->trip[0] = resource->trip[1] = -1;
+
+	res = setup_attrs(resource);
+	if (res)
+		goto exit_free;
+
+	resource->hwmon_dev = hwmon_device_register(&device->dev);
+	if (IS_ERR(resource->hwmon_dev)) {
+		res = PTR_ERR(resource->hwmon_dev);
+		goto exit_remove;
+	}
+
+	res = 0;
+	goto exit;
+
+exit_remove:
+	remove_attrs(resource);
+exit_free:
+	kfree(resource);
+exit:
+	return res;
+}
+
+static int acpi_power_meter_remove(struct acpi_device *device, int type)
+{
+	struct acpi_power_meter_resource *resource;
+
+	if (!device || !acpi_driver_data(device))
+		return -EINVAL;
+
+	resource = acpi_driver_data(device);
+	hwmon_device_unregister(resource->hwmon_dev);
+
+	free_capabilities(resource);
+	remove_attrs(resource);
+
+	kfree(resource);
+	return 0;
+}
+
+static int acpi_power_meter_resume(struct acpi_device *device)
+{
+	struct acpi_power_meter_resource *resource;
+
+	if (!device || !acpi_driver_data(device))
+		return -EINVAL;
+
+	resource = acpi_driver_data(device);
+	free_capabilities(resource);
+	read_capabilities(resource);
+
+	return 0;
+}
+
+static struct acpi_driver acpi_power_meter_driver = {
+	.name = "power_meter",
+	.class = ACPI_POWER_METER_CLASS,
+	.ids = power_meter_ids,
+	.ops = {
+		.add = acpi_power_meter_add,
+		.remove = acpi_power_meter_remove,
+		.resume = acpi_power_meter_resume,
+		.notify = acpi_power_meter_notify,
+		},
+};
+
+/* Module init/exit routines */
+static int __init enable_cap_knobs(const struct dmi_system_id *d)
+{
+	cap_in_hardware = 1;
+	return 0;
+}
+
+static struct dmi_system_id __initdata pm_dmi_table[] = {
+	{
+		enable_cap_knobs, "IBM Active Energy Manager",
+		{
+			DMI_MATCH(DMI_SYS_VENDOR, "IBM")
+		},
+	},
+	{}
+};
+
+static int __init acpi_power_meter_init(void)
+{
+	int result;
+
+	if (acpi_disabled)
+		return -ENODEV;
+
+	dmi_check_system(pm_dmi_table);
+
+	result = acpi_bus_register_driver(&acpi_power_meter_driver);
+	if (result < 0)
+		return -ENODEV;
+
+	return 0;
+}
+
+static void __exit acpi_power_meter_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_power_meter_driver);
+}
+
+MODULE_AUTHOR("Darrick J. Wong <djwong@us.ibm.com>");
+MODULE_DESCRIPTION("ACPI 4.0 power meter driver");
+MODULE_LICENSE("GPL");
+
+module_param(force_cap_on, bool, 0644);
+MODULE_PARM_DESC(force_cap_on, "Enable power cap even it is unsafe to do so.");
+
+module_init(acpi_power_meter_init);
+module_exit(acpi_power_meter_exit);
-- 
cgit v0.10.2


From eb27cae8adaa658a0bf31631baa1ce29d8183759 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 6 Jul 2009 23:40:19 -0400
Subject: ACPI: linux/acpi.h should not include linux/dmi.h

users of acpi.h that need dmi.h should include it directly.

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 2876fc7..0fb6b2a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -38,6 +38,7 @@
 #include <linux/pci.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
+#include <linux/dmi.h>
 
 #include "internal.h"
 
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 391f331..c434f65 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -42,6 +42,7 @@
 #include <asm/io.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
+#include <linux/dmi.h>
 
 #define ACPI_EC_CLASS			"embedded_controller"
 #define ACPI_EC_DEVICE_NAME		"Embedded Controller"
diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
index 12158e0..7aa6802 100644
--- a/drivers/acpi/pci_slot.c
+++ b/drivers/acpi/pci_slot.c
@@ -31,6 +31,7 @@
 #include <linux/acpi.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
+#include <linux/dmi.h>
 
 static int debug;
 static int check_sta_before_sun;
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 60ea984..055a455 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -40,7 +40,7 @@
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 #include <asm/uaccess.h>
-
+#include <linux/dmi.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 7b287cb..4484ac0 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -33,6 +33,7 @@
 #include <linux/timer.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
+#include <linux/dmi.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 34321cf..7950c65 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -41,8 +41,6 @@
 #include <acpi/acpi_drivers.h>
 #include <acpi/acpi_numa.h>
 #include <asm/acpi.h>
-#include <linux/dmi.h>
-
 
 enum acpi_irq_model_id {
 	ACPI_IRQ_MODEL_PIC = 0,
-- 
cgit v0.10.2


From e4f55966d02c5dfade2978c2aa05fb202a78a4d1 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Mon, 6 Jul 2009 23:42:10 -0400
Subject: ACPI: remove unnecessary #ifdef CONFIG_DMI

acpi_osi_setup() does not depend on CONFIG_DMI
acpi_dmi_osi_linux()'s definition doesn't depend on CONFIG_DMI either

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7950c65..9260408 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -217,10 +217,8 @@ static inline int acpi_video_display_switch_support(void)
 #endif /* defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) */
 
 extern int acpi_blacklisted(void);
-#ifdef CONFIG_DMI
 extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
 extern int acpi_osi_setup(char *str);
-#endif
 
 #ifdef CONFIG_ACPI_NUMA
 int acpi_get_pxm(acpi_handle handle);
-- 
cgit v0.10.2


From 09729f0b11a389e046f621f3e4841043f460b603 Mon Sep 17 00:00:00 2001
From: Corentin Chary <corentincj@iksaif.net>
Date: Mon, 14 Sep 2009 12:43:51 +0200
Subject: hp-wmi: fix rfkill memory leak on unload

rfkill_unregister() should always be followed by rfkill_destroy()
In this case, rfkill_destroy was called two times on wifi_rfkill and
never on bluetooth_rfkill.

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index a2ad53e..a750192 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -502,7 +502,7 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
 	}
 	if (bluetooth_rfkill) {
 		rfkill_unregister(bluetooth_rfkill);
-		rfkill_destroy(wifi_rfkill);
+		rfkill_destroy(bluetooth_rfkill);
 	}
 	if (wwan_rfkill) {
 		rfkill_unregister(wwan_rfkill);
-- 
cgit v0.10.2


From 7d7decb213a65dea0973ed980c02dae2b1b88dbe Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 18 Sep 2009 12:41:08 -0700
Subject: acpi: switch /proc/acpi/{debug_layer,debug_level} to seq_file

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/debug.c b/drivers/acpi/debug.c
index a8287be..8a690c3 100644
--- a/drivers/acpi/debug.c
+++ b/drivers/acpi/debug.c
@@ -3,6 +3,7 @@
  */
 
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -201,72 +202,54 @@ module_param_call(trace_state, param_set_trace_state, param_get_trace_state,
 #define ACPI_SYSTEM_FILE_DEBUG_LAYER	"debug_layer"
 #define ACPI_SYSTEM_FILE_DEBUG_LEVEL		"debug_level"
 
-static int
-acpi_system_read_debug(char *page,
-		       char **start, off_t off, int count, int *eof, void *data)
+static int acpi_system_debug_proc_show(struct seq_file *m, void *v)
 {
-	char *p = page;
-	int size = 0;
 	unsigned int i;
 
-	if (off != 0)
-		goto end;
+	seq_printf(m, "%-25s\tHex        SET\n", "Description");
 
-	p += sprintf(p, "%-25s\tHex        SET\n", "Description");
-
-	switch ((unsigned long)data) {
+	switch ((unsigned long)m->private) {
 	case 0:
 		for (i = 0; i < ARRAY_SIZE(acpi_debug_layers); i++) {
-			p += sprintf(p, "%-25s\t0x%08lX [%c]\n",
+			seq_printf(m, "%-25s\t0x%08lX [%c]\n",
 				     acpi_debug_layers[i].name,
 				     acpi_debug_layers[i].value,
 				     (acpi_dbg_layer & acpi_debug_layers[i].
 				      value) ? '*' : ' ');
 		}
-		p += sprintf(p, "%-25s\t0x%08X [%c]\n", "ACPI_ALL_DRIVERS",
+		seq_printf(m, "%-25s\t0x%08X [%c]\n", "ACPI_ALL_DRIVERS",
 			     ACPI_ALL_DRIVERS,
 			     (acpi_dbg_layer & ACPI_ALL_DRIVERS) ==
 			     ACPI_ALL_DRIVERS ? '*' : (acpi_dbg_layer &
 						       ACPI_ALL_DRIVERS) ==
 			     0 ? ' ' : '-');
-		p += sprintf(p,
+		seq_printf(m,
 			     "--\ndebug_layer = 0x%08X (* = enabled, - = partial)\n",
 			     acpi_dbg_layer);
 		break;
 	case 1:
 		for (i = 0; i < ARRAY_SIZE(acpi_debug_levels); i++) {
-			p += sprintf(p, "%-25s\t0x%08lX [%c]\n",
+			seq_printf(m, "%-25s\t0x%08lX [%c]\n",
 				     acpi_debug_levels[i].name,
 				     acpi_debug_levels[i].value,
 				     (acpi_dbg_level & acpi_debug_levels[i].
 				      value) ? '*' : ' ');
 		}
-		p += sprintf(p, "--\ndebug_level = 0x%08X (* = enabled)\n",
+		seq_printf(m, "--\ndebug_level = 0x%08X (* = enabled)\n",
 			     acpi_dbg_level);
 		break;
-	default:
-		p += sprintf(p, "Invalid debug option\n");
-		break;
 	}
+	return 0;
+}
 
-      end:
-	size = (p - page);
-	if (size <= off + count)
-		*eof = 1;
-	*start = page + off;
-	size -= off;
-	if (size > count)
-		size = count;
-	if (size < 0)
-		size = 0;
-
-	return size;
+static int acpi_system_debug_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, acpi_system_debug_proc_show, PDE(inode)->data);
 }
 
-static int
-acpi_system_write_debug(struct file *file,
+static ssize_t acpi_system_debug_proc_write(struct file *file,
 			const char __user * buffer,
-			unsigned long count, void *data)
+			size_t count, loff_t *pos)
 {
 	char debug_string[12] = { '\0' };
 
@@ -279,7 +262,7 @@ acpi_system_write_debug(struct file *file,
 
 	debug_string[count] = '\0';
 
-	switch ((unsigned long)data) {
+	switch ((unsigned long)PDE(file->f_path.dentry->d_inode)->data) {
 	case 0:
 		acpi_dbg_layer = simple_strtoul(debug_string, NULL, 0);
 		break;
@@ -292,6 +275,15 @@ acpi_system_write_debug(struct file *file,
 
 	return count;
 }
+
+static const struct file_operations acpi_system_debug_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= acpi_system_debug_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= acpi_system_debug_proc_write,
+};
 #endif
 
 int __init acpi_debug_init(void)
@@ -303,24 +295,18 @@ int __init acpi_debug_init(void)
 
 	/* 'debug_layer' [R/W] */
 	name = ACPI_SYSTEM_FILE_DEBUG_LAYER;
-	entry =
-	    create_proc_read_entry(name, S_IFREG | S_IRUGO | S_IWUSR,
-				   acpi_root_dir, acpi_system_read_debug,
-				   (void *)0);
-	if (entry)
-		entry->write_proc = acpi_system_write_debug;
-	else
+	entry = proc_create_data(name, S_IFREG | S_IRUGO | S_IWUSR,
+				 acpi_root_dir, &acpi_system_debug_proc_fops,
+				 (void *)0);
+	if (!entry)
 		goto Error;
 
 	/* 'debug_level' [R/W] */
 	name = ACPI_SYSTEM_FILE_DEBUG_LEVEL;
-	entry =
-	    create_proc_read_entry(name, S_IFREG | S_IRUGO | S_IWUSR,
-				   acpi_root_dir, acpi_system_read_debug,
-				   (void *)1);
-	if (entry)
-		entry->write_proc = acpi_system_write_debug;
-	else
+	entry = proc_create_data(name, S_IFREG | S_IRUGO | S_IWUSR,
+				 acpi_root_dir, &acpi_system_debug_proc_fops,
+				 (void *)1);
+	if (!entry)
 		goto Error;
 
       Done:
-- 
cgit v0.10.2


From df43176c934f2bc01f7615a6e20a4b8e77dcdd11 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Fri, 18 Sep 2009 12:41:10 -0700
Subject: thermal: add missing Kconfig dependency

Otherwise THERMAL_HWMON can be selected when HWMON=n and THERMAL=n, which
fails to build.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index a86e952..bf7c687 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,7 @@ menuconfig THERMAL
 
 config THERMAL_HWMON
 	bool "Hardware monitoring support"
+	depends on THERMAL
 	depends on HWMON=y || HWMON=THERMAL
 	help
 	  The generic thermal sysfs driver's hardware monitoring support
-- 
cgit v0.10.2


From ded0cdfc6a7673916b0878c32fa8ba566b4f8cdb Mon Sep 17 00:00:00 2001
From: Peter Feuerer <peter@piie.net>
Date: Thu, 6 Aug 2009 15:57:52 -0700
Subject: acerhdf: fix fan control for AOA150 model

- Apply Borislav Petkov's patch (convert the fancmd[] array to a real
  struct thus disambiguating command handling and making code more
  readable.)

- Add BIOS product to BIOS table as AOA110 and AOA150 have different
  register values

- Add force_product parameter to allow forcing different product

- fix linker warning caused by "acerhdf_drv" not being named
  "acerhdf_driver"

Signed-off-by: Peter Feuerer <peter@piie.net>
Cc: Andreas Mohr <andi@lisas.de>
Acked-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index bdfee17..aa298d6 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -52,7 +52,7 @@
  */
 #undef START_IN_KERNEL_MODE
 
-#define DRV_VER "0.5.13"
+#define DRV_VER "0.5.16"
 
 /*
  * According to the Atom N270 datasheet,
@@ -90,6 +90,7 @@ static unsigned int fanoff = 58;
 static unsigned int verbose;
 static unsigned int fanstate = ACERHDF_FAN_AUTO;
 static char force_bios[16];
+static char force_product[16];
 static unsigned int prev_interval;
 struct thermal_zone_device *thz_dev;
 struct thermal_cooling_device *cl_dev;
@@ -107,34 +108,58 @@ module_param(verbose, uint, 0600);
 MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
 module_param_string(force_bios, force_bios, 16, 0);
 MODULE_PARM_DESC(force_bios, "Force BIOS version and omit BIOS check");
+module_param_string(force_product, force_product, 16, 0);
+MODULE_PARM_DESC(force_product, "Force BIOS product and omit BIOS check");
+
+/*
+ * cmd_off: to switch the fan completely off / to check if the fan is off
+ *	cmd_auto: to set the BIOS in control of the fan. The BIOS regulates then
+ *		the fan speed depending on the temperature
+ */
+struct fancmd {
+	u8 cmd_off;
+	u8 cmd_auto;
+};
 
 /* BIOS settings */
 struct bios_settings_t {
 	const char *vendor;
+	const char *product;
 	const char *version;
 	unsigned char fanreg;
 	unsigned char tempreg;
-	unsigned char fancmd[2]; /* fan off and auto commands */
+	struct fancmd cmd;
 };
 
 /* Register addresses and values for different BIOS versions */
 static const struct bios_settings_t bios_tbl[] = {
-	{"Acer", "v0.3109", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "v0.3301", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "v0.3304", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "v0.3305", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "v0.3308", 0x55, 0x58, {0x21, 0x00} },
-	{"Acer", "v0.3309", 0x55, 0x58, {0x21, 0x00} },
-	{"Acer", "v0.3310", 0x55, 0x58, {0x21, 0x00} },
-	{"Gateway", "v0.3103", 0x55, 0x58, {0x21, 0x00} },
-	{"Packard Bell", "v0.3105", 0x55, 0x58, {0x21, 0x00} },
-	{"", "", 0, 0, {0, 0} }
+	/* AOA110 */
+	{"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00} },
+	{"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
+	{"Acer", "AOA110", "v0.3301", 0x55, 0x58, {0xaf, 0x00} },
+	{"Acer", "AOA110", "v0.3304", 0x55, 0x58, {0xaf, 0x00} },
+	{"Acer", "AOA110", "v0.3305", 0x55, 0x58, {0xaf, 0x00} },
+	{"Acer", "AOA110", "v0.3308", 0x55, 0x58, {0x21, 0x00} },
+	{"Acer", "AOA110", "v0.3309", 0x55, 0x58, {0x21, 0x00} },
+	{"Acer", "AOA110", "v0.3310", 0x55, 0x58, {0x21, 0x00} },
+	/* AOA150 */
+	{"Acer", "AOA150", "v0.3114", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3304", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3305", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3308", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3309", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3310", 0x55, 0x58, {0x20, 0x00} },
+	/* special BIOS / other */
+	{"Gateway", "AOA110", "v0.3103", 0x55, 0x58, {0x21, 0x00} },
+	{"Gateway", "AOA150", "v0.3103", 0x55, 0x58, {0x20, 0x00} },
+	{"Packard Bell", "DOA150", "v0.3104", 0x55, 0x58, {0x21, 0x00} },
+	{"Packard Bell", "AOA110", "v0.3105", 0x55, 0x58, {0x21, 0x00} },
+	/* pewpew-terminator */
+	{"", "", "", 0, 0, {0, 0} }
 };
 
 static const struct bios_settings_t *bios_cfg __read_mostly;
 
-
 static int acerhdf_get_temp(int *temp)
 {
 	u8 read_temp;
@@ -150,13 +175,14 @@ static int acerhdf_get_temp(int *temp)
 static int acerhdf_get_fanstate(int *state)
 {
 	u8 fan;
-	bool tmp;
 
 	if (ec_read(bios_cfg->fanreg, &fan))
 		return -EINVAL;
 
-	tmp = (fan == bios_cfg->fancmd[ACERHDF_FAN_OFF]);
-	*state = tmp ? ACERHDF_FAN_OFF : ACERHDF_FAN_AUTO;
+	if (fan != bios_cfg->cmd.cmd_off)
+		*state = ACERHDF_FAN_AUTO;
+	else
+		*state = ACERHDF_FAN_OFF;
 
 	return 0;
 }
@@ -175,7 +201,8 @@ static void acerhdf_change_fanstate(int state)
 		state = ACERHDF_FAN_AUTO;
 	}
 
-	cmd = bios_cfg->fancmd[state];
+	cmd = (state == ACERHDF_FAN_OFF) ? bios_cfg->cmd.cmd_off
+					 : bios_cfg->cmd.cmd_auto;
 	fanstate = state;
 
 	ec_write(bios_cfg->fanreg, cmd);
@@ -437,7 +464,7 @@ static int acerhdf_remove(struct platform_device *device)
 	return 0;
 }
 
-struct platform_driver acerhdf_drv = {
+static struct platform_driver acerhdf_driver = {
 	.driver = {
 		.name = "acerhdf",
 		.owner = THIS_MODULE,
@@ -454,32 +481,40 @@ static int acerhdf_check_hardware(void)
 {
 	char const *vendor, *version, *product;
 	int i;
+	unsigned long prod_len = 0;
 
 	/* get BIOS data */
 	vendor  = dmi_get_system_info(DMI_SYS_VENDOR);
 	version = dmi_get_system_info(DMI_BIOS_VERSION);
 	product = dmi_get_system_info(DMI_PRODUCT_NAME);
 
+
 	pr_info("Acer Aspire One Fan driver, v.%s\n", DRV_VER);
 
-	if (!force_bios[0]) {
-		if (strncmp(product, "AO", 2)) {
-			pr_err("no Aspire One hardware found\n");
-			return -EINVAL;
-		}
-	} else {
-		pr_info("forcing BIOS version: %s\n", version);
+	if (force_bios[0]) {
 		version = force_bios;
+		pr_info("forcing BIOS version: %s\n", version);
 		kernelmode = 0;
 	}
 
+	if (force_product[0]) {
+		product = force_product;
+		pr_info("forcing BIOS product: %s\n", product);
+		kernelmode = 0;
+	}
+
+	prod_len = strlen(product);
+
 	if (verbose)
 		pr_info("BIOS info: %s %s, product: %s\n",
 			vendor, version, product);
 
 	/* search BIOS version and vendor in BIOS settings table */
 	for (i = 0; bios_tbl[i].version[0]; i++) {
-		if (!strcmp(bios_tbl[i].vendor, vendor) &&
+		if (strlen(bios_tbl[i].product) >= prod_len &&
+		    !strncmp(bios_tbl[i].product, product,
+			   strlen(bios_tbl[i].product)) &&
+		    !strcmp(bios_tbl[i].vendor, vendor) &&
 		    !strcmp(bios_tbl[i].version, version)) {
 			bios_cfg = &bios_tbl[i];
 			break;
@@ -487,8 +522,8 @@ static int acerhdf_check_hardware(void)
 	}
 
 	if (!bios_cfg) {
-		pr_err("unknown (unsupported) BIOS version %s/%s, "
-			"please report, aborting!\n", vendor, version);
+		pr_err("unknown (unsupported) BIOS version %s/%s/%s, "
+			"please report, aborting!\n", vendor, product, version);
 		return -EINVAL;
 	}
 
@@ -509,7 +544,7 @@ static int acerhdf_register_platform(void)
 {
 	int err = 0;
 
-	err = platform_driver_register(&acerhdf_drv);
+	err = platform_driver_register(&acerhdf_driver);
 	if (err)
 		return err;
 
@@ -525,7 +560,7 @@ static void acerhdf_unregister_platform(void)
 		return;
 
 	platform_device_del(acerhdf_dev);
-	platform_driver_unregister(&acerhdf_drv);
+	platform_driver_unregister(&acerhdf_driver);
 }
 
 static int acerhdf_register_thermal(void)
-- 
cgit v0.10.2


From ff27e1f3037535a547e2474eecb688428d654dc3 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Fri, 18 Sep 2009 12:41:05 -0700
Subject: acerhdf: convert to dev_pm_ops

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Peter Feuerer <peter@piie.net>
Cc: Andreas Mohr <andi@lisas.de>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index aa298d6..5aef16f 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -52,7 +52,7 @@
  */
 #undef START_IN_KERNEL_MODE
 
-#define DRV_VER "0.5.16"
+#define DRV_VER "0.5.17"
 
 /*
  * According to the Atom N270 datasheet,
@@ -435,7 +435,7 @@ struct thermal_cooling_device_ops acerhdf_cooling_ops = {
 };
 
 /* suspend / resume functionality */
-static int acerhdf_suspend(struct platform_device *dev, pm_message_t state)
+static int acerhdf_suspend(struct device *dev)
 {
 	if (kernelmode)
 		acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
@@ -446,14 +446,6 @@ static int acerhdf_suspend(struct platform_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int acerhdf_resume(struct platform_device *device)
-{
-	if (verbose)
-		pr_notice("resuming\n");
-
-	return 0;
-}
-
 static int __devinit acerhdf_probe(struct platform_device *device)
 {
 	return 0;
@@ -464,15 +456,19 @@ static int acerhdf_remove(struct platform_device *device)
 	return 0;
 }
 
+static struct dev_pm_ops acerhdf_pm_ops = {
+	.suspend = acerhdf_suspend,
+	.freeze  = acerhdf_suspend,
+};
+
 static struct platform_driver acerhdf_driver = {
 	.driver = {
-		.name = "acerhdf",
+		.name  = "acerhdf",
 		.owner = THIS_MODULE,
+		.pm    = &acerhdf_pm_ops,
 	},
 	.probe = acerhdf_probe,
 	.remove = acerhdf_remove,
-	.suspend = acerhdf_suspend,
-	.resume = acerhdf_resume,
 };
 
 
-- 
cgit v0.10.2


From f944915187f53810130eb1c56985e27e3cbe4a6d Mon Sep 17 00:00:00 2001
From: Peter Feuerer <peter@piie.net>
Date: Fri, 18 Sep 2009 12:41:07 -0700
Subject: acerhdf: additional BIOS versions

Added BIOS versions:
Acer: AOA110-v0.3307, AOA150-v0.3301, AOA150-v0.3307
Packard Bell: AOA150-v0.3105

Signed-off-by: Peter Feuerer <peter@piie.net>
Cc: Andreas Mohr <andi@lisas.de>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 5aef16f..0a8f735 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -139,13 +139,16 @@ static const struct bios_settings_t bios_tbl[] = {
 	{"Acer", "AOA110", "v0.3301", 0x55, 0x58, {0xaf, 0x00} },
 	{"Acer", "AOA110", "v0.3304", 0x55, 0x58, {0xaf, 0x00} },
 	{"Acer", "AOA110", "v0.3305", 0x55, 0x58, {0xaf, 0x00} },
+	{"Acer", "AOA110", "v0.3307", 0x55, 0x58, {0xaf, 0x00} },
 	{"Acer", "AOA110", "v0.3308", 0x55, 0x58, {0x21, 0x00} },
 	{"Acer", "AOA110", "v0.3309", 0x55, 0x58, {0x21, 0x00} },
 	{"Acer", "AOA110", "v0.3310", 0x55, 0x58, {0x21, 0x00} },
 	/* AOA150 */
 	{"Acer", "AOA150", "v0.3114", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3301", 0x55, 0x58, {0x20, 0x00} },
 	{"Acer", "AOA150", "v0.3304", 0x55, 0x58, {0x20, 0x00} },
 	{"Acer", "AOA150", "v0.3305", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3307", 0x55, 0x58, {0x20, 0x00} },
 	{"Acer", "AOA150", "v0.3308", 0x55, 0x58, {0x20, 0x00} },
 	{"Acer", "AOA150", "v0.3309", 0x55, 0x58, {0x20, 0x00} },
 	{"Acer", "AOA150", "v0.3310", 0x55, 0x58, {0x20, 0x00} },
@@ -154,6 +157,7 @@ static const struct bios_settings_t bios_tbl[] = {
 	{"Gateway", "AOA150", "v0.3103", 0x55, 0x58, {0x20, 0x00} },
 	{"Packard Bell", "DOA150", "v0.3104", 0x55, 0x58, {0x21, 0x00} },
 	{"Packard Bell", "AOA110", "v0.3105", 0x55, 0x58, {0x21, 0x00} },
+	{"Packard Bell", "AOA150", "v0.3105", 0x55, 0x58, {0x20, 0x00} },
 	/* pewpew-terminator */
 	{"", "", "", 0, 0, {0, 0} }
 };
-- 
cgit v0.10.2


From 9ac6185669d0d277c4082fa92ba8eb2e55534cbf Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 31 Aug 2009 22:32:10 +0000
Subject: ACPI: simplify deferred execution path

We had two functions, acpi_os_execute_deferred() and
acpi_os_execute_hp_deferred() that differed only in that the
latter did acpi_os_wait_events_complete(NULL) before executing
the deferred function.

This patch consolidates those two functions and uses a flag in
the struct acpi_os_dpc to determine whether to do the wait.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index c5b4f1e..d753206 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -58,6 +58,7 @@ struct acpi_os_dpc {
 	acpi_osd_exec_callback function;
 	void *context;
 	struct work_struct work;
+	int wait;
 };
 
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -703,21 +704,8 @@ static void acpi_os_execute_deferred(struct work_struct *work)
 		return;
 	}
 
-	dpc->function(dpc->context);
-	kfree(dpc);
-
-	return;
-}
-
-static void acpi_os_execute_hp_deferred(struct work_struct *work)
-{
-	struct acpi_os_dpc *dpc = container_of(work, struct acpi_os_dpc, work);
-	if (!dpc) {
-		printk(KERN_ERR PREFIX "Invalid (NULL) context\n");
-		return;
-	}
-
-	acpi_os_wait_events_complete(NULL);
+	if (dpc->wait)
+		acpi_os_wait_events_complete(NULL);
 
 	dpc->function(dpc->context);
 	kfree(dpc);
@@ -746,7 +734,6 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
 	acpi_status status = AE_OK;
 	struct acpi_os_dpc *dpc;
 	struct workqueue_struct *queue;
-	work_func_t func;
 	int ret;
 	ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
 			  "Scheduling function [%p(%p)] for deferred execution.\n",
@@ -779,8 +766,8 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
 	 */
 	queue = hp ? kacpi_hotplug_wq :
 		(type == OSL_NOTIFY_HANDLER ? kacpi_notify_wq : kacpid_wq);
-	func = hp ? acpi_os_execute_hp_deferred : acpi_os_execute_deferred;
-	INIT_WORK(&dpc->work, func);
+	dpc->wait = hp ? 1 : 0;
+	INIT_WORK(&dpc->work, acpi_os_execute_deferred);
 	ret = queue_work(queue, &dpc->work);
 
 	if (!ret) {
-- 
cgit v0.10.2


From 59fc9e5e21baf2bf5c87d8006e006007c3a708c2 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 31 Aug 2009 22:32:15 +0000
Subject: ACPI: remove null pointer checks in deferred execution path

Better to oops and learn about a bug than to silently cover it up.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index d753206..56071b6 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -699,18 +699,12 @@ void acpi_os_derive_pci_id(acpi_handle rhandle,	/* upper bound  */
 static void acpi_os_execute_deferred(struct work_struct *work)
 {
 	struct acpi_os_dpc *dpc = container_of(work, struct acpi_os_dpc, work);
-	if (!dpc) {
-		printk(KERN_ERR PREFIX "Invalid (NULL) context\n");
-		return;
-	}
 
 	if (dpc->wait)
 		acpi_os_wait_events_complete(NULL);
 
 	dpc->function(dpc->context);
 	kfree(dpc);
-
-	return;
 }
 
 /*******************************************************************************
@@ -739,9 +733,6 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
 			  "Scheduling function [%p(%p)] for deferred execution.\n",
 			  function, context));
 
-	if (!function)
-		return AE_BAD_PARAMETER;
-
 	/*
 	 * Allocate/initialize DPC structure.  Note that this memory will be
 	 * freed by the callee.  The kernel handles the work_struct list  in a
-- 
cgit v0.10.2


From 53de5356be3ac62c22ae1da266943059b169d9b1 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 31 Aug 2009 22:32:20 +0000
Subject: ACPI: don't pass handle for fixed hardware notifications

Fixed hardware devices have no handles, so just pass an explicit
NULL rather than something that looks like it might be meaningful.
acpi_device_notify() doesn't need the handle anyway; the only
reason it takes it as an argument is because the acpi_notify_handler
typedef requires it.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 7b90900..408ebde 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -370,7 +370,8 @@ static acpi_status acpi_device_notify_fixed(void *data)
 {
 	struct acpi_device *device = data;
 
-	acpi_device_notify(device->handle, ACPI_FIXED_HARDWARE_EVENT, device);
+	/* Fixed hardware devices have no handles */
+	acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device);
 	return AE_OK;
 }
 
-- 
cgit v0.10.2


From 2ebe31513fcbe7a781f27002f065b50ae195022f Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 19 Sep 2009 07:34:04 -0700
Subject: intel-iommu: Limit DOMAIN_MAX_PFN to fit in an 'unsigned long'

This means we're limited to 44-bit addresses on 32-bit kernels, and
makes it sane for us to use 'unsigned long' for PFNs throughout.

Which is just as well, really, since we already do that.

Reported-by: Benjamin LaHaise <ben.lahaise@neterion.com>
Tested-by: Benjamin LaHaise <ben.lahaise@neterion.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 2ec5899..c9272a1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -56,8 +56,14 @@
 
 #define MAX_AGAW_WIDTH 64
 
-#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
-#define DOMAIN_MAX_PFN(gaw)  ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
+
+/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
+   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
+#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
+				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
+#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
-- 
cgit v0.10.2


From 59c36286b74ae6a8adebf6e133a83d7f2e3e6704 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 19 Sep 2009 07:36:28 -0700
Subject: intel-iommu: Fix integer overflow in
 dma_pte_{clear_range,free_pagetable}()

If end_pfn is equal to (unsigned long)-1, then the loop will never end.

Seen on 32-bit kernel, but could have happened on 64-bit too once we get
hardware that supports 64-bit guest addresses.

Change both functions to a 'do {} while' loop with the test at the end,
and check for the PFN having wrapper round to zero.

Reported-by: Benjamin LaHaise <ben.lahaise@neterion.com>
Tested-by: Benjamin LaHaise <ben.lahaise@neterion.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c9272a1..5493c79 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -785,9 +785,10 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
+	BUG_ON(start_pfn > last_pfn);
 
 	/* we don't need lock here; nobody else touches the iova range */
-	while (start_pfn <= last_pfn) {
+	do {
 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
 		if (!pte) {
 			start_pfn = align_to_level(start_pfn + 1, 2);
@@ -801,7 +802,8 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 
 		domain_flush_cache(domain, first_pte,
 				   (void *)pte - (void *)first_pte);
-	}
+
+	} while (start_pfn && start_pfn <= last_pfn);
 }
 
 /* free page table pages. last level pte should already be cleared */
@@ -817,6 +819,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
+	BUG_ON(start_pfn > last_pfn);
 
 	/* We don't need lock here; nobody else touches the iova range */
 	level = 2;
@@ -827,7 +830,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 		if (tmp + level_size(level) - 1 > last_pfn)
 			return;
 
-		while (tmp + level_size(level) - 1 <= last_pfn) {
+		do {
 			first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
 			if (!pte) {
 				tmp = align_to_level(tmp + 1, level + 1);
@@ -846,7 +849,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 			domain_flush_cache(domain, first_pte,
 					   (void *)pte - (void *)first_pte);
 			
-		}
+		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
 		level++;
 	}
 	/* free pgd */
-- 
cgit v0.10.2


From 64de5af000e99f32dd49ff5dd9a0fd7db1f60305 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <ben.lahaise@neterion.com>
Date: Wed, 16 Sep 2009 21:05:55 -0400
Subject: intel-iommu: Fix integer wrap on 32 bit kernels

The following 64 bit promotions are necessary to handle memory above the
4GiB boundary correctly.

[dwmw2: Fix the second part not to need 64-bit arithmetic at all]

Signed-off-by: Benjamin LaHaise <ben.lahaise@neterion.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5493c79..52026d1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -735,7 +735,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 				return NULL;
 
 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
-			pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
 			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
 				/* Someone else set it while we were thinking; use theirs. */
 				free_pgtable_page(tmp_page);
@@ -2648,10 +2648,9 @@ static void flush_unmaps(void)
 			unsigned long mask;
 			struct iova *iova = deferred_flush[i].iova[j];
 
-			mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT;
-			mask = ilog2(mask >> VTD_PAGE_SHIFT);
+			mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
 			iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
-					iova->pfn_lo << PAGE_SHIFT, mask);
+					(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
 			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
 		}
 		deferred_flush[i].next = 0;
-- 
cgit v0.10.2


From 0c02a20ff7695f9c54cc7c013dda326270ccdac8 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 19 Sep 2009 09:37:23 -0700
Subject: intel-iommu: Kill DMAR_BROKEN_GFX_WA option.

Just make it depend on BROKEN for now, in case people scream really loud
about it (and because we might want to keep some of this logic for an
upcoming BIOS workaround, so I don't just want to rip it out entirely
just yet). But for graphics devices, it really ought to be unnecessary.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
index 21bc416..cf9431d 100644
--- a/Documentation/Intel-IOMMU.txt
+++ b/Documentation/Intel-IOMMU.txt
@@ -56,11 +56,7 @@ Graphics Problems?
 ------------------
 If you encounter issues with graphics devices, you can try adding
 option intel_iommu=igfx_off to turn off the integrated graphics engine.
-
-If it happens to be a PCI device included in the INCLUDE_ALL Engine,
-then try enabling CONFIG_DMAR_GFX_WA to setup a 1-1 map. We hear
-graphics drivers may be in process of using DMA api's in the near
-future and at that time this option can be yanked out.
+If this fixes anything, please ensure you file a bug reporting the problem.
 
 Some exceptions to IOVA
 -----------------------
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6..22a0fd1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1916,7 +1916,7 @@ config DMAR_DEFAULT_ON
 config DMAR_BROKEN_GFX_WA
 	def_bool n
 	prompt "Workaround broken graphics drivers (going away soon)"
-	depends on DMAR
+	depends on DMAR && BROKEN
 	---help---
 	  Current Graphics drivers tend to use physical address
 	  for DMA and avoid using DMA APIs. Setting this config
-- 
cgit v0.10.2


From ec77e21b91f0393a5201cfd4571a82ab7d64fd29 Mon Sep 17 00:00:00 2001
From: Ryan Mallon <ryan@bluewatersys.com>
Date: Fri, 18 Sep 2009 12:51:40 -0700
Subject: mtd: SST25L (non JEDEC) SPI Flash driver

Add support for the non JEDEC SST25L SPI Flash devices.

[dwmw2: Some cleanups]

Signed-off-by: Andre Renaud <andre@bluewatersys.com>
Signed-off-by: Ryan Mallon <ryan@bluewatersys.com>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Cc: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: "H Hartley Sweeten" <hartleys@visionengravers.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 325fab92..c222514 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -104,6 +104,16 @@ config M25PXX_USE_FAST_READ
 	help
 	  This option enables FAST_READ access supported by ST M25Pxx.
 
+config MTD_SST25L
+	tristate "Support SST25L (non JEDEC) SPI Flash chips"
+	depends on SPI_MASTER
+	help
+	  This enables access to the non JEDEC SST25L SPI flash chips, used
+	  for program and data storage.
+
+	  Set up your spi devices with the right board-specific platform data,
+	  if you want to specify device partitioning.
+
 config MTD_SLRAM
 	tristate "Uncached system RAM"
 	help
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 0993d5c..ab5c9b9 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_MTD_LART)		+= lart.o
 obj-$(CONFIG_MTD_BLOCK2MTD)	+= block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)	+= mtd_dataflash.o
 obj-$(CONFIG_MTD_M25P80)	+= m25p80.o
+obj-$(CONFIG_MTD_SST25L)	+= sst25l.o
diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c
new file mode 100644
index 0000000..ac7d52b
--- /dev/null
+++ b/drivers/mtd/devices/sst25l.c
@@ -0,0 +1,510 @@
+/*
+ * sst25l.c
+ *
+ * Driver for SST25L SPI Flash chips
+ *
+ * Copyright © 2009 Bluewater Systems Ltd
+ * Author: Andre Renaud <andre@bluewatersys.com>
+ * Author: Ryan Mallon <ryan@bluewatersys.com>
+ *
+ * Based on m25p80.c
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/flash.h>
+
+/* Erases can take up to 3 seconds! */
+#define MAX_READY_WAIT_JIFFIES	msecs_to_jiffies(3000)
+
+#define SST25L_CMD_WRSR		0x01	/* Write status register */
+#define SST25L_CMD_WRDI		0x04	/* Write disable */
+#define SST25L_CMD_RDSR		0x05	/* Read status register */
+#define SST25L_CMD_WREN		0x06	/* Write enable */
+#define SST25L_CMD_READ		0x03	/* High speed read */
+
+#define SST25L_CMD_EWSR		0x50	/* Enable write status register */
+#define SST25L_CMD_SECTOR_ERASE	0x20	/* Erase sector */
+#define SST25L_CMD_READ_ID	0x90	/* Read device ID */
+#define SST25L_CMD_AAI_PROGRAM	0xaf	/* Auto address increment */
+
+#define SST25L_STATUS_BUSY	(1 << 0)	/* Chip is busy */
+#define SST25L_STATUS_WREN	(1 << 1)	/* Write enabled */
+#define SST25L_STATUS_BP0	(1 << 2)	/* Block protection 0 */
+#define SST25L_STATUS_BP1	(1 << 3)	/* Block protection 1 */
+
+struct sst25l_flash {
+	struct spi_device	*spi;
+	struct mutex		lock;
+	struct mtd_info		mtd;
+
+	int 			partitioned;
+};
+
+struct flash_info {
+	const char		*name;
+	uint16_t		device_id;
+	unsigned		page_size;
+	unsigned		nr_pages;
+	unsigned		erase_size;
+};
+
+#define to_sst25l_flash(x) container_of(x, struct sst25l_flash, mtd)
+
+static struct flash_info __initdata sst25l_flash_info[] = {
+	{"sst25lf020a", 0xbf43, 256, 1024, 4096},
+	{"sst25lf040a",	0xbf44,	256, 2048, 4096},
+};
+
+static int sst25l_status(struct sst25l_flash *flash, int *status)
+{
+	unsigned char command, response;
+	int err;
+
+	command = SST25L_CMD_RDSR;
+	err = spi_write_then_read(flash->spi, &command, 1, &response, 1);
+	if (err < 0)
+		return err;
+
+	*status = response;
+	return 0;
+}
+
+static int sst25l_write_enable(struct sst25l_flash *flash, int enable)
+{
+	unsigned char command[2];
+	int status, err;
+
+	command[0] = enable ? SST25L_CMD_WREN : SST25L_CMD_WRDI;
+	err = spi_write(flash->spi, command, 1);
+	if (err)
+		return err;
+
+	command[0] = SST25L_CMD_EWSR;
+	err = spi_write(flash->spi, command, 1);
+	if (err)
+		return err;
+
+	command[0] = SST25L_CMD_WRSR;
+	command[1] = enable ? 0 : SST25L_STATUS_BP0 | SST25L_STATUS_BP1;
+	err = spi_write(flash->spi, command, 2);
+	if (err)
+		return err;
+
+	if (enable) {
+		err = sst25l_status(flash, &status);
+		if (err)
+			return err;
+		if (!(status & SST25L_STATUS_WREN))
+			return -EROFS;
+	}
+
+	return 0;
+}
+
+static int sst25l_wait_till_ready(struct sst25l_flash *flash)
+{
+	unsigned long deadline;
+	int status, err;
+
+	deadline = jiffies + MAX_READY_WAIT_JIFFIES;
+	do {
+		err = sst25l_status(flash, &status);
+		if (err)
+			return err;
+		if (!(status & SST25L_STATUS_BUSY))
+			return 0;
+
+		cond_resched();
+	} while (!time_after_eq(jiffies, deadline));
+
+	return -ETIMEDOUT;
+}
+
+static int sst25l_erase_sector(struct sst25l_flash *flash, uint32_t offset)
+{
+	unsigned char command[4];
+	int err;
+
+	err = sst25l_write_enable(flash, 1);
+	if (err)
+		return err;
+
+	command[0] = SST25L_CMD_SECTOR_ERASE;
+	command[1] = offset >> 16;
+	command[2] = offset >> 8;
+	command[3] = offset;
+	err = spi_write(flash->spi, command, 4);
+	if (err)
+		return err;
+
+	err = sst25l_wait_till_ready(flash);
+	if (err)
+		return err;
+
+	return sst25l_write_enable(flash, 0);
+}
+
+static int sst25l_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	struct sst25l_flash *flash = to_sst25l_flash(mtd);
+	uint32_t addr, end;
+	int err;
+
+	/* Sanity checks */
+	if (instr->addr + instr->len > flash->mtd.size)
+		return -EINVAL;
+
+	if ((uint32_t)instr->len % mtd->erasesize)
+		return -EINVAL;
+
+	if ((uint32_t)instr->addr % mtd->erasesize)
+		return -EINVAL;
+
+	addr = instr->addr;
+	end = addr + instr->len;
+
+	mutex_lock(&flash->lock);
+
+	err = sst25l_wait_till_ready(flash);
+	if (err)
+		return err;
+
+	while (addr < end) {
+		err = sst25l_erase_sector(flash, addr);
+		if (err) {
+			mutex_unlock(&flash->lock);
+			instr->state = MTD_ERASE_FAILED;
+			dev_err(&flash->spi->dev, "Erase failed\n");
+			return err;
+		}
+
+		addr += mtd->erasesize;
+	}
+
+	mutex_unlock(&flash->lock);
+
+	instr->state = MTD_ERASE_DONE;
+	mtd_erase_callback(instr);
+	return 0;
+}
+
+static int sst25l_read(struct mtd_info *mtd, loff_t from, size_t len,
+		       size_t *retlen, unsigned char *buf)
+{
+	struct sst25l_flash *flash = to_sst25l_flash(mtd);
+	struct spi_transfer transfer[2];
+	struct spi_message message;
+	unsigned char command[4];
+	int ret;
+
+	/* Sanity checking */
+	if (len == 0)
+		return 0;
+
+	if (from + len > flash->mtd.size)
+		return -EINVAL;
+
+	if (retlen)
+		*retlen = 0;
+
+	spi_message_init(&message);
+	memset(&transfer, 0, sizeof(transfer));
+
+	command[0] = SST25L_CMD_READ;
+	command[1] = from >> 16;
+	command[2] = from >> 8;
+	command[3] = from;
+
+	transfer[0].tx_buf = command;
+	transfer[0].len = sizeof(command);
+	spi_message_add_tail(&transfer[0], &message);
+
+	transfer[1].rx_buf = buf;
+	transfer[1].len = len;
+	spi_message_add_tail(&transfer[1], &message);
+
+	mutex_lock(&flash->lock);
+
+	/* Wait for previous write/erase to complete */
+	ret = sst25l_wait_till_ready(flash);
+	if (ret) {
+		mutex_unlock(&flash->lock);
+		return ret;
+	}
+
+	spi_sync(flash->spi, &message);
+
+	if (retlen && message.actual_length > sizeof(command))
+		*retlen += message.actual_length - sizeof(command);
+
+	mutex_unlock(&flash->lock);
+	return 0;
+}
+
+static int sst25l_write(struct mtd_info *mtd, loff_t to, size_t len,
+			size_t *retlen, const unsigned char *buf)
+{
+	struct sst25l_flash *flash = to_sst25l_flash(mtd);
+	int i, j, ret, bytes, copied = 0;
+	unsigned char command[5];
+
+	/* Sanity checks */
+	if (!len)
+		return 0;
+
+	if (to + len > flash->mtd.size)
+		return -EINVAL;
+
+	if ((uint32_t)to % mtd->writesize)
+		return -EINVAL;
+
+	mutex_lock(&flash->lock);
+
+	ret = sst25l_write_enable(flash, 1);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < len; i += mtd->writesize) {
+		ret = sst25l_wait_till_ready(flash);
+		if (ret)
+			goto out;
+
+		/* Write the first byte of the page */
+		command[0] = SST25L_CMD_AAI_PROGRAM;
+		command[1] = (to + i) >> 16;
+		command[2] = (to + i) >> 8;
+		command[3] = (to + i);
+		command[4] = buf[i];
+		ret = spi_write(flash->spi, command, 5);
+		if (ret < 0)
+			goto out;
+		copied++;
+
+		/*
+		 * Write the remaining bytes using auto address
+		 * increment mode
+		 */
+		bytes = min_t(uint32_t, mtd->writesize, len - i);
+		for (j = 1; j < bytes; j++, copied++) {
+			ret = sst25l_wait_till_ready(flash);
+			if (ret)
+				goto out;
+
+			command[1] = buf[i + j];
+			ret = spi_write(flash->spi, command, 2);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	ret = sst25l_write_enable(flash, 0);
+
+	if (retlen)
+		*retlen = copied;
+
+	mutex_unlock(&flash->lock);
+	return ret;
+}
+
+static struct flash_info *__init sst25l_match_device(struct spi_device *spi)
+{
+	struct flash_info *flash_info = NULL;
+	unsigned char command[4], response;
+	int i, err;
+	uint16_t id;
+
+	command[0] = SST25L_CMD_READ_ID;
+	command[1] = 0;
+	command[2] = 0;
+	command[3] = 0;
+	err = spi_write_then_read(spi, command, sizeof(command), &response, 1);
+	if (err < 0) {
+		dev_err(&spi->dev, "error reading device id msb\n");
+		return NULL;
+	}
+
+	id = response << 8;
+
+	command[0] = SST25L_CMD_READ_ID;
+	command[1] = 0;
+	command[2] = 0;
+	command[3] = 1;
+	err = spi_write_then_read(spi, command, sizeof(command), &response, 1);
+	if (err < 0) {
+		dev_err(&spi->dev, "error reading device id lsb\n");
+		return NULL;
+	}
+
+	id |= response;
+
+	for (i = 0; i < ARRAY_SIZE(sst25l_flash_info); i++)
+		if (sst25l_flash_info[i].device_id == id)
+			flash_info = &sst25l_flash_info[i];
+
+	if (!flash_info)
+		dev_err(&spi->dev, "unknown id %.4x\n", id);
+
+	return flash_info;
+}
+
+static int __init sst25l_probe(struct spi_device *spi)
+{
+	struct flash_info *flash_info;
+	struct sst25l_flash *flash;
+	struct flash_platform_data *data;
+	int ret, i;
+
+	flash_info = sst25l_match_device(spi);
+	if (!flash_info)
+		return -ENODEV;
+
+	flash = kzalloc(sizeof(struct sst25l_flash), GFP_KERNEL);
+	if (!flash)
+		return -ENOMEM;
+
+	flash->spi = spi;
+	mutex_init(&flash->lock);
+	dev_set_drvdata(&spi->dev, flash);
+
+	data = spi->dev.platform_data;
+	if (data && data->name)
+		flash->mtd.name = data->name;
+	else
+		flash->mtd.name = dev_name(&spi->dev);
+
+	flash->mtd.type		= MTD_NORFLASH;
+	flash->mtd.flags	= MTD_CAP_NORFLASH;
+	flash->mtd.erasesize	= flash_info->erase_size;
+	flash->mtd.writesize	= flash_info->page_size;
+	flash->mtd.size		= flash_info->page_size * flash_info->nr_pages;
+	flash->mtd.erase	= sst25l_erase;
+	flash->mtd.read		= sst25l_read;
+	flash->mtd.write 	= sst25l_write;
+
+	dev_info(&spi->dev, "%s (%lld KiB)\n", flash_info->name,
+		 (long long)flash->mtd.size >> 10);
+
+	DEBUG(MTD_DEBUG_LEVEL2,
+	      "mtd .name = %s, .size = 0x%llx (%lldMiB) "
+	      ".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n",
+	      flash->mtd.name,
+	      (long long)flash->mtd.size, (long long)(flash->mtd.size >> 20),
+	      flash->mtd.erasesize, flash->mtd.erasesize / 1024,
+	      flash->mtd.numeraseregions);
+
+	if (flash->mtd.numeraseregions)
+		for (i = 0; i < flash->mtd.numeraseregions; i++)
+			DEBUG(MTD_DEBUG_LEVEL2,
+			      "mtd.eraseregions[%d] = { .offset = 0x%llx, "
+			      ".erasesize = 0x%.8x (%uKiB), "
+			      ".numblocks = %d }\n",
+			      i, (long long)flash->mtd.eraseregions[i].offset,
+			      flash->mtd.eraseregions[i].erasesize,
+			      flash->mtd.eraseregions[i].erasesize / 1024,
+			      flash->mtd.eraseregions[i].numblocks);
+
+	if (mtd_has_partitions()) {
+		struct mtd_partition *parts = NULL;
+		int nr_parts = 0;
+
+		if (mtd_has_cmdlinepart()) {
+			static const char *part_probes[] =
+				{"cmdlinepart", NULL};
+
+			nr_parts = parse_mtd_partitions(&flash->mtd,
+							part_probes,
+							&parts, 0);
+		}
+
+		if (nr_parts <= 0 && data && data->parts) {
+			parts = data->parts;
+			nr_parts = data->nr_parts;
+		}
+
+		if (nr_parts > 0) {
+			for (i = 0; i < nr_parts; i++) {
+				DEBUG(MTD_DEBUG_LEVEL2, "partitions[%d] = "
+				      "{.name = %s, .offset = 0x%llx, "
+				      ".size = 0x%llx (%lldKiB) }\n",
+				      i, parts[i].name,
+				      (long long)parts[i].offset,
+				      (long long)parts[i].size,
+				      (long long)(parts[i].size >> 10));
+			}
+
+			flash->partitioned = 1;
+			return add_mtd_partitions(&flash->mtd,
+						  parts, nr_parts);
+		}
+
+	} else if (data->nr_parts) {
+		dev_warn(&spi->dev, "ignoring %d default partitions on %s\n",
+			 data->nr_parts, data->name);
+	}
+
+	ret = add_mtd_device(&flash->mtd);
+	if (ret == 1) {
+		kfree(flash);
+		dev_set_drvdata(&spi->dev, NULL);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int __exit sst25l_remove(struct spi_device *spi)
+{
+	struct sst25l_flash *flash = dev_get_drvdata(&spi->dev);
+	int ret;
+
+	if (mtd_has_partitions() && flash->partitioned)
+		ret = del_mtd_partitions(&flash->mtd);
+	else
+		ret = del_mtd_device(&flash->mtd);
+	if (ret == 0)
+		kfree(flash);
+	return ret;
+}
+
+static struct spi_driver sst25l_driver = {
+	.driver = {
+		.name	= "sst25l",
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sst25l_probe,
+	.remove		= __exit_p(sst25l_remove),
+};
+
+static int __init sst25l_init(void)
+{
+	return spi_register_driver(&sst25l_driver);
+}
+
+static void __exit sst25l_exit(void)
+{
+	spi_unregister_driver(&sst25l_driver);
+}
+
+module_init(sst25l_init);
+module_exit(sst25l_exit);
+
+MODULE_DESCRIPTION("MTD SPI driver for SST25L Flash chips");
+MODULE_AUTHOR("Andre Renaud <andre@bluewatersys.com>, "
+	      "Ryan Mallon <ryan@bluewatersys.com>");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 2eaaa5ff87c675aacd3a869fc5fe75a35bbd5278 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 18 Sep 2009 12:51:42 -0700
Subject: mtd: sst25l, fix lock imbalance

Add an omitted unlock to one sst25l_erase fail path.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c
index ac7d52b..c2baf33 100644
--- a/drivers/mtd/devices/sst25l.c
+++ b/drivers/mtd/devices/sst25l.c
@@ -180,8 +180,10 @@ static int sst25l_erase(struct mtd_info *mtd, struct erase_info *instr)
 	mutex_lock(&flash->lock);
 
 	err = sst25l_wait_till_ready(flash);
-	if (err)
+	if (err) {
+		mutex_unlock(&flash->lock);
 		return err;
+	}
 
 	while (addr < end) {
 		err = sst25l_erase_sector(flash, addr);
-- 
cgit v0.10.2


From f33dabbe79fdf7a8568c65faa1db7794c87ac4d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Sep 2009 12:51:43 -0700
Subject: mtd: nand: register orion_nand using platform_driver_probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

orion_nand_probe lives in .init.text, so using platform_driver_register to
register it is wrong because binding a device after the init memory is
discarded (e.g.  via sysfs) results in an oops.

As requested by Nicolas Pitre platform_driver_probe is used instead of
moving the probe function to .devinit.text as proposed initially.  This
saves some memory, but devices registered after the driver is probed are
not bound (probably there are none) and binding via sysfs isn't possible.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Lennert Buytenhek <buytenh@marvell.com>
Cc: Saeed Bishara <saeed@marvell.com>
Cc: Joern Engel <joern@logfs.org>
Acked-by: Nicolas Pitre <nico@marvell.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index 0d9d4bc..f59c074 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -171,7 +171,6 @@ static int __devexit orion_nand_remove(struct platform_device *pdev)
 }
 
 static struct platform_driver orion_nand_driver = {
-	.probe		= orion_nand_probe,
 	.remove		= __devexit_p(orion_nand_remove),
 	.driver		= {
 		.name	= "orion_nand",
@@ -181,7 +180,7 @@ static struct platform_driver orion_nand_driver = {
 
 static int __init orion_nand_init(void)
 {
-	return platform_driver_register(&orion_nand_driver);
+	return platform_driver_probe(&orion_nand_driver, orion_nand_probe);
 }
 
 static void __exit orion_nand_exit(void)
-- 
cgit v0.10.2


From 778dbcc1ebea6f9a560020110987449bf4607e5f Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 18 Sep 2009 12:51:44 -0700
Subject: mtd: onenand: make onenand/generic.c more generic

Remove the ARM dependency from the generic "onenand" platform device
driver.  This change makes the driver useful for other architectures as
well.  Needed for the SuperH kfr2r09 board.

Apart from the obvious Kconfig bits, the most important change is the move
away from ARM specific includes and platform data.  Together with this
change the only in-tree board code gets an update, and the driver name is
also changed gracefully break potential out of tree drivers.

The driver is also updated to allow NULL as platform data together with a
few changes to make use of resource_size() and dev_name().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Kyungmin Park <kmpark@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c
index dcfc20d..b4d4ef6 100644
--- a/arch/arm/mach-omap2/board-apollon.c
+++ b/arch/arm/mach-omap2/board-apollon.c
@@ -87,7 +87,7 @@ static struct mtd_partition apollon_partitions[] = {
 	},
 };
 
-static struct flash_platform_data apollon_flash_data = {
+static struct onenand_platform_data apollon_flash_data = {
 	.parts		= apollon_partitions,
 	.nr_parts	= ARRAY_SIZE(apollon_partitions),
 };
@@ -99,7 +99,7 @@ static struct resource apollon_flash_resource[] = {
 };
 
 static struct platform_device apollon_onenand_device = {
-	.name		= "onenand",
+	.name		= "onenand-flash",
 	.id		= -1,
 	.dev		= {
 		.platform_data	= &apollon_flash_data,
diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index 3a094d1..a38f580 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -24,7 +24,6 @@ config MTD_ONENAND_VERIFY_WRITE
 
 config MTD_ONENAND_GENERIC
 	tristate "OneNAND Flash device via platform device driver"
-	depends on ARM
 	help
 	  Support for OneNAND flash via platform device driver.
 
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index 3a496c3..e789149 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -19,12 +19,16 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/onenand.h>
 #include <linux/mtd/partitions.h>
-
 #include <asm/io.h>
-#include <asm/mach/flash.h>
-
-#define DRIVER_NAME	"onenand"
 
+/*
+ * Note: Driver name and platform data format have been updated!
+ *
+ * This version of the driver is named "onenand-flash" and takes struct
+ * onenand_platform_data as platform data. The old ARM-specific version
+ * with the name "onenand" used to take struct flash_platform_data.
+ */
+#define DRIVER_NAME	"onenand-flash"
 
 #ifdef CONFIG_MTD_PARTITIONS
 static const char *part_probes[] = { "cmdlinepart", NULL,  };
@@ -39,16 +43,16 @@ struct onenand_info {
 static int __devinit generic_onenand_probe(struct platform_device *pdev)
 {
 	struct onenand_info *info;
-	struct flash_platform_data *pdata = pdev->dev.platform_data;
+	struct onenand_platform_data *pdata = pdev->dev.platform_data;
 	struct resource *res = pdev->resource;
-	unsigned long size = res->end - res->start + 1;
+	unsigned long size = resource_size(res);
 	int err;
 
 	info = kzalloc(sizeof(struct onenand_info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
 
-	if (!request_mem_region(res->start, size, pdev->dev.driver->name)) {
+	if (!request_mem_region(res->start, size, dev_name(&pdev->dev))) {
 		err = -EBUSY;
 		goto out_free_info;
 	}
@@ -59,7 +63,7 @@ static int __devinit generic_onenand_probe(struct platform_device *pdev)
 		goto out_release_mem_region;
 	}
 
-	info->onenand.mmcontrol = pdata->mmcontrol;
+	info->onenand.mmcontrol = pdata ? pdata->mmcontrol : 0;
 	info->onenand.irq = platform_get_irq(pdev, 0);
 
 	info->mtd.name = dev_name(&pdev->dev);
@@ -75,7 +79,7 @@ static int __devinit generic_onenand_probe(struct platform_device *pdev)
 	err = parse_mtd_partitions(&info->mtd, part_probes, &info->parts, 0);
 	if (err > 0)
 		add_mtd_partitions(&info->mtd, info->parts, err);
-	else if (err <= 0 && pdata->parts)
+	else if (err <= 0 && pdata && pdata->parts)
 		add_mtd_partitions(&info->mtd, pdata->parts, pdata->nr_parts);
 	else
 #endif
@@ -99,7 +103,7 @@ static int __devexit generic_onenand_remove(struct platform_device *pdev)
 {
 	struct onenand_info *info = platform_get_drvdata(pdev);
 	struct resource *res = pdev->resource;
-	unsigned long size = res->end - res->start + 1;
+	unsigned long size = resource_size(res);
 
 	platform_set_drvdata(pdev, NULL);
 
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 8ed8733..4e49f33 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -214,4 +214,12 @@ unsigned onenand_block(struct onenand_chip *this, loff_t addr);
 loff_t onenand_addr(struct onenand_chip *this, int block);
 int flexonenand_region(struct mtd_info *mtd, loff_t addr);
 
+struct mtd_partition;
+
+struct onenand_platform_data {
+	void		(*mmcontrol)(struct mtd_info *mtd, int sync_read);
+	struct mtd_partition *parts;
+	unsigned int	nr_parts;
+};
+
 #endif	/* __LINUX_MTD_ONENAND_H */
-- 
cgit v0.10.2


From 46a8cf2df2232c0051f29716ff8a166ebeb08daf Mon Sep 17 00:00:00 2001
From: Sneha Narnakaje <nsnehaprabha@ti.com>
Date: Fri, 18 Sep 2009 12:51:46 -0700
Subject: mtd: nand: add "page" parameter to all read_page/read_page_raw APIs

This patch adds a new "page" parameter to all NAND read_page/read_page_raw
APIs.  The read_page API for the new mode ECC_HW_OOB_FIRST requires the
page information to send the READOOB command and read the OOB area before
the data area.

Reviewed-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Sneha Narnakaje <nsnehaprabha@ti.com>
Signed-off-by: Sandeep Paulraj <s-paulraj@ti.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 20c828b..f8e9975 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -218,7 +218,7 @@ static int atmel_nand_calculate(struct mtd_info *mtd,
  * buf:        buffer to store read data
  */
 static int atmel_nand_read_page(struct mtd_info *mtd,
-		struct nand_chip *chip, uint8_t *buf)
+		struct nand_chip *chip, uint8_t *buf, int page)
 {
 	int eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 29acd06..a70f40e 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -381,7 +381,7 @@ static int cafe_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
  * we need a special oob layout and handling.
  */
 static int cafe_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			       uint8_t *buf)
+			       uint8_t *buf, int page)
 {
 	struct cafe_priv *cafe = mtd->priv;
 
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 1f6eb25..ddd37d2 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -739,7 +739,8 @@ static int fsl_elbc_chip_init_tail(struct mtd_info *mtd)
 
 static int fsl_elbc_read_page(struct mtd_info *mtd,
                               struct nand_chip *chip,
-                              uint8_t *buf)
+			      uint8_t *buf,
+			      int page)
 {
 	fsl_elbc_read_buf(mtd, buf, mtd->writesize);
 	fsl_elbc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 4c5e8a7..17bbd50 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -765,7 +765,7 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
  * Not for syndrome calculating ecc controllers, which use a special oob layout
  */
 static int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf)
+			      uint8_t *buf, int page)
 {
 	chip->read_buf(mtd, buf, mtd->writesize);
 	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
@@ -781,7 +781,7 @@ static int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
  * We need a special oob layout and handling even when OOB isn't used.
  */
 static int nand_read_page_raw_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf)
+			      uint8_t *buf, int page)
 {
 	int eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -820,7 +820,7 @@ static int nand_read_page_raw_syndrome(struct mtd_info *mtd, struct nand_chip *c
  * @buf:	buffer to store read data
  */
 static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf)
+				uint8_t *buf, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -830,7 +830,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 	uint8_t *ecc_code = chip->buffers->ecccode;
 	uint32_t *eccpos = chip->ecc.layout->eccpos;
 
-	chip->ecc.read_page_raw(mtd, chip, buf);
+	chip->ecc.read_page_raw(mtd, chip, buf, page);
 
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
 		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
@@ -943,7 +943,7 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint3
  * Not for syndrome calculating ecc controllers which need a special oob layout
  */
 static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf)
+				uint8_t *buf, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -988,7 +988,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  * we need a special oob layout and handling.
  */
 static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-				   uint8_t *buf)
+				   uint8_t *buf, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1130,11 +1130,13 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 
 			/* Now read the page into the buffer */
 			if (unlikely(ops->mode == MTD_OOB_RAW))
-				ret = chip->ecc.read_page_raw(mtd, chip, bufpoi);
+				ret = chip->ecc.read_page_raw(mtd, chip,
+							      bufpoi, page);
 			else if (!aligned && NAND_SUBPAGE_READ(chip) && !oob)
 				ret = chip->ecc.read_subpage(mtd, chip, col, bytes, bufpoi);
 			else
-				ret = chip->ecc.read_page(mtd, chip, bufpoi);
+				ret = chip->ecc.read_page(mtd, chip, bufpoi,
+							  page);
 			if (ret < 0)
 				break;
 
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 2bc8966..c5df285 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -329,7 +329,7 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va
 }
 
 static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf)
+				uint8_t *buf, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 4030eba..686f370 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -271,13 +271,13 @@ struct nand_ecc_ctrl {
 					   uint8_t *calc_ecc);
 	int			(*read_page_raw)(struct mtd_info *mtd,
 						 struct nand_chip *chip,
-						 uint8_t *buf);
+						 uint8_t *buf, int page);
 	void			(*write_page_raw)(struct mtd_info *mtd,
 						  struct nand_chip *chip,
 						  const uint8_t *buf);
 	int			(*read_page)(struct mtd_info *mtd,
 					     struct nand_chip *chip,
-					     uint8_t *buf);
+					     uint8_t *buf, int page);
 	int			(*read_subpage)(struct mtd_info *mtd,
 					     struct nand_chip *chip,
 					     uint32_t offs, uint32_t len,
-- 
cgit v0.10.2


From 6e0cb135b3f3713b95ea41a11155e83a8c70f5f8 Mon Sep 17 00:00:00 2001
From: Sneha Narnakaje <nsnehaprabha@ti.com>
Date: Fri, 18 Sep 2009 12:51:47 -0700
Subject: mtd: nand: add new ECC mode - ECC_HW_OOB_FIRST

This patch adds the new mode NAND_ECC_HW_OOB_FIRST in the nand code to
support 4-bit ECC on TI DaVinci devices with large page (up to 2KiB) NAND
chips.  This ECC mode is similar to NAND_ECC_HW, with the exception of
read_page API that first reads the OOB area, reads the data in chunks,
feeds the ECC from OOB area to the ECC hw engine and perform any
correction on the data as per the ECC status reported by the engine.

"ECC_HW_OOB_FIRST" name suggested by Thomas Gleixner

Reviewed-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Sneha Narnakaje <nsnehaprabha@ti.com>
Signed-off-by: Sandeep Paulraj <s-paulraj@ti.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 17bbd50..2211386 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -979,6 +979,54 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 /**
+ * nand_read_page_hwecc_oob_first - [REPLACABLE] hw ecc, read oob first
+ * @mtd:	mtd info structure
+ * @chip:	nand chip info structure
+ * @buf:	buffer to store read data
+ *
+ * Hardware ECC for large page chips, require OOB to be read first.
+ * For this ECC mode, the write_page method is re-used from ECC_HW.
+ * These methods read/write ECC from the OOB area, unlike the
+ * ECC_HW_SYNDROME support with multiple ECC steps, follows the
+ * "infix ECC" scheme and reads/writes ECC from the data area, by
+ * overwriting the NAND manufacturer bad block markings.
+ */
+static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
+	struct nand_chip *chip, uint8_t *buf, int page)
+{
+	int i, eccsize = chip->ecc.size;
+	int eccbytes = chip->ecc.bytes;
+	int eccsteps = chip->ecc.steps;
+	uint8_t *p = buf;
+	uint8_t *ecc_code = chip->buffers->ecccode;
+	uint32_t *eccpos = chip->ecc.layout->eccpos;
+	uint8_t *ecc_calc = chip->buffers->ecccalc;
+
+	/* Read the OOB area first */
+	chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
+	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page);
+
+	for (i = 0; i < chip->ecc.total; i++)
+		ecc_code[i] = chip->oob_poi[eccpos[i]];
+
+	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
+		int stat;
+
+		chip->ecc.hwctl(mtd, NAND_ECC_READ);
+		chip->read_buf(mtd, p, eccsize);
+		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
+
+		stat = chip->ecc.correct(mtd, p, &ecc_code[i], NULL);
+		if (stat < 0)
+			mtd->ecc_stats.failed++;
+		else
+			mtd->ecc_stats.corrected += stat;
+	}
+	return 0;
+}
+
+/**
  * nand_read_page_syndrome - [REPLACABLE] hardware ecc syndrom based page read
  * @mtd:	mtd info structure
  * @chip:	nand chip info structure
@@ -2673,6 +2721,17 @@ int nand_scan_tail(struct mtd_info *mtd)
 	 */
 
 	switch (chip->ecc.mode) {
+	case NAND_ECC_HW_OOB_FIRST:
+		/* Similar to NAND_ECC_HW, but a separate read_page handle */
+		if (!chip->ecc.calculate || !chip->ecc.correct ||
+		     !chip->ecc.hwctl) {
+			printk(KERN_WARNING "No ECC functions supplied; "
+			       "Hardware ECC not possible\n");
+			BUG();
+		}
+		if (!chip->ecc.read_page)
+			chip->ecc.read_page = nand_read_page_hwecc_oob_first;
+
 	case NAND_ECC_HW:
 		/* Use standard hwecc read page function ? */
 		if (!chip->ecc.read_page)
@@ -2695,7 +2754,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 		     chip->ecc.read_page == nand_read_page_hwecc ||
 		     !chip->ecc.write_page ||
 		     chip->ecc.write_page == nand_write_page_hwecc)) {
-			printk(KERN_WARNING "No ECC functions supplied, "
+			printk(KERN_WARNING "No ECC functions supplied; "
 			       "Hardware ECC not possible\n");
 			BUG();
 		}
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 686f370..7a232a9 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -121,6 +121,7 @@ typedef enum {
 	NAND_ECC_SOFT,
 	NAND_ECC_HW,
 	NAND_ECC_HW_SYNDROME,
+	NAND_ECC_HW_OOB_FIRST,
 } nand_ecc_modes_t;
 
 /*
-- 
cgit v0.10.2


From f12a9473283e68ae708e9ada37cb352ea2652397 Mon Sep 17 00:00:00 2001
From: Sneha Narnakaje <nsnehaprabha@ti.com>
Date: Fri, 18 Sep 2009 12:51:48 -0700
Subject: mtd: nand: DaVinci: Add 4-bit ECC support for large page NAND chips

This patch adds 4-bit ECC support for large page NAND chips using the new
ECC mode NAND_ECC_HW_OOB_FIRST.  The platform data from board-dm355-evm
has been adjusted to use this mode.

The patches have been verified on DM355 device with 2KiB-page Micron
devices using mtd-tests and JFFS2.  Error correction up to 4 bits has
also been verified using nandwrite/nanddump utilities.

Reviewed-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Sneha Narnakaje <nsnehaprabha@ti.com>
Signed-off-by: Sandeep Paulraj <s-paulraj@ti.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Troy Kisky <troy.kisky@boundarydevices.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 0fad648..f13f5b9 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -348,6 +348,12 @@ compare:
 	if (!(syndrome[0] | syndrome[1] | syndrome[2] | syndrome[3]))
 		return 0;
 
+	/*
+	 * Clear any previous address calculation by doing a dummy read of an
+	 * error address register.
+	 */
+	davinci_nand_readl(info, NAND_ERR_ADD1_OFFSET);
+
 	/* Start address calculation, and wait for it to complete.
 	 * We _could_ start reading more data while this is working,
 	 * to speed up the overall page read.
@@ -359,8 +365,10 @@ compare:
 
 		switch ((fsr >> 8) & 0x0f) {
 		case 0:		/* no error, should not happen */
+			davinci_nand_readl(info, NAND_ERR_ERRVAL1_OFFSET);
 			return 0;
 		case 1:		/* five or more errors detected */
+			davinci_nand_readl(info, NAND_ERR_ERRVAL1_OFFSET);
 			return -EIO;
 		case 2:		/* error addresses computed */
 		case 3:
@@ -500,6 +508,26 @@ static struct nand_ecclayout hwecc4_small __initconst = {
 	},
 };
 
+/* An ECC layout for using 4-bit ECC with large-page (2048bytes) flash,
+ * storing ten ECC bytes plus the manufacturer's bad block marker byte,
+ * and not overlapping the default BBT markers.
+ */
+static struct nand_ecclayout hwecc4_2048 __initconst = {
+	.eccbytes = 40,
+	.eccpos = {
+		/* at the end of spare sector */
+		24, 25, 26, 27, 28, 29,	30, 31, 32, 33,
+		34, 35, 36, 37, 38, 39,	40, 41, 42, 43,
+		44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
+		54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+		},
+	.oobfree = {
+		/* 2 bytes at offset 0 hold manufacturer badblock markers */
+		{.offset = 2, .length = 22, },
+		/* 5 bytes at offset 8 hold BBT markers */
+		/* 8 bytes at offset 16 hold JFFS2 clean markers */
+	},
+};
 
 static int __init nand_davinci_probe(struct platform_device *pdev)
 {
@@ -690,15 +718,20 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
 				info->mtd.oobsize - 16;
 			goto syndrome_done;
 		}
+		if (chunks == 4) {
+			info->ecclayout = hwecc4_2048;
+			info->chip.ecc.mode = NAND_ECC_HW_OOB_FIRST;
+			goto syndrome_done;
+		}
 
-		/* For large page chips we'll be wanting to use a
-		 * not-yet-implemented mode that reads OOB data
-		 * before reading the body of the page, to avoid
-		 * the "infix OOB" model of NAND_ECC_HW_SYNDROME
-		 * (and preserve manufacturer badblock markings).
+		/* 4KiB page chips are not yet supported. The eccpos from
+		 * nand_ecclayout cannot hold 80 bytes and change to eccpos[]
+		 * breaks userspace ioctl interface with mtd-utils. Once we
+		 * resolve this issue, NAND_ECC_HW_OOB_FIRST mode can be used
+		 * for the 4KiB page chips.
 		 */
 		dev_warn(&pdev->dev, "no 4-bit ECC support yet "
-				"for large page NAND\n");
+				"for 4KiB-page NAND\n");
 		ret = -EIO;
 		goto err_scan;
 
-- 
cgit v0.10.2


From 4c1e6b2ce13b154a4a69cee220c98976f4b784df Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 18 Sep 2009 12:51:49 -0700
Subject: mtd: lart: Prevent a read from mtd->eraseregions[-1]

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c
index 578de1c..f4359fe 100644
--- a/drivers/mtd/devices/lart.c
+++ b/drivers/mtd/devices/lart.c
@@ -393,7 +393,8 @@ static int flash_erase (struct mtd_info *mtd,struct erase_info *instr)
 	* erase range is aligned with the erase size which is in
 	* effect here.
 	*/
-   if (instr->addr & (mtd->eraseregions[i].erasesize - 1)) return (-EINVAL);
+   if (i < 0 || (instr->addr & (mtd->eraseregions[i].erasesize - 1)))
+      return -EINVAL;
 
    /* Remember the erase region we start on */
    first = i;
@@ -409,7 +410,8 @@ static int flash_erase (struct mtd_info *mtd,struct erase_info *instr)
    i--;
 
    /* is the end aligned on a block boundary? */
-   if ((instr->addr + instr->len) & (mtd->eraseregions[i].erasesize - 1)) return (-EINVAL);
+   if (i < 0 || ((instr->addr + instr->len) & (mtd->eraseregions[i].erasesize - 1)))
+      return -EINVAL;
 
    addr = instr->addr;
    len = instr->len;
-- 
cgit v0.10.2


From ebf2e93036907fe2a7ddab942aa63d35f97f3b2b Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 18 Sep 2009 12:51:49 -0700
Subject: mtd: mtdconcat: prevent a read from eraseregions[-1]

If the erase region was found in the first iteration we read from
eraseregions[-1]

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 792b547..db6de74 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -427,7 +427,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 		 * to-be-erased area begins. Verify that the starting
 		 * offset is aligned to this region's erase size:
 		 */
-		if (instr->addr & (erase_regions[i].erasesize - 1))
+		if (i < 0 || instr->addr & (erase_regions[i].erasesize - 1))
 			return -EINVAL;
 
 		/*
@@ -440,8 +440,8 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
 		/*
 		 * check if the ending offset is aligned to this region's erase size
 		 */
-		if ((instr->addr + instr->len) & (erase_regions[i].erasesize -
-						  1))
+		if (i < 0 || ((instr->addr + instr->len) &
+					(erase_regions[i].erasesize - 1)))
 			return -EINVAL;
 	}
 
-- 
cgit v0.10.2


From a57ca0466af5da83e379d636b8c01fd53b41e2c6 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 18 Sep 2009 12:51:50 -0700
Subject: mtd: mtdpart: prevent a read from regions[-1]

If the erase region was found in the first iteration we read from
regions[-1]

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 349fcbe..a83cfa1 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -453,7 +453,8 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
 		for (i = 0; i < max && regions[i].offset <= slave->offset; i++)
 			;
 		/* The loop searched for the region _behind_ the first one */
-		i--;
+		if (i > 0)
+			i--;
 
 		/* Pick biggest erasesize */
 		for (; i < max && regions[i].offset < end; i++) {
-- 
cgit v0.10.2


From 9aff1b1afe9a30c358d1c3a0bb50ae77bd7f994b Mon Sep 17 00:00:00 2001
From: Hiroshi Ito <ito@mlb.co.jp>
Date: Fri, 18 Sep 2009 12:51:51 -0700
Subject: mtd: jedec_probe: fix NEC uPD29F064115 detection

linux v2.6.31-rc6 can not detect NEC uPD29F064115.

uPD29F064115 is a 16 bit device.
datasheet:
  http://www.cn.necel.com/memory/cn/download/M16062EJ2V0DS00.pdf

This applies the same fix as used for SST chips in commit
ca6f12c67ed19718cf37d0f531af9438de85b70c ("jedec_probe: Fix SST 16-bit
chip detection").

Signed-off-by: Hiroshi Ito <ito@mlb.co.jp>
Cc: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index ccc4cfc..0f7065d 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -1156,8 +1156,8 @@ static const struct amd_flash_info jedec_table[] = {
 		.mfr_id		= MANUFACTURER_NEC,
 		.dev_id		= UPD29F064115,
 		.name		= "NEC uPD29F064115",
-		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
-		.uaddr		= MTD_UADDR_0x0555_0x02AA,	/* ???? */
+		.devtypes	= CFI_DEVICETYPE_X16,
+		.uaddr		= MTD_UADDR_0xAAAA_0x5555,
 		.dev_size	= SIZE_8MiB,
 		.cmd_set	= P_ID_AMD_STD,
 		.nr_regions	= 3,
-- 
cgit v0.10.2


From 3ff230a742b8fc196c1fe69875a57a429877cacb Mon Sep 17 00:00:00 2001
From: Timofei Bondarenko <tim@ipi.ac.ru>
Date: Wed, 20 May 2009 19:59:02 -0400
Subject: mtd/maps: uclinux: fix building when partition support is disabled

The uClinux map driver doesn't even use partitions, so we shouldn't require
it in order to work properly.

Signed-off-by: Timofei Bondarenko <tim@ipi.ac.ru>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
CC: Greg Ungerer <gerg@uclinux.org>
CC: uclinux-dev@uclinux.org
CC: linux-mtd@lists.infradead.org

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c
index d4314fb..3500929 100644
--- a/drivers/mtd/maps/uclinux.c
+++ b/drivers/mtd/maps/uclinux.c
@@ -89,7 +89,11 @@ static int __init uclinux_mtd_init(void)
 	mtd->priv = mapp;
 
 	uclinux_ram_mtdinfo = mtd;
+#ifdef CONFIG_MTD_PARTITIONS
 	add_mtd_partitions(mtd, uclinux_romfs, NUM_PARTITIONS);
+#else
+	add_mtd_device(mtd);
+#endif
 
 	return(0);
 }
@@ -99,7 +103,11 @@ static int __init uclinux_mtd_init(void)
 static void __exit uclinux_mtd_cleanup(void)
 {
 	if (uclinux_ram_mtdinfo) {
+#ifdef CONFIG_MTD_PARTITIONS
 		del_mtd_partitions(uclinux_ram_mtdinfo);
+#else
+		del_mtd_device(uclinux_ram_mtdinfo);
+#endif
 		map_destroy(uclinux_ram_mtdinfo);
 		uclinux_ram_mtdinfo = NULL;
 	}
-- 
cgit v0.10.2


From 1b533d227e5f8356d2f009f90b8c8c1f02eb71b8 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 9 Jun 2009 10:37:18 +0000
Subject: mtd/maps: uclinux: depend on MTD_RAM being built into the kernel

If MTD_RAM is built as a module, the uClinux map does not work since it
can only be built in to the kernel.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 7a58bd5..0f314eb 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -486,7 +486,7 @@ config MTD_BFIN_ASYNC
 
 config MTD_UCLINUX
 	bool "Generic uClinux RAM/ROM filesystem support"
-	depends on MTD_PARTITIONS && MTD_RAM && !MMU
+	depends on MTD_PARTITIONS && MTD_RAM=y && !MMU
 	help
 	  Map driver to support image based filesystems for uClinux.
 
-- 
cgit v0.10.2


From d79c326c048246b855b83a0092e6324df0717735 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 20 May 2009 12:04:09 -0400
Subject: mtd/maps: gpio-addr-flash: new driver for GPIO assisted flash
 addressing

This driver lets people use GPIO's for additional address lines in case
their processor does not have enough address lines already.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 0f314eb..3a9a960 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -484,6 +484,16 @@ config MTD_BFIN_ASYNC
 
 	  If compiled as a module, it will be called bfin-async-flash.
 
+config MTD_GPIO_ADDR
+	tristate "GPIO-assisted Flash Chip Support"
+	depends on MTD_COMPLEX_MAPPINGS
+	select MTD_PARTITIONS
+	help
+	  Map driver which allows flashes to be partially physically addressed
+	  and assisted by GPIOs.
+
+	  If compiled as a module, it will be called gpio-addr-flash.
+
 config MTD_UCLINUX
 	bool "Generic uClinux RAM/ROM filesystem support"
 	depends on MTD_PARTITIONS && MTD_RAM=y && !MMU
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 5beb066..1d5cf86 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -58,5 +58,4 @@ obj-$(CONFIG_MTD_PLATRAM)	+= plat-ram.o
 obj-$(CONFIG_MTD_OMAP_NOR)	+= omap_nor.o
 obj-$(CONFIG_MTD_INTEL_VR_NOR)	+= intel_vr_nor.o
 obj-$(CONFIG_MTD_BFIN_ASYNC)	+= bfin-async-flash.o
-obj-$(CONFIG_MTD_RBTX4939)	+= rbtx4939-flash.o
-obj-$(CONFIG_MTD_VMU)		+= vmu-flash.o
+obj-$(CONFIG_MTD_GPIO_ADDR)	+= gpio-addr-flash.o
diff --git a/drivers/mtd/maps/gpio-addr-flash.c b/drivers/mtd/maps/gpio-addr-flash.c
new file mode 100644
index 0000000..44ef9a4
--- /dev/null
+++ b/drivers/mtd/maps/gpio-addr-flash.c
@@ -0,0 +1,311 @@
+/*
+ * drivers/mtd/maps/gpio-addr-flash.c
+ *
+ * Handle the case where a flash device is mostly addressed using physical
+ * line and supplemented by GPIOs.  This way you can hook up say a 8MiB flash
+ * to a 2MiB memory range and use the GPIOs to select a particular range.
+ *
+ * Copyright © 2000 Nicolas Pitre <nico@cam.org>
+ * Copyright © 2005-2009 Analog Devices Inc.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#include <asm/gpio.h>
+#include <asm/io.h>
+
+#define pr_devinit(fmt, args...) ({ static const __devinitconst char __fmt[] = fmt; printk(__fmt, ## args); })
+
+#define DRIVER_NAME "gpio-addr-flash"
+#define PFX DRIVER_NAME ": "
+
+/**
+ * struct async_state - keep GPIO flash state
+ *	@mtd:         MTD state for this mapping
+ *	@map:         MTD map state for this flash
+ *	@gpio_count:  number of GPIOs used to address
+ *	@gpio_addrs:  array of GPIOs to twiddle
+ *	@gpio_values: cached GPIO values
+ *	@win_size:    dedicated memory size (if no GPIOs)
+ */
+struct async_state {
+	struct mtd_info *mtd;
+	struct map_info map;
+	size_t gpio_count;
+	unsigned *gpio_addrs;
+	int *gpio_values;
+	unsigned long win_size;
+};
+#define gf_map_info_to_state(mi) ((struct async_state *)(mi)->map_priv_1)
+
+/**
+ * gf_set_gpios() - set GPIO address lines to access specified flash offset
+ *	@state: GPIO flash state
+ *	@ofs:   desired offset to access
+ *
+ * Rather than call the GPIO framework every time, cache the last-programmed
+ * value.  This speeds up sequential accesses (which are by far the most common
+ * type).  We rely on the GPIO framework to treat non-zero value as high so
+ * that we don't have to normalize the bits.
+ */
+static void gf_set_gpios(struct async_state *state, unsigned long ofs)
+{
+	size_t i = 0;
+	int value;
+	ofs /= state->win_size;
+	do {
+		value = ofs & (1 << i);
+		if (state->gpio_values[i] != value) {
+			gpio_set_value(state->gpio_addrs[i], value);
+			state->gpio_values[i] = value;
+		}
+	} while (++i < state->gpio_count);
+}
+
+/**
+ * gf_read() - read a word at the specified offset
+ *	@map: MTD map state
+ *	@ofs: desired offset to read
+ */
+static map_word gf_read(struct map_info *map, unsigned long ofs)
+{
+	struct async_state *state = gf_map_info_to_state(map);
+	uint16_t word;
+	map_word test;
+
+	gf_set_gpios(state, ofs);
+
+	word = readw(map->virt + (ofs % state->win_size));
+	test.x[0] = word;
+	return test;
+}
+
+/**
+ * gf_copy_from() - copy a chunk of data from the flash
+ *	@map:  MTD map state
+ *	@to:   memory to copy to
+ *	@from: flash offset to copy from
+ *	@len:  how much to copy
+ *
+ * We rely on the MTD layer to chunk up copies such that a single request here
+ * will not cross a window size.  This allows us to only wiggle the GPIOs once
+ * before falling back to a normal memcpy.  Reading the higher layer code shows
+ * that this is indeed the case, but add a BUG_ON() to future proof.
+ */
+static void gf_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
+{
+	struct async_state *state = gf_map_info_to_state(map);
+
+	gf_set_gpios(state, from);
+
+	/* BUG if operation crosses the win_size */
+	BUG_ON(!((from + len) % state->win_size <= (from + len)));
+
+	/* operation does not cross the win_size, so one shot it */
+	memcpy_fromio(to, map->virt + (from % state->win_size), len);
+}
+
+/**
+ * gf_write() - write a word at the specified offset
+ *	@map: MTD map state
+ *	@ofs: desired offset to write
+ */
+static void gf_write(struct map_info *map, map_word d1, unsigned long ofs)
+{
+	struct async_state *state = gf_map_info_to_state(map);
+	uint16_t d;
+
+	gf_set_gpios(state, ofs);
+
+	d = d1.x[0];
+	writew(d, map->virt + (ofs % state->win_size));
+}
+
+/**
+ * gf_copy_to() - copy a chunk of data to the flash
+ *	@map:  MTD map state
+ *	@to:   flash offset to copy to
+ *	@from: memory to copy from
+ *	@len:  how much to copy
+ *
+ * See gf_copy_from() caveat.
+ */
+static void gf_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
+{
+	struct async_state *state = gf_map_info_to_state(map);
+
+	gf_set_gpios(state, to);
+
+	/* BUG if operation crosses the win_size */
+	BUG_ON(!((to + len) % state->win_size <= (to + len)));
+
+	/* operation does not cross the win_size, so one shot it */
+	memcpy_toio(map->virt + (to % state->win_size), from, len);
+}
+
+#ifdef CONFIG_MTD_PARTITIONS
+static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", NULL };
+#endif
+
+/**
+ * gpio_flash_probe() - setup a mapping for a GPIO assisted flash
+ *	@pdev: platform device
+ *
+ * The platform resource layout expected looks something like:
+ * struct mtd_partition partitions[] = { ... };
+ * struct physmap_flash_data flash_data = { ... };
+ * unsigned flash_gpios[] = { GPIO_XX, GPIO_XX, ... };
+ * struct resource flash_resource[] = {
+ *	{
+ *		.name  = "cfi_probe",
+ *		.start = 0x20000000,
+ *		.end   = 0x201fffff,
+ *		.flags = IORESOURCE_MEM,
+ *	}, {
+ *		.start = (unsigned long)flash_gpios,
+ *		.end   = ARRAY_SIZE(flash_gpios),
+ *		.flags = IORESOURCE_IRQ,
+ *	}
+ * };
+ * struct platform_device flash_device = {
+ *	.name          = "gpio-addr-flash",
+ *	.dev           = { .platform_data = &flash_data, },
+ *	.num_resources = ARRAY_SIZE(flash_resource),
+ *	.resource      = flash_resource,
+ *	...
+ * };
+ */
+static int __devinit gpio_flash_probe(struct platform_device *pdev)
+{
+	int ret;
+	size_t i, arr_size;
+	struct physmap_flash_data *pdata;
+	struct resource *memory;
+	struct resource *gpios;
+	struct async_state *state;
+
+	pdata = pdev->dev.platform_data;
+	memory = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	gpios = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+
+	if (!memory || !gpios || !gpios->end)
+		return -EINVAL;
+
+	arr_size = sizeof(int) * gpios->end;
+	state = kzalloc(sizeof(*state) + arr_size, GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->gpio_count     = gpios->end;
+	state->gpio_addrs     = (void *)gpios->start;
+	state->gpio_values    = (void *)(state + 1);
+	state->win_size       = memory->end - memory->start + 1;
+	memset(state->gpio_values, 0xff, arr_size);
+
+	state->map.name       = DRIVER_NAME;
+	state->map.read       = gf_read;
+	state->map.copy_from  = gf_copy_from;
+	state->map.write      = gf_write;
+	state->map.copy_to    = gf_copy_to;
+	state->map.bankwidth  = pdata->width;
+	state->map.size       = state->win_size * (1 << state->gpio_count);
+	state->map.virt       = (void __iomem *)memory->start;
+	state->map.phys       = NO_XIP;
+	state->map.map_priv_1 = (unsigned long)state;
+
+	platform_set_drvdata(pdev, state);
+
+	i = 0;
+	do {
+		if (gpio_request(state->gpio_addrs[i], DRIVER_NAME)) {
+			pr_devinit(KERN_ERR PFX "failed to request gpio %d\n",
+				state->gpio_addrs[i]);
+			while (i--)
+				gpio_free(state->gpio_addrs[i]);
+			kfree(state);
+			return -EBUSY;
+		}
+		gpio_direction_output(state->gpio_addrs[i], 0);
+	} while (++i < state->gpio_count);
+
+	pr_devinit(KERN_NOTICE PFX "probing %d-bit flash bus\n",
+		state->map.bankwidth * 8);
+	state->mtd = do_map_probe(memory->name, &state->map);
+	if (!state->mtd) {
+		for (i = 0; i < state->gpio_count; ++i)
+			gpio_free(state->gpio_addrs[i]);
+		kfree(state);
+		return -ENXIO;
+	}
+
+#ifdef CONFIG_MTD_PARTITIONS
+	ret = parse_mtd_partitions(state->mtd, part_probe_types, &pdata->parts, 0);
+	if (ret > 0) {
+		pr_devinit(KERN_NOTICE PFX "Using commandline partition definition\n");
+		add_mtd_partitions(state->mtd, pdata->parts, ret);
+		kfree(pdata->parts);
+
+	} else if (pdata->nr_parts) {
+		pr_devinit(KERN_NOTICE PFX "Using board partition definition\n");
+		add_mtd_partitions(state->mtd, pdata->parts, pdata->nr_parts);
+
+	} else
+#endif
+	{
+		pr_devinit(KERN_NOTICE PFX "no partition info available, registering whole flash at once\n");
+		add_mtd_device(state->mtd);
+	}
+
+	return 0;
+}
+
+static int __devexit gpio_flash_remove(struct platform_device *pdev)
+{
+	struct async_state *state = platform_get_drvdata(pdev);
+	size_t i = 0;
+	do {
+		gpio_free(state->gpio_addrs[i]);
+	} while (++i < state->gpio_count);
+#ifdef CONFIG_MTD_PARTITIONS
+	del_mtd_partitions(state->mtd);
+#endif
+	map_destroy(state->mtd);
+	kfree(state);
+	return 0;
+}
+
+static struct platform_driver gpio_flash_driver = {
+	.probe		= gpio_flash_probe,
+	.remove		= __devexit_p(gpio_flash_remove),
+	.driver		= {
+		.name	= DRIVER_NAME,
+	},
+};
+
+static int __init gpio_flash_init(void)
+{
+	return platform_driver_register(&gpio_flash_driver);
+}
+module_init(gpio_flash_init);
+
+static void __exit gpio_flash_exit(void)
+{
+	platform_driver_unregister(&gpio_flash_driver);
+}
+module_exit(gpio_flash_exit);
+
+MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
+MODULE_DESCRIPTION("MTD map driver for flashes addressed physically and with gpios");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 80f53da0ac752fe16a01ffeddaea658670974a05 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Sat, 13 Jun 2009 06:15:18 -0400
Subject: mtd: fix order of TEST/PARTITIONS kconfig options

The MTD_TEST config option was added in between the MTD_PARTITIONS config
and its dependent options which causes the resulting menu system to
display incorrectly as MTD_TEST does not depend on MTD_PARTITIONS.  So
move it up a few lines where it won't cause a problem.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index b8e35a0..e4ec365 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -25,6 +25,14 @@ config MTD_DEBUG_VERBOSE
 	help
 	  Determines the verbosity level of the MTD debugging messages.
 
+config MTD_TESTS
+	tristate "MTD tests support"
+	depends on m
+	help
+	  This option includes various MTD tests into compilation. The tests
+	  should normally be compiled as kernel modules. The modules perform
+	  various checks and verifications when loaded.
+
 config MTD_CONCAT
 	tristate "MTD concatenating support"
 	help
@@ -45,14 +53,6 @@ config MTD_PARTITIONS
 	  devices. Partitioning on NFTL 'devices' is a different - that's the
 	  'normal' form of partitioning used on a block device.
 
-config MTD_TESTS
-	tristate "MTD tests support"
-	depends on m
-	help
-	  This option includes various MTD tests into compilation. The tests
-	  should normally be compiled as kernel modules. The modules perform
-	  various checks and verifications when loaded.
-
 config MTD_REDBOOT_PARTS
 	tristate "RedBoot partition table parsing"
 	depends on MTD_PARTITIONS
-- 
cgit v0.10.2


From 49aac4aec53c523f16343b4668a5a239b69659f1 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Mon, 15 Jun 2009 08:23:41 +0000
Subject: mtd: m25p80: add support for AAI programming with SST SPI flashes

The SST SPI flashes are a bit non-standard in that they can be programmed
one byte at a time (including address!), or they can be written two bytes
at a time with auto address incrementing (AAI).  The latter form is
obviously much better for performance, so let's use it when possible.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 6d8d265..53de9f0 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -44,6 +44,11 @@
 #define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
 #define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
 
+/* Used for SST flashes only. */
+#define	OPCODE_BP		0x02	/* Byte program */
+#define	OPCODE_WRDI		0x04	/* Write disable */
+#define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
+
 /* Status Register bits. */
 #define	SR_WIP			1	/* Write in progress */
 #define	SR_WEL			2	/* Write enable latch */
@@ -132,6 +137,15 @@ static inline int write_enable(struct m25p *flash)
 	return spi_write_then_read(flash->spi, &code, 1, NULL, 0);
 }
 
+/*
+ * Send write disble instruction to the chip.
+ */
+static inline int write_disable(struct m25p *flash)
+{
+	u8	code = OPCODE_WRDI;
+
+	return spi_write_then_read(flash->spi, &code, 1, NULL, 0);
+}
 
 /*
  * Service routine to read status register until ready, or timeout occurs.
@@ -454,6 +468,111 @@ static int m25p80_write(struct mtd_info *mtd, loff_t to, size_t len,
 	return 0;
 }
 
+static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
+		size_t *retlen, const u_char *buf)
+{
+	struct m25p *flash = mtd_to_m25p(mtd);
+	struct spi_transfer t[2];
+	struct spi_message m;
+	size_t actual;
+	int cmd_sz, ret;
+
+	if (retlen)
+		*retlen = 0;
+
+	/* sanity checks */
+	if (!len)
+		return 0;
+
+	if (to + len > flash->mtd.size)
+		return -EINVAL;
+
+	spi_message_init(&m);
+	memset(t, 0, (sizeof t));
+
+	t[0].tx_buf = flash->command;
+	t[0].len = CMD_SIZE;
+	spi_message_add_tail(&t[0], &m);
+
+	t[1].tx_buf = buf;
+	spi_message_add_tail(&t[1], &m);
+
+	mutex_lock(&flash->lock);
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(flash);
+	if (ret)
+		goto time_out;
+
+	write_enable(flash);
+
+	actual = to % 2;
+	/* Start write from odd address. */
+	if (actual) {
+		flash->command[0] = OPCODE_BP;
+		flash->command[1] = to >> 16;
+		flash->command[2] = to >> 8;
+		flash->command[3] = to;
+
+		/* write one byte. */
+		t[1].len = 1;
+		spi_sync(flash->spi, &m);
+		ret = wait_till_ready(flash);
+		if (ret)
+			goto time_out;
+		*retlen += m.actual_length - CMD_SIZE;
+	}
+	to += actual;
+
+	flash->command[0] = OPCODE_AAI_WP;
+	flash->command[1] = to >> 16;
+	flash->command[2] = to >> 8;
+	flash->command[3] = to;
+
+	/* Write out most of the data here. */
+	cmd_sz = CMD_SIZE;
+	for (; actual < len - 1; actual += 2) {
+		t[0].len = cmd_sz;
+		/* write two bytes. */
+		t[1].len = 2;
+		t[1].tx_buf = buf + actual;
+
+		spi_sync(flash->spi, &m);
+		ret = wait_till_ready(flash);
+		if (ret)
+			goto time_out;
+		*retlen += m.actual_length - cmd_sz;
+		cmd_sz = 1;
+		to += 2;
+	}
+	write_disable(flash);
+	ret = wait_till_ready(flash);
+	if (ret)
+		goto time_out;
+
+	/* Write out trailing byte if it exists. */
+	if (actual != len) {
+		write_enable(flash);
+		flash->command[0] = OPCODE_BP;
+		flash->command[1] = to >> 16;
+		flash->command[2] = to >> 8;
+		flash->command[3] = to;
+		t[0].len = CMD_SIZE;
+		t[1].len = 1;
+		t[1].tx_buf = buf + actual;
+
+		spi_sync(flash->spi, &m);
+		ret = wait_till_ready(flash);
+		if (ret)
+			goto time_out;
+		*retlen += m.actual_length - CMD_SIZE;
+		write_disable(flash);
+	}
+
+time_out:
+	mutex_unlock(&flash->lock);
+	return ret;
+}
 
 /****************************************************************************/
 
@@ -670,7 +789,12 @@ static int __devinit m25p_probe(struct spi_device *spi)
 	flash->mtd.size = info->sector_size * info->n_sectors;
 	flash->mtd.erase = m25p80_erase;
 	flash->mtd.read = m25p80_read;
-	flash->mtd.write = m25p80_write;
+
+	/* sst flash chips use AAI word program */
+	if (info->jedec_id >> 16 == 0xbf)
+		flash->mtd.write = sst_write;
+	else
+		flash->mtd.write = m25p80_write;
 
 	/* prefer "small sector" erase if possible */
 	if (info->flags & SECT_4K) {
-- 
cgit v0.10.2


From aa3651e4625e21c2eb8a8e504d9bbc3c2a964be0 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Mon, 15 Jun 2009 08:23:41 +0000
Subject: mtd: m25p80: add SST WF SPI flash device information

Support SST25WF{512,010,020,040} SPI flashes.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 53de9f0..9b78869 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -641,6 +641,10 @@ static struct flash_info __devinitdata m25p_data [] = {
 	{ "sst25vf080b", 0xbf258e, 0, 64 * 1024, 16, SECT_4K, },
 	{ "sst25vf016b", 0xbf2541, 0, 64 * 1024, 32, SECT_4K, },
 	{ "sst25vf032b", 0xbf254a, 0, 64 * 1024, 64, SECT_4K, },
+	{ "sst25wf512",  0xbf2501, 0, 64 * 1024, 1, SECT_4K, },
+	{ "sst25wf010",  0xbf2502, 0, 64 * 1024, 2, SECT_4K, },
+	{ "sst25wf020",  0xbf2503, 0, 64 * 1024, 4, SECT_4K, },
+	{ "sst25wf040",  0xbf2504, 0, 64 * 1024, 8, SECT_4K, },
 
 	/* ST Microelectronics -- newer production may have feature updates */
 	{ "m25p05",  0x202010,  0, 32 * 1024, 2, },
-- 
cgit v0.10.2


From 64da392ab08a88ad83f4c3f60283711ee090c9ef Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 16 Jun 2009 19:20:40 +0000
Subject: phram: cleanup error handling and associated messages

The error handling in the phram driver is pretty bad -- in many places,
errors are silently ignored or logged, but then still ignored in the
return value.  So convert all of the code to pass back the correct return
value and log error messages properly (and using the new pr_fmt() helper).

If everything does go smoothly, rather than exit silently, dump a helpful
info message like pretty much every other MTD driver does.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Joern Engel <joern@logfs.org>

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index 088fbb7..1696bbe 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c
@@ -14,6 +14,9 @@
  * Example:
  *	phram=swap,64Mi,128Mi phram=test,900Mi,1Mi
  */
+
+#define pr_fmt(fmt) "phram: " fmt
+
 #include <asm/io.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -23,8 +26,6 @@
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
 
-#define ERROR(fmt, args...) printk(KERN_ERR "phram: " fmt , ## args)
-
 struct phram_mtd_list {
 	struct mtd_info mtd;
 	struct list_head list;
@@ -132,7 +133,7 @@ static int register_device(char *name, unsigned long start, unsigned long len)
 	ret = -EIO;
 	new->mtd.priv = ioremap(start, len);
 	if (!new->mtd.priv) {
-		ERROR("ioremap failed\n");
+		pr_err("ioremap failed\n");
 		goto out1;
 	}
 
@@ -152,7 +153,7 @@ static int register_device(char *name, unsigned long start, unsigned long len)
 
 	ret = -EAGAIN;
 	if (add_mtd_device(&new->mtd)) {
-		ERROR("Failed to register new device\n");
+		pr_err("Failed to register new device\n");
 		goto out2;
 	}
 
@@ -227,8 +228,8 @@ static inline void kill_final_newline(char *str)
 
 
 #define parse_err(fmt, args...) do {	\
-	ERROR(fmt , ## args);	\
-	return 0;		\
+	pr_err(fmt , ## args);	\
+	return 1;		\
 } while (0)
 
 static int phram_setup(const char *val, struct kernel_param *kp)
@@ -256,12 +257,8 @@ static int phram_setup(const char *val, struct kernel_param *kp)
 		parse_err("not enough arguments\n");
 
 	ret = parse_name(&name, token[0]);
-	if (ret == -ENOMEM)
-		parse_err("out of memory\n");
-	if (ret == -ENOSPC)
-		parse_err("name too long\n");
 	if (ret)
-		return 0;
+		return ret;
 
 	ret = parse_num32(&start, token[1]);
 	if (ret) {
@@ -275,9 +272,11 @@ static int phram_setup(const char *val, struct kernel_param *kp)
 		parse_err("illegal device length\n");
 	}
 
-	register_device(name, start, len);
+	ret = register_device(name, start, len);
+	if (!ret)
+		pr_info("%s device: %#x at %#x\n", name, len, start);
 
-	return 0;
+	return ret;
 }
 
 module_param_call(phram, phram_setup, NULL, NULL, 000);
-- 
cgit v0.10.2


From 71b7d0d90d536ae4e70929cc59a1a9f6ba457c6c Mon Sep 17 00:00:00 2001
From: Eric Benard <ebenard@eukrea.com>
Date: Mon, 29 Jun 2009 13:58:01 +0200
Subject: mtd: mxc_nand: fix 2KiB pagesize NAND on i.MX27

This patch allows i.MX27 to support 2KiB pagesize NAND flash.
We are using a 1.8V NAND flash which datasheet (unfortunately only
available under NDA) says :
Page size: x8: 2,112 bytes (2,048 + 64 bytes).
Without this patch, all sectors are marked as bad eraseblock.

Signed-off-by: Eric Benard <ebenard@eukrea.com>
Acked-by : Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 76beea4..65b26d5 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -857,6 +857,17 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
 	}
 }
 
+/* Define some generic bad / good block scan pattern which are used
+ * while scanning a device for factory marked good / bad blocks. */
+static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
+
+static struct nand_bbt_descr smallpage_memorybased = {
+	.options = NAND_BBT_SCAN2NDPAGE,
+	.offs = 5,
+	.len = 1,
+	.pattern = scan_ff_pattern
+};
+
 static int __init mxcnd_probe(struct platform_device *pdev)
 {
 	struct nand_chip *this;
@@ -973,7 +984,10 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 		goto escan;
 	}
 
-	host->pagesize_2k = (mtd->writesize == 2048) ? 1 : 0;
+	if (mtd->writesize == 2048) {
+		host->pagesize_2k = 1;
+		this->badblock_pattern = &smallpage_memorybased;
+	}
 
 	if (this->ecc.mode == NAND_ECC_HW) {
 		switch (mtd->oobsize) {
-- 
cgit v0.10.2


From 223cf6c3b517cf6ef040cafe45af89f3b8adba74 Mon Sep 17 00:00:00 2001
From: Yeasah Pell <yeasah@comrex.com>
Date: Wed, 1 Jul 2009 18:11:35 +0300
Subject: mtd: pxa3xx_nand: add single-bit error corrections reporting

Acked-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Yeasah Pell <yeasah@comrex.com>
Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 30a8ce6..6ea520a 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -102,6 +102,7 @@ enum {
 	ERR_SENDCMD	= -2,
 	ERR_DBERR	= -3,
 	ERR_BBERR	= -4,
+	ERR_SBERR	= -5,
 };
 
 enum {
@@ -564,11 +565,13 @@ static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
 
 	status = nand_readl(info, NDSR);
 
-	if (status & (NDSR_RDDREQ | NDSR_DBERR)) {
+	if (status & (NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR)) {
 		if (status & NDSR_DBERR)
 			info->retcode = ERR_DBERR;
+		else if (status & NDSR_SBERR)
+			info->retcode = ERR_SBERR;
 
-		disable_int(info, NDSR_RDDREQ | NDSR_DBERR);
+		disable_int(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR);
 
 		if (info->use_dma) {
 			info->state = STATE_DMA_READING;
@@ -670,7 +673,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 		if (prepare_read_prog_cmd(info, cmdset->read1, column, page_addr))
 			break;
 
-		pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR);
+		pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR);
 
 		/* We only are OOB, so if the data has error, does not matter */
 		if (info->retcode == ERR_DBERR)
@@ -687,7 +690,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 		if (prepare_read_prog_cmd(info, cmdset->read1, column, page_addr))
 			break;
 
-		pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR);
+		pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR);
 
 		if (info->retcode == ERR_DBERR) {
 			/* for blank page (all 0xff), HW will calculate its ECC as
@@ -861,8 +864,12 @@ static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
 	 * consider it as a ecc error which will tell the caller the
 	 * read fail We have distinguish all the errors, but the
 	 * nand_read_ecc only check this function return value
+	 *
+	 * Corrected (single-bit) errors must also be noted.
 	 */
-	if (info->retcode != ERR_NONE)
+	if (info->retcode == ERR_SBERR)
+		return 1;
+	else if (info->retcode != ERR_NONE)
 		return -1;
 
 	return 0;
-- 
cgit v0.10.2


From fc28c39f0ef59bfb649ddfd633275be8e45c0f9c Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 17 Jul 2009 14:39:23 +0200
Subject: mtd: maps: add mtd-ram support to physmap_of
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use physmap_of to access RAMs as mtd and add documenation for it. This approach
is a lot less intrusive as adding an of-wrapper around plat-ram.c. As most
extensions of plat-ram.c (e.g. custom map-functions) can't be mapped to the
device tree anyhow, extending physmap_of seems to be the cleanest approach.

Tested with a phyCORE-MPC5121e.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Vitaly Wool <vwool@ru.mvista.com>
Cc: Artem Bityutskiy <dedekind@infradead.org>
Cc: Ken MacLeod <ken@bitsko.slc.ut.us>
Cc: Albrecht Dreß <albrecht.dress@arcor.de>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/Documentation/powerpc/dts-bindings/mtd-physmap.txt b/Documentation/powerpc/dts-bindings/mtd-physmap.txt
index 667c9bd..80152cb 100644
--- a/Documentation/powerpc/dts-bindings/mtd-physmap.txt
+++ b/Documentation/powerpc/dts-bindings/mtd-physmap.txt
@@ -1,18 +1,19 @@
-CFI or JEDEC memory-mapped NOR flash
+CFI or JEDEC memory-mapped NOR flash, MTD-RAM (NVRAM...)
 
 Flash chips (Memory Technology Devices) are often used for solid state
 file systems on embedded devices.
 
- - compatible : should contain the specific model of flash chip(s)
-   used, if known, followed by either "cfi-flash" or "jedec-flash"
- - reg : Address range(s) of the flash chip(s)
+ - compatible : should contain the specific model of mtd chip(s)
+   used, if known, followed by either "cfi-flash", "jedec-flash"
+   or "mtd-ram".
+ - reg : Address range(s) of the mtd chip(s)
    It's possible to (optionally) define multiple "reg" tuples so that
-   non-identical NOR chips can be described in one flash node.
- - bank-width : Width (in bytes) of the flash bank.  Equal to the
+   non-identical chips can be described in one node.
+ - bank-width : Width (in bytes) of the bank.  Equal to the
    device width times the number of interleaved chips.
- - device-width : (optional) Width of a single flash chip.  If
+ - device-width : (optional) Width of a single mtd chip.  If
    omitted, assumed to be equal to 'bank-width'.
- - #address-cells, #size-cells : Must be present if the flash has
+ - #address-cells, #size-cells : Must be present if the device has
    sub-nodes representing partitions (see below).  In this case
    both #address-cells and #size-cells must be equal to 1.
 
@@ -22,24 +23,24 @@ are defined:
  - vendor-id : Contains the flash chip's vendor id (1 byte).
  - device-id : Contains the flash chip's device id (1 byte).
 
-In addition to the information on the flash bank itself, the
+In addition to the information on the mtd bank itself, the
 device tree may optionally contain additional information
-describing partitions of the flash address space.  This can be
+describing partitions of the address space.  This can be
 used on platforms which have strong conventions about which
-portions of the flash are used for what purposes, but which don't
+portions of a flash are used for what purposes, but which don't
 use an on-flash partition table such as RedBoot.
 
-Each partition is represented as a sub-node of the flash device.
+Each partition is represented as a sub-node of the mtd device.
 Each node's name represents the name of the corresponding
-partition of the flash device.
+partition of the mtd device.
 
 Flash partitions
- - reg : The partition's offset and size within the flash bank.
- - label : (optional) The label / name for this flash partition.
+ - reg : The partition's offset and size within the mtd bank.
+ - label : (optional) The label / name for this partition.
    If omitted, the label is taken from the node name (excluding
    the unit address).
  - read-only : (optional) This parameter, if present, is a hint to
-   Linux that this flash partition should only be mounted
+   Linux that this partition should only be mounted
    read-only.  This is usually used for flash partitions
    containing early-boot firmware images or data which should not
    be clobbered.
@@ -78,3 +79,12 @@ Here an example with multiple "reg" tuples:
 			reg = <0 0x04000000>;
 		};
 	};
+
+An example using SRAM:
+
+	sram@2,0 {
+		compatible = "samsung,k6f1616u6a", "mtd-ram";
+		reg = <2 0 0x00200000>;
+		bank-width = <2>;
+	};
+
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index e828d58..61e4eb4 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -362,6 +362,10 @@ static struct of_device_id of_flash_match[] = {
 		.data		= (void *)"jedec_probe",
 	},
 	{
+		.compatible     = "mtd-ram",
+		.data           = (void *)"map_ram",
+	},
+	{
 		.type		= "rom",
 		.compatible	= "direct-mapped"
 	},
-- 
cgit v0.10.2


From 8bff82cbc30884fc52969608d090d874641e7196 Mon Sep 17 00:00:00 2001
From: Wan ZongShun <mcuos.com@gmail.com>
Date: Fri, 10 Jul 2009 15:17:27 +0800
Subject: mtd: add nand support for w90p910 (v2)

Add w90p910 NAND driver for w90p910 evaluation board
based on w90p910,there is a K8F1G08 NAND on my board.

[dwmw2: depend on MTD_PARTITIONS]

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index ce96c09..707d7ee 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -452,4 +452,11 @@ config MTD_NAND_SOCRATES
 	help
 	  Enables support for NAND Flash chips wired onto Socrates board.
 
+config MTD_NAND_W90P910
+	tristate "Support for NAND on w90p910 evaluation board."
+	depends on ARCH_W90X900 && MTD_PARTITIONS
+	help
+	  This enables the driver for the NAND Flash on evaluation board based
+	  on w90p910.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index f3a786b..4f7b189 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -40,5 +40,6 @@ obj-$(CONFIG_MTD_NAND_SH_FLCTL)		+= sh_flctl.o
 obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 obj-$(CONFIG_MTD_NAND_SOCRATES)		+= socrates_nand.o
 obj-$(CONFIG_MTD_NAND_TXX9NDFMC)	+= txx9ndfmc.o
+obj-$(CONFIG_MTD_NAND_W90P910)		+= w90p910_nand.o
 
 nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/w90p910_nand.c b/drivers/mtd/nand/w90p910_nand.c
new file mode 100644
index 0000000..7680e73
--- /dev/null
+++ b/drivers/mtd/nand/w90p910_nand.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (c) 2009 Nuvoton technology corporation.
+ *
+ * Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;version 2 of the License.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+
+#define REG_FMICSR   	0x00
+#define REG_SMCSR    	0xa0
+#define REG_SMISR    	0xac
+#define REG_SMCMD    	0xb0
+#define REG_SMADDR   	0xb4
+#define REG_SMDATA   	0xb8
+
+#define RESET_FMI	0x01
+#define NAND_EN		0x08
+#define READYBUSY	(0x01 << 18)
+
+#define SWRST		0x01
+#define PSIZE		(0x01 << 3)
+#define DMARWEN		(0x03 << 1)
+#define BUSWID		(0x01 << 4)
+#define ECC4EN		(0x01 << 5)
+#define WP		(0x01 << 24)
+#define NANDCS		(0x01 << 25)
+#define ENDADDR		(0x01 << 31)
+
+#define read_data_reg(dev)		\
+	__raw_readl((dev)->reg + REG_SMDATA)
+
+#define write_data_reg(dev, val)	\
+	__raw_writel((val), (dev)->reg + REG_SMDATA)
+
+#define write_cmd_reg(dev, val)		\
+	__raw_writel((val), (dev)->reg + REG_SMCMD)
+
+#define write_addr_reg(dev, val)	\
+	__raw_writel((val), (dev)->reg + REG_SMADDR)
+
+struct w90p910_nand {
+	struct mtd_info mtd;
+	struct nand_chip chip;
+	void __iomem *reg;
+	struct clk *clk;
+	spinlock_t lock;
+};
+
+static const struct mtd_partition partitions[] = {
+	{
+	 .name = "NAND FS 0",
+	 .offset = 0,
+	 .size = 8 * 1024 * 1024
+	},
+	{
+	 .name = "NAND FS 1",
+	 .offset = MTDPART_OFS_APPEND,
+	 .size = MTDPART_SIZ_FULL
+	}
+};
+
+static unsigned char w90p910_nand_read_byte(struct mtd_info *mtd)
+{
+	unsigned char ret;
+	struct w90p910_nand *nand;
+
+	nand = container_of(mtd, struct w90p910_nand, mtd);
+
+	ret = (unsigned char)read_data_reg(nand);
+
+	return ret;
+}
+
+static void w90p910_nand_read_buf(struct mtd_info *mtd,
+						unsigned char *buf, int len)
+{
+	int i;
+	struct w90p910_nand *nand;
+
+	nand = container_of(mtd, struct w90p910_nand, mtd);
+
+	for (i = 0; i < len; i++)
+		buf[i] = (unsigned char)read_data_reg(nand);
+}
+
+static void w90p910_nand_write_buf(struct mtd_info *mtd,
+					const unsigned char *buf, int len)
+{
+	int i;
+	struct w90p910_nand *nand;
+
+	nand = container_of(mtd, struct w90p910_nand, mtd);
+
+	for (i = 0; i < len; i++)
+		write_data_reg(nand, buf[i]);
+}
+
+static int w90p910_verify_buf(struct mtd_info *mtd,
+					const unsigned char *buf, int len)
+{
+	int i;
+	struct w90p910_nand *nand;
+
+	nand = container_of(mtd, struct w90p910_nand, mtd);
+
+	for (i = 0; i < len; i++) {
+		if (buf[i] != (unsigned char)read_data_reg(nand))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int w90p910_check_rb(struct w90p910_nand *nand)
+{
+	unsigned int val;
+	spin_lock(&nand->lock);
+	val = __raw_readl(REG_SMISR);
+	val &= READYBUSY;
+	spin_unlock(&nand->lock);
+
+	return val;
+}
+
+static int w90p910_nand_devready(struct mtd_info *mtd)
+{
+	struct w90p910_nand *nand;
+	int ready;
+
+	nand = container_of(mtd, struct w90p910_nand, mtd);
+
+	ready = (w90p910_check_rb(nand)) ? 1 : 0;
+	return ready;
+}
+
+static void w90p910_nand_command_lp(struct mtd_info *mtd,
+			unsigned int command, int column, int page_addr)
+{
+	register struct nand_chip *chip = mtd->priv;
+	struct w90p910_nand *nand;
+
+	nand = container_of(mtd, struct w90p910_nand, mtd);
+
+	if (command == NAND_CMD_READOOB) {
+		column += mtd->writesize;
+		command = NAND_CMD_READ0;
+	}
+
+	write_cmd_reg(nand, command & 0xff);
+
+	if (column != -1 || page_addr != -1) {
+
+		if (column != -1) {
+			if (chip->options & NAND_BUSWIDTH_16)
+				column >>= 1;
+			write_addr_reg(nand, column);
+			write_addr_reg(nand, column >> 8 | ENDADDR);
+		}
+		if (page_addr != -1) {
+			write_addr_reg(nand, page_addr);
+
+			if (chip->chipsize > (128 << 20)) {
+				write_addr_reg(nand, page_addr >> 8);
+				write_addr_reg(nand, page_addr >> 16 | ENDADDR);
+			} else {
+				write_addr_reg(nand, page_addr >> 8 | ENDADDR);
+			}
+		}
+	}
+
+	switch (command) {
+	case NAND_CMD_CACHEDPROG:
+	case NAND_CMD_PAGEPROG:
+	case NAND_CMD_ERASE1:
+	case NAND_CMD_ERASE2:
+	case NAND_CMD_SEQIN:
+	case NAND_CMD_RNDIN:
+	case NAND_CMD_STATUS:
+	case NAND_CMD_DEPLETE1:
+		return;
+
+	case NAND_CMD_STATUS_ERROR:
+	case NAND_CMD_STATUS_ERROR0:
+	case NAND_CMD_STATUS_ERROR1:
+	case NAND_CMD_STATUS_ERROR2:
+	case NAND_CMD_STATUS_ERROR3:
+		udelay(chip->chip_delay);
+		return;
+
+	case NAND_CMD_RESET:
+		if (chip->dev_ready)
+			break;
+		udelay(chip->chip_delay);
+
+		write_cmd_reg(nand, NAND_CMD_STATUS);
+		write_cmd_reg(nand, command);
+
+		while (!w90p910_check_rb(nand))
+			;
+
+		return;
+
+	case NAND_CMD_RNDOUT:
+		write_cmd_reg(nand, NAND_CMD_RNDOUTSTART);
+		return;
+
+	case NAND_CMD_READ0:
+
+		write_cmd_reg(nand, NAND_CMD_READSTART);
+	default:
+
+		if (!chip->dev_ready) {
+			udelay(chip->chip_delay);
+			return;
+		}
+	}
+
+	/* Apply this short delay always to ensure that we do wait tWB in
+	 * any case on any machine. */
+	ndelay(100);
+
+	while (!chip->dev_ready(mtd))
+		;
+}
+
+
+static void w90p910_nand_enable(struct w90p910_nand *nand)
+{
+	unsigned int val;
+	spin_lock(&nand->lock);
+	__raw_writel(RESET_FMI, (nand->reg + REG_FMICSR));
+
+	val = __raw_readl(nand->reg + REG_FMICSR);
+
+	if (!(val & NAND_EN))
+		__raw_writel(val | NAND_EN, REG_FMICSR);
+
+	val = __raw_readl(nand->reg + REG_SMCSR);
+
+	val &= ~(SWRST|PSIZE|DMARWEN|BUSWID|ECC4EN|NANDCS);
+	val |= WP;
+
+	__raw_writel(val, nand->reg + REG_SMCSR);
+
+	spin_unlock(&nand->lock);
+}
+
+static int __devinit w90p910_nand_probe(struct platform_device *pdev)
+{
+	struct w90p910_nand *w90p910_nand;
+	struct nand_chip *chip;
+	int retval;
+	struct resource *res;
+
+	retval = 0;
+
+	w90p910_nand = kzalloc(sizeof(struct w90p910_nand), GFP_KERNEL);
+	if (!w90p910_nand)
+		return -ENOMEM;
+	chip = &(w90p910_nand->chip);
+
+	w90p910_nand->mtd.priv	= chip;
+	w90p910_nand->mtd.owner	= THIS_MODULE;
+	spin_lock_init(&w90p910_nand->lock);
+
+	w90p910_nand->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(w90p910_nand->clk)) {
+		retval = -ENOENT;
+		goto fail1;
+	}
+	clk_enable(w90p910_nand->clk);
+
+	chip->cmdfunc		= w90p910_nand_command_lp;
+	chip->dev_ready		= w90p910_nand_devready;
+	chip->read_byte		= w90p910_nand_read_byte;
+	chip->write_buf		= w90p910_nand_write_buf;
+	chip->read_buf		= w90p910_nand_read_buf;
+	chip->verify_buf	= w90p910_verify_buf;
+	chip->chip_delay	= 50;
+	chip->options		= 0;
+	chip->ecc.mode		= NAND_ECC_SOFT;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		retval = -ENXIO;
+		goto fail1;
+	}
+
+	if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
+		retval = -EBUSY;
+		goto fail1;
+	}
+
+	w90p910_nand->reg = ioremap(res->start, resource_size(res));
+	if (!w90p910_nand->reg) {
+		retval = -ENOMEM;
+		goto fail2;
+	}
+
+	w90p910_nand_enable(w90p910_nand);
+
+	if (nand_scan(&(w90p910_nand->mtd), 1)) {
+		retval = -ENXIO;
+		goto fail3;
+	}
+
+	add_mtd_partitions(&(w90p910_nand->mtd), partitions,
+						ARRAY_SIZE(partitions));
+
+	platform_set_drvdata(pdev, w90p910_nand);
+
+	return retval;
+
+fail3:	iounmap(w90p910_nand->reg);
+fail2:	release_mem_region(res->start, resource_size(res));
+fail1:	kfree(w90p910_nand);
+	return retval;
+}
+
+static int __devexit w90p910_nand_remove(struct platform_device *pdev)
+{
+	struct w90p910_nand *w90p910_nand = platform_get_drvdata(pdev);
+	struct resource *res;
+
+	iounmap(w90p910_nand->reg);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(res->start, resource_size(res));
+
+	clk_disable(w90p910_nand->clk);
+	clk_put(w90p910_nand->clk);
+
+	kfree(w90p910_nand);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver w90p910_nand_driver = {
+	.probe		= w90p910_nand_probe,
+	.remove		= __devexit_p(w90p910_nand_remove),
+	.driver		= {
+		.name	= "w90p910-fmi",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init w90p910_nand_init(void)
+{
+	return platform_driver_register(&w90p910_nand_driver);
+}
+
+static void __exit w90p910_nand_exit(void)
+{
+	platform_driver_unregister(&w90p910_nand_driver);
+}
+
+module_init(w90p910_nand_init);
+module_exit(w90p910_nand_exit);
+
+MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
+MODULE_DESCRIPTION("w90p910 nand driver!");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:w90p910-fmi");
-- 
cgit v0.10.2


From 59e9c5ae17179fe561103fbe0808fac5976ca1bd Mon Sep 17 00:00:00 2001
From: vimal singh <vimalsingh@ti.com>
Date: Mon, 13 Jul 2009 16:26:24 +0530
Subject: mtd: omap: add support for nand prefetch-read and post-write

This patch adds prefetch support to access nand flash in mpu mode.
This patch also adds 8-bit nand support (omap_read/write_buf8).
Prefetch can be used for both 8- and 16-bit devices.

Signed-off-by: Vimal Singh <vimalsingh@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index f91934b..1587682 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -57,6 +57,11 @@
 #define GPMC_CHUNK_SHIFT	24		/* 16 MB */
 #define GPMC_SECTION_SHIFT	28		/* 128 MB */
 
+#define PREFETCH_FIFOTHRESHOLD	(0x40 << 8)
+#define CS_NUM_SHIFT		24
+#define ENABLE_PREFETCH		(0x1 << 7)
+#define DMA_MPU_MODE		2
+
 static struct resource	gpmc_mem_root;
 static struct resource	gpmc_cs_mem[GPMC_CS_NUM];
 static DEFINE_SPINLOCK(gpmc_mem_lock);
@@ -386,6 +391,63 @@ void gpmc_cs_free(int cs)
 }
 EXPORT_SYMBOL(gpmc_cs_free);
 
+/**
+ * gpmc_prefetch_enable - configures and starts prefetch transfer
+ * @cs: nand cs (chip select) number
+ * @dma_mode: dma mode enable (1) or disable (0)
+ * @u32_count: number of bytes to be transferred
+ * @is_write: prefetch read(0) or write post(1) mode
+ */
+int gpmc_prefetch_enable(int cs, int dma_mode,
+				unsigned int u32_count, int is_write)
+{
+	uint32_t prefetch_config1;
+
+	if (!(gpmc_read_reg(GPMC_PREFETCH_CONTROL))) {
+		/* Set the amount of bytes to be prefetched */
+		gpmc_write_reg(GPMC_PREFETCH_CONFIG2, u32_count);
+
+		/* Set dma/mpu mode, the prefetch read / post write and
+		 * enable the engine. Set which cs is has requested for.
+		 */
+		prefetch_config1 = ((cs << CS_NUM_SHIFT) |
+					PREFETCH_FIFOTHRESHOLD |
+					ENABLE_PREFETCH |
+					(dma_mode << DMA_MPU_MODE) |
+					(0x1 & is_write));
+		gpmc_write_reg(GPMC_PREFETCH_CONFIG1, prefetch_config1);
+	} else {
+		return -EBUSY;
+	}
+	/*  Start the prefetch engine */
+	gpmc_write_reg(GPMC_PREFETCH_CONTROL, 0x1);
+
+	return 0;
+}
+EXPORT_SYMBOL(gpmc_prefetch_enable);
+
+/**
+ * gpmc_prefetch_reset - disables and stops the prefetch engine
+ */
+void gpmc_prefetch_reset(void)
+{
+	/* Stop the PFPW engine */
+	gpmc_write_reg(GPMC_PREFETCH_CONTROL, 0x0);
+
+	/* Reset/disable the PFPW engine */
+	gpmc_write_reg(GPMC_PREFETCH_CONFIG1, 0x0);
+}
+EXPORT_SYMBOL(gpmc_prefetch_reset);
+
+/**
+ * gpmc_prefetch_status - reads prefetch status of engine
+ */
+int  gpmc_prefetch_status(void)
+{
+	return gpmc_read_reg(GPMC_PREFETCH_STATUS);
+}
+EXPORT_SYMBOL(gpmc_prefetch_status);
+
 static void __init gpmc_mem_init(void)
 {
 	int cs;
@@ -452,6 +514,5 @@ void __init gpmc_init(void)
 	l &= 0x03 << 3;
 	l |= (0x02 << 3) | (1 << 0);
 	gpmc_write_reg(GPMC_SYSCONFIG, l);
-
 	gpmc_mem_init();
 }
diff --git a/arch/arm/plat-omap/include/mach/gpmc.h b/arch/arm/plat-omap/include/mach/gpmc.h
index 921b165..9c99cda 100644
--- a/arch/arm/plat-omap/include/mach/gpmc.h
+++ b/arch/arm/plat-omap/include/mach/gpmc.h
@@ -103,6 +103,10 @@ extern int gpmc_cs_request(int cs, unsigned long size, unsigned long *base);
 extern void gpmc_cs_free(int cs);
 extern int gpmc_cs_set_reserved(int cs, int reserved);
 extern int gpmc_cs_reserved(int cs);
+extern int gpmc_prefetch_enable(int cs, int dma_mode,
+					unsigned int u32_count, int is_write);
+extern void gpmc_prefetch_reset(void);
+extern int gpmc_prefetch_status(void);
 extern void __init gpmc_init(void);
 
 #endif
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 707d7ee..7dab79c 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -80,6 +80,14 @@ config MTD_NAND_OMAP2
 	help
           Support for NAND flash on Texas Instruments OMAP2 and OMAP3 platforms.
 
+config MTD_NAND_OMAP_PREFETCH
+	bool "GPMC prefetch support for NAND Flash device"
+	depends on MTD_NAND && MTD_NAND_OMAP2
+	default y
+	help
+	 The NAND device can be accessed for Read/Write using GPMC PREFETCH engine
+	 to improve the performance.
+
 config MTD_NAND_TS7250
 	tristate "NAND Flash device on TS-7250 board"
 	depends on MACH_TS72XX
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index ebd07e9..6736822 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -112,6 +112,16 @@
 static const char *part_probes[] = { "cmdlinepart", NULL };
 #endif
 
+#ifdef CONFIG_MTD_NAND_OMAP_PREFETCH
+static int use_prefetch = 1;
+
+/* "modprobe ... use_prefetch=0" etc */
+module_param(use_prefetch, bool, 0);
+MODULE_PARM_DESC(use_prefetch, "enable/disable use of PREFETCH");
+#else
+const int use_prefetch;
+#endif
+
 struct omap_nand_info {
 	struct nand_hw_control		controller;
 	struct omap_nand_platform_data	*pdata;
@@ -124,6 +134,7 @@ struct omap_nand_info {
 	unsigned long			phys_base;
 	void __iomem			*gpmc_cs_baseaddr;
 	void __iomem			*gpmc_baseaddr;
+	void __iomem			*nand_pref_fifo_add;
 };
 
 /**
@@ -189,6 +200,38 @@ static void omap_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 }
 
 /**
+ * omap_read_buf8 - read data from NAND controller into buffer
+ * @mtd: MTD device structure
+ * @buf: buffer to store date
+ * @len: number of bytes to read
+ */
+static void omap_read_buf8(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct nand_chip *nand = mtd->priv;
+
+	ioread8_rep(nand->IO_ADDR_R, buf, len);
+}
+
+/**
+ * omap_write_buf8 - write buffer to NAND controller
+ * @mtd: MTD device structure
+ * @buf: data buffer
+ * @len: number of bytes to write
+ */
+static void omap_write_buf8(struct mtd_info *mtd, const u_char *buf, int len)
+{
+	struct omap_nand_info *info = container_of(mtd,
+						struct omap_nand_info, mtd);
+	u_char *p = (u_char *)buf;
+
+	while (len--) {
+		iowrite8(*p++, info->nand.IO_ADDR_W);
+		while (GPMC_BUF_EMPTY == (readl(info->gpmc_baseaddr +
+						GPMC_STATUS) & GPMC_BUF_FULL));
+	}
+}
+
+/**
  * omap_read_buf16 - read data from NAND controller into buffer
  * @mtd: MTD device structure
  * @buf: buffer to store date
@@ -198,7 +241,7 @@ static void omap_read_buf16(struct mtd_info *mtd, u_char *buf, int len)
 {
 	struct nand_chip *nand = mtd->priv;
 
-	__raw_readsw(nand->IO_ADDR_R, buf, len / 2);
+	ioread16_rep(nand->IO_ADDR_R, buf, len / 2);
 }
 
 /**
@@ -217,13 +260,101 @@ static void omap_write_buf16(struct mtd_info *mtd, const u_char * buf, int len)
 	len >>= 1;
 
 	while (len--) {
-		writew(*p++, info->nand.IO_ADDR_W);
+		iowrite16(*p++, info->nand.IO_ADDR_W);
 
 		while (GPMC_BUF_EMPTY == (readl(info->gpmc_baseaddr +
 						GPMC_STATUS) & GPMC_BUF_FULL))
 			;
 	}
 }
+
+/**
+ * omap_read_buf_pref - read data from NAND controller into buffer
+ * @mtd: MTD device structure
+ * @buf: buffer to store date
+ * @len: number of bytes to read
+ */
+static void omap_read_buf_pref(struct mtd_info *mtd, u_char *buf, int len)
+{
+	struct omap_nand_info *info = container_of(mtd,
+						struct omap_nand_info, mtd);
+	uint32_t pfpw_status = 0, r_count = 0;
+	int ret = 0;
+	u32 *p = (u32 *)buf;
+
+	/* take care of subpage reads */
+	for (; len % 4 != 0; ) {
+		*buf++ = __raw_readb(info->nand.IO_ADDR_R);
+		len--;
+	}
+	p = (u32 *) buf;
+
+	/* configure and start prefetch transfer */
+	ret = gpmc_prefetch_enable(info->gpmc_cs, 0x0, len, 0x0);
+	if (ret) {
+		/* PFPW engine is busy, use cpu copy method */
+		if (info->nand.options & NAND_BUSWIDTH_16)
+			omap_read_buf16(mtd, buf, len);
+		else
+			omap_read_buf8(mtd, buf, len);
+	} else {
+		do {
+			pfpw_status = gpmc_prefetch_status();
+			r_count = ((pfpw_status >> 24) & 0x7F) >> 2;
+			ioread32_rep(info->nand_pref_fifo_add, p, r_count);
+			p += r_count;
+			len -= r_count << 2;
+		} while (len);
+
+		/* disable and stop the PFPW engine */
+		gpmc_prefetch_reset();
+	}
+}
+
+/**
+ * omap_write_buf_pref - write buffer to NAND controller
+ * @mtd: MTD device structure
+ * @buf: data buffer
+ * @len: number of bytes to write
+ */
+static void omap_write_buf_pref(struct mtd_info *mtd,
+					const u_char *buf, int len)
+{
+	struct omap_nand_info *info = container_of(mtd,
+						struct omap_nand_info, mtd);
+	uint32_t pfpw_status = 0, w_count = 0;
+	int i = 0, ret = 0;
+	u16 *p = (u16 *) buf;
+
+	/* take care of subpage writes */
+	if (len % 2 != 0) {
+		writeb(*buf, info->nand.IO_ADDR_R);
+		p = (u16 *)(buf + 1);
+		len--;
+	}
+
+	/*  configure and start prefetch transfer */
+	ret = gpmc_prefetch_enable(info->gpmc_cs, 0x0, len, 0x1);
+	if (ret) {
+		/* PFPW engine is busy, use cpu copy method */
+		if (info->nand.options & NAND_BUSWIDTH_16)
+			omap_write_buf16(mtd, buf, len);
+		else
+			omap_write_buf8(mtd, buf, len);
+	} else {
+		pfpw_status = gpmc_prefetch_status();
+		while (pfpw_status & 0x3FFF) {
+			w_count = ((pfpw_status >> 24) & 0x7F) >> 1;
+			for (i = 0; (i < w_count) && len; i++, len -= 2)
+				iowrite16(*p++, info->nand_pref_fifo_add);
+			pfpw_status = gpmc_prefetch_status();
+		}
+
+		/* disable and stop the PFPW engine */
+		gpmc_prefetch_reset();
+	}
+}
+
 /**
  * omap_verify_buf - Verify chip data against buffer
  * @mtd: MTD device structure
@@ -658,17 +789,12 @@ static int __devinit omap_nand_probe(struct platform_device *pdev)
 		err = -ENOMEM;
 		goto out_release_mem_region;
 	}
+
 	info->nand.controller = &info->controller;
 
 	info->nand.IO_ADDR_W = info->nand.IO_ADDR_R;
 	info->nand.cmd_ctrl  = omap_hwcontrol;
 
-	/* REVISIT:  only supports 16-bit NAND flash */
-
-	info->nand.read_buf   = omap_read_buf16;
-	info->nand.write_buf  = omap_write_buf16;
-	info->nand.verify_buf = omap_verify_buf;
-
 	/*
 	 * If RDY/BSY line is connected to OMAP then use the omap ready
 	 * funcrtion and the generic nand_wait function which reads the status
@@ -689,6 +815,23 @@ static int __devinit omap_nand_probe(struct platform_device *pdev)
 								== 0x1000)
 		info->nand.options  |= NAND_BUSWIDTH_16;
 
+	if (use_prefetch) {
+		/* copy the virtual address of nand base for fifo access */
+		info->nand_pref_fifo_add = info->nand.IO_ADDR_R;
+
+		info->nand.read_buf   = omap_read_buf_pref;
+		info->nand.write_buf  = omap_write_buf_pref;
+	} else {
+		if (info->nand.options & NAND_BUSWIDTH_16) {
+			info->nand.read_buf   = omap_read_buf16;
+			info->nand.write_buf  = omap_write_buf16;
+		} else {
+			info->nand.read_buf   = omap_read_buf8;
+			info->nand.write_buf  = omap_write_buf8;
+		}
+	}
+	info->nand.verify_buf = omap_verify_buf;
+
 #ifdef CONFIG_MTD_NAND_OMAP_HWECC
 	info->nand.ecc.bytes		= 3;
 	info->nand.ecc.size		= 512;
@@ -746,7 +889,7 @@ static int omap_nand_remove(struct platform_device *pdev)
 	platform_set_drvdata(pdev, NULL);
 	/* Release NAND device, its internal structures and partitions */
 	nand_release(&info->mtd);
-	iounmap(info->nand.IO_ADDR_R);
+	iounmap(info->nand_pref_fifo_add);
 	kfree(&info->mtd);
 	return 0;
 }
-- 
cgit v0.10.2


From dfe32893cbe3e599a39770199b9982a6ad5daa7b Mon Sep 17 00:00:00 2001
From: vimal singh <vimalsingh@ti.com>
Date: Mon, 13 Jul 2009 16:29:16 +0530
Subject: mtd: omap: adding DMA mode support in nand prefetch/post-write

This patch adds DMA mode support for nand prefetch/post-write engine.

Signed-off-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 7dab79c..2c9a0ed 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -88,6 +88,15 @@ config MTD_NAND_OMAP_PREFETCH
 	 The NAND device can be accessed for Read/Write using GPMC PREFETCH engine
 	 to improve the performance.
 
+config MTD_NAND_OMAP_PREFETCH_DMA
+	depends on MTD_NAND_OMAP_PREFETCH
+	bool "DMA mode"
+	default n
+	help
+	 The GPMC PREFETCH engine can be configured eigther in MPU interrupt mode
+	 or in DMA interrupt mode.
+	 Say y for DMA mode or MPU mode will be used
+
 config MTD_NAND_TS7250
 	tristate "NAND Flash device on TS-7250 board"
 	depends on MACH_TS72XX
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 6736822..090ab87 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -18,8 +18,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
 
-#include <asm/dma.h>
-
+#include <mach/dma.h>
 #include <mach/gpmc.h>
 #include <mach/nand.h>
 
@@ -118,8 +117,19 @@ static int use_prefetch = 1;
 /* "modprobe ... use_prefetch=0" etc */
 module_param(use_prefetch, bool, 0);
 MODULE_PARM_DESC(use_prefetch, "enable/disable use of PREFETCH");
+
+#ifdef CONFIG_MTD_NAND_OMAP_PREFETCH_DMA
+static int use_dma = 1;
+
+/* "modprobe ... use_dma=0" etc */
+module_param(use_dma, bool, 0);
+MODULE_PARM_DESC(use_dma, "enable/disable use of DMA");
+#else
+const int use_dma;
+#endif
 #else
 const int use_prefetch;
+const int use_dma;
 #endif
 
 struct omap_nand_info {
@@ -135,6 +145,8 @@ struct omap_nand_info {
 	void __iomem			*gpmc_cs_baseaddr;
 	void __iomem			*gpmc_baseaddr;
 	void __iomem			*nand_pref_fifo_add;
+	struct completion		comp;
+	int				dma_ch;
 };
 
 /**
@@ -355,6 +367,147 @@ static void omap_write_buf_pref(struct mtd_info *mtd,
 	}
 }
 
+#ifdef CONFIG_MTD_NAND_OMAP_PREFETCH_DMA
+/*
+ * omap_nand_dma_cb: callback on the completion of dma transfer
+ * @lch: logical channel
+ * @ch_satuts: channel status
+ * @data: pointer to completion data structure
+ */
+static void omap_nand_dma_cb(int lch, u16 ch_status, void *data)
+{
+	complete((struct completion *) data);
+}
+
+/*
+ * omap_nand_dma_transfer: configer and start dma transfer
+ * @mtd: MTD device structure
+ * @addr: virtual address in RAM of source/destination
+ * @len: number of data bytes to be transferred
+ * @is_write: flag for read/write operation
+ */
+static inline int omap_nand_dma_transfer(struct mtd_info *mtd, void *addr,
+					unsigned int len, int is_write)
+{
+	struct omap_nand_info *info = container_of(mtd,
+					struct omap_nand_info, mtd);
+	uint32_t prefetch_status = 0;
+	enum dma_data_direction dir = is_write ? DMA_TO_DEVICE :
+							DMA_FROM_DEVICE;
+	dma_addr_t dma_addr;
+	int ret;
+
+	/* The fifo depth is 64 bytes. We have a sync at each frame and frame
+	 * length is 64 bytes.
+	 */
+	int buf_len = len >> 6;
+
+	if (addr >= high_memory) {
+		struct page *p1;
+
+		if (((size_t)addr & PAGE_MASK) !=
+			((size_t)(addr + len - 1) & PAGE_MASK))
+			goto out_copy;
+		p1 = vmalloc_to_page(addr);
+		if (!p1)
+			goto out_copy;
+		addr = page_address(p1) + ((size_t)addr & ~PAGE_MASK);
+	}
+
+	dma_addr = dma_map_single(&info->pdev->dev, addr, len, dir);
+	if (dma_mapping_error(&info->pdev->dev, dma_addr)) {
+		dev_err(&info->pdev->dev,
+			"Couldn't DMA map a %d byte buffer\n", len);
+		goto out_copy;
+	}
+
+	if (is_write) {
+	    omap_set_dma_dest_params(info->dma_ch, 0, OMAP_DMA_AMODE_CONSTANT,
+						info->phys_base, 0, 0);
+	    omap_set_dma_src_params(info->dma_ch, 0, OMAP_DMA_AMODE_POST_INC,
+							dma_addr, 0, 0);
+	    omap_set_dma_transfer_params(info->dma_ch, OMAP_DMA_DATA_TYPE_S32,
+					0x10, buf_len, OMAP_DMA_SYNC_FRAME,
+					OMAP24XX_DMA_GPMC, OMAP_DMA_DST_SYNC);
+	} else {
+	    omap_set_dma_src_params(info->dma_ch, 0, OMAP_DMA_AMODE_CONSTANT,
+						info->phys_base, 0, 0);
+	    omap_set_dma_dest_params(info->dma_ch, 0, OMAP_DMA_AMODE_POST_INC,
+							dma_addr, 0, 0);
+	    omap_set_dma_transfer_params(info->dma_ch, OMAP_DMA_DATA_TYPE_S32,
+					0x10, buf_len, OMAP_DMA_SYNC_FRAME,
+					OMAP24XX_DMA_GPMC, OMAP_DMA_SRC_SYNC);
+	}
+	/*  configure and start prefetch transfer */
+	ret = gpmc_prefetch_enable(info->gpmc_cs, 0x1, len, is_write);
+	if (ret)
+		/* PFPW engine is busy, use cpu copy methode */
+		goto out_copy;
+
+	init_completion(&info->comp);
+
+	omap_start_dma(info->dma_ch);
+
+	/* setup and start DMA using dma_addr */
+	wait_for_completion(&info->comp);
+
+	while (0x3fff & (prefetch_status = gpmc_prefetch_status()))
+		;
+	/* disable and stop the PFPW engine */
+	gpmc_prefetch_reset();
+
+	dma_unmap_single(&info->pdev->dev, dma_addr, len, dir);
+	return 0;
+
+out_copy:
+	if (info->nand.options & NAND_BUSWIDTH_16)
+		is_write == 0 ? omap_read_buf16(mtd, (u_char *) addr, len)
+			: omap_write_buf16(mtd, (u_char *) addr, len);
+	else
+		is_write == 0 ? omap_read_buf8(mtd, (u_char *) addr, len)
+			: omap_write_buf8(mtd, (u_char *) addr, len);
+	return 0;
+}
+#else
+static void omap_nand_dma_cb(int lch, u16 ch_status, void *data) {}
+static inline int omap_nand_dma_transfer(struct mtd_info *mtd, void *addr,
+					unsigned int len, int is_write)
+{
+	return 0;
+}
+#endif
+
+/**
+ * omap_read_buf_dma_pref - read data from NAND controller into buffer
+ * @mtd: MTD device structure
+ * @buf: buffer to store date
+ * @len: number of bytes to read
+ */
+static void omap_read_buf_dma_pref(struct mtd_info *mtd, u_char *buf, int len)
+{
+	if (len <= mtd->oobsize)
+		omap_read_buf_pref(mtd, buf, len);
+	else
+		/* start transfer in DMA mode */
+		omap_nand_dma_transfer(mtd, buf, len, 0x0);
+}
+
+/**
+ * omap_write_buf_dma_pref - write buffer to NAND controller
+ * @mtd: MTD device structure
+ * @buf: data buffer
+ * @len: number of bytes to write
+ */
+static void omap_write_buf_dma_pref(struct mtd_info *mtd,
+					const u_char *buf, int len)
+{
+	if (len <= mtd->oobsize)
+		omap_write_buf_pref(mtd, buf, len);
+	else
+		/* start transfer in DMA mode */
+		omap_nand_dma_transfer(mtd, buf, len, 0x1);
+}
+
 /**
  * omap_verify_buf - Verify chip data against buffer
  * @mtd: MTD device structure
@@ -821,6 +974,23 @@ static int __devinit omap_nand_probe(struct platform_device *pdev)
 
 		info->nand.read_buf   = omap_read_buf_pref;
 		info->nand.write_buf  = omap_write_buf_pref;
+		if (use_dma) {
+			err = omap_request_dma(OMAP24XX_DMA_GPMC, "NAND",
+				omap_nand_dma_cb, &info->comp, &info->dma_ch);
+			if (err < 0) {
+				info->dma_ch = -1;
+				printk(KERN_WARNING "DMA request failed."
+					" Non-dma data transfer mode\n");
+			} else {
+				omap_set_dma_dest_burst_mode(info->dma_ch,
+						OMAP_DMA_DATA_BURST_16);
+				omap_set_dma_src_burst_mode(info->dma_ch,
+						OMAP_DMA_DATA_BURST_16);
+
+				info->nand.read_buf   = omap_read_buf_dma_pref;
+				info->nand.write_buf  = omap_write_buf_dma_pref;
+			}
+		}
 	} else {
 		if (info->nand.options & NAND_BUSWIDTH_16) {
 			info->nand.read_buf   = omap_read_buf16;
@@ -887,6 +1057,9 @@ static int omap_nand_remove(struct platform_device *pdev)
 	struct omap_nand_info *info = mtd->priv;
 
 	platform_set_drvdata(pdev, NULL);
+	if (use_dma)
+		omap_free_dma(info->dma_ch);
+
 	/* Release NAND device, its internal structures and partitions */
 	nand_release(&info->mtd);
 	iounmap(info->nand_pref_fifo_add);
@@ -906,6 +1079,15 @@ static struct platform_driver omap_nand_driver = {
 static int __init omap_nand_init(void)
 {
 	printk(KERN_INFO "%s driver initializing\n", DRIVER_NAME);
+
+	/* This check is required if driver is being
+	 * loaded run time as a module
+	 */
+	if ((1 == use_dma) && (0 == use_prefetch)) {
+		printk(KERN_INFO"Wrong parameters: 'use_dma' can not be 1 "
+				"without use_prefetch'. Prefetch will not be"
+				" used in either mode (mpu or dma)\n");
+	}
 	return platform_driver_register(&omap_nand_driver);
 }
 
-- 
cgit v0.10.2


From 9289d4ef065a6c62db6bf13d624307f61e57dbbb Mon Sep 17 00:00:00 2001
From: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Date: Tue, 18 Aug 2009 12:34:04 -0400
Subject: driver/Makefile: Initialize "mtd" and "spi" before "net"

On TI's da850/omap-l138 EVM, MAC address is stored in SPI flash.

This patch changes the initialization sequence of the drivers
by moving mtd and spi ahead of net in drivers/Makefile thereby
enabling da850/omap-l138 ethernet driver to read the MAC address
while booting.

Signed-off-by: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/Makefile b/drivers/Makefile
index bc4205d..2a1d41f 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -42,6 +42,8 @@ obj-y				+= macintosh/
 obj-$(CONFIG_IDE)		+= ide/
 obj-$(CONFIG_SCSI)		+= scsi/
 obj-$(CONFIG_ATA)		+= ata/
+obj-$(CONFIG_MTD)		+= mtd/
+obj-$(CONFIG_SPI)		+= spi/
 obj-y				+= net/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_FUSION)		+= message/
@@ -50,8 +52,6 @@ obj-y				+= ieee1394/
 obj-$(CONFIG_UIO)		+= uio/
 obj-y				+= cdrom/
 obj-y				+= auxdisplay/
-obj-$(CONFIG_MTD)		+= mtd/
-obj-$(CONFIG_SPI)		+= spi/
 obj-$(CONFIG_PCCARD)		+= pcmcia/
 obj-$(CONFIG_DIO)		+= dio/
 obj-$(CONFIG_SBUS)		+= sbus/
-- 
cgit v0.10.2


From ebd5a74db74ee2db833d43ea35108a4be9cab42f Mon Sep 17 00:00:00 2001
From: Benjamin Krill <ben@codiert.org>
Date: Tue, 25 Aug 2009 15:52:41 +0200
Subject: mtd: ofpart: Check availability of reg property instead of name
 property

The previous implementation breaks the dts binding "mtd-physmap.txt". This
implementation fixes the issue by checking the availability of the reg
property instead of the name property.

Cc: stable@kernel.org
Signed-off-by: Benjamin Krill <ben@codiert.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 3e164f0..62d6a78 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -46,21 +46,12 @@ int __devinit of_mtd_parse_partitions(struct device *dev,
 		const u32 *reg;
 		int len;
 
-		/* check if this is a partition node */
-		partname = of_get_property(pp, "name", &len);
-		if (strcmp(partname, "partition") != 0) {
+		reg = of_get_property(pp, "reg", &len);
+		if (!reg) {
 			nr_parts--;
 			continue;
 		}
 
-		reg = of_get_property(pp, "reg", &len);
-		if (!reg || (len != 2 * sizeof(u32))) {
-			of_node_put(pp);
-			dev_err(dev, "Invalid 'reg' on %s\n", node->full_name);
-			kfree(*pparts);
-			*pparts = NULL;
-			return -EINVAL;
-		}
 		(*pparts)[i].offset = reg[0];
 		(*pparts)[i].size = reg[1];
 
@@ -75,6 +66,14 @@ int __devinit of_mtd_parse_partitions(struct device *dev,
 		i++;
 	}
 
+	if (!i) {
+		of_node_put(pp);
+		dev_err(dev, "No valid partition found on %s\n", node->full_name);
+		kfree(*pparts);
+		*pparts = NULL;
+		return -EINVAL;
+	}
+
 	return nr_parts;
 }
 EXPORT_SYMBOL(of_mtd_parse_partitions);
-- 
cgit v0.10.2


From 76c23c32e3b3ad48e07e07897075ab19ae1ef117 Mon Sep 17 00:00:00 2001
From: Feng Kan <fkan@amcc.com>
Date: Tue, 25 Aug 2009 11:27:20 -0700
Subject: mtd: nand: fix ECC Correction bug for SMC ordering for NDFC driver

Fix ECC Correction bug where the byte offset location were double
fliped causing correction routine to toggle the wrong byte location
in the ECC segment. The ndfc_calculate_ecc routine change the order
of getting the ECC code.
        /* The NDFC uses Smart Media (SMC) bytes order */
        ecc_code[0] = p[2];
        ecc_code[1] = p[1];
        ecc_code[2] = p[3];
But in the Correction algorithm when calculating the byte offset
location, the b1 is used as the upper part of the address. Which
again reverse the order making the final byte offset address
location incorrect.
	byte_addr = (addressbits[b1] << 4) + addressbits[b0];
The order is change to read it in straight and let the correction
function to revert it to SMC order.

Cc: stable@kernel.org
Signed-off-by: Feng Kan <fkan@amcc.com>
Acked-by: Victor Gallardo <vgallardo@amcc.com>
Acked-by: Prodyut Hazarika <phazarika@amcc.com>
Acked-by: Stefan Roese <sr@denx.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c
index 89bf85a..40b5658 100644
--- a/drivers/mtd/nand/ndfc.c
+++ b/drivers/mtd/nand/ndfc.c
@@ -102,8 +102,8 @@ static int ndfc_calculate_ecc(struct mtd_info *mtd,
 	wmb();
 	ecc = in_be32(ndfc->ndfcbase + NDFC_ECC);
 	/* The NDFC uses Smart Media (SMC) bytes order */
-	ecc_code[0] = p[2];
-	ecc_code[1] = p[1];
+	ecc_code[0] = p[1];
+	ecc_code[1] = p[2];
 	ecc_code[2] = p[3];
 
 	return 0;
-- 
cgit v0.10.2


From b4c8c8cf9ad6bac5a1d7fdae751ce38b8d3028ba Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Tue, 1 Sep 2009 11:51:25 +0300
Subject: mtd: jedec: fix compilation problem with I28F640C3B definition

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: Vitaly Bordug <vitb@kernel.crashing.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index 0f7065d..16bc2fa 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -111,6 +111,11 @@
 #define I28F320B3B	0x8897
 #define I28F640B3T	0x8898
 #define I28F640B3B	0x8899
+#define I28F640C3B	0x88CD
+#define I28F160F3T	0x88F3
+#define I28F160F3B	0x88F4
+#define I28F160C3T	0x88C2
+#define I28F160C3B	0x88C3
 #define I82802AB	0x00ad
 #define I82802AC	0x00ac
 
@@ -1103,6 +1108,19 @@ static const struct amd_flash_info jedec_table[] = {
 		}
 	}, {
 		.mfr_id		= MANUFACTURER_INTEL,
+		.dev_id		= I28F640C3B,
+		.name		= "Intel 28F640C3B",
+		.devtypes	= CFI_DEVICETYPE_X16,
+		.uaddr		= MTD_UADDR_UNNECESSARY,
+		.dev_size	= SIZE_8MiB,
+		.cmd_set	= P_ID_INTEL_STD,
+		.nr_regions	= 2,
+		.regions	= {
+			ERASEINFO(0x02000, 8),
+			ERASEINFO(0x10000, 127),
+		}
+	}, {
+		.mfr_id		= MANUFACTURER_INTEL,
 		.dev_id		= I82802AB,
 		.name		= "Intel 82802AB",
 		.devtypes	= CFI_DEVICETYPE_X8,
-- 
cgit v0.10.2


From 6ad08ddd9e8b85ad6c49eeb6c5d940ccdd119bde Mon Sep 17 00:00:00 2001
From: Mohanlal Jangir <mohanlaljangir@gmail.com>
Date: Thu, 3 Sep 2009 22:56:17 +0900
Subject: mtd: inftl: fix fold chain block number

Signed-off-by: Mohan Lal Jangir <mohanlaljangir@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
old mode 100644
new mode 100755
index d8cf29c..8aca552
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -550,7 +550,7 @@ hitused:
 			 * waiting to be picked up. We're going to have to fold
 			 * a chain to make room.
 			 */
-			thisEUN = INFTL_makefreeblock(inftl, BLOCK_NIL);
+			thisEUN = INFTL_makefreeblock(inftl, block);
 
 			/*
 			 * Hopefully we free something, lets try again.
-- 
cgit v0.10.2


From 23af51ecfb04ff65bae51bd8e2270f4449abc789 Mon Sep 17 00:00:00 2001
From: Massimo Cirillo <maxcir@gmail.com>
Date: Thu, 3 Sep 2009 16:34:39 +0200
Subject: mtd: cfi_cmdset_0002: add 0xFF intolerance for M29W128G

The M29W128G Numonyx flash devices are intolerant to any 0xFF command:
in the Cfi_util.c the function cfi_qry_mode_off() (that resets the device
after the autoselect mode) must have a 0xF0 command after the 0xFF command.
This fix solves also the cause of the fixup_M29W128G_write_buffer() fix,
that can be removed now.
The following patch applies to 2.6.30 kernel.

Signed-off-by: Massimo Cirillo <maxcir@gmail.com>
Acked-by: Alexey Korolev <akorolev@infradead.org>
Cc: stable@kernel.org
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 61ea833..94bb61e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -282,16 +282,6 @@ static void fixup_s29gl032n_sectors(struct mtd_info *mtd, void *param)
 	}
 }
 
-static void fixup_M29W128G_write_buffer(struct mtd_info *mtd, void *param)
-{
-	struct map_info *map = mtd->priv;
-	struct cfi_private *cfi = map->fldrv_priv;
-	if (cfi->cfiq->BufWriteTimeoutTyp) {
-		pr_warning("Don't use write buffer on ST flash M29W128G\n");
-		cfi->cfiq->BufWriteTimeoutTyp = 0;
-	}
-}
-
 static struct cfi_fixup cfi_fixup_table[] = {
 	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
 #ifdef AMD_BOOTLOC_BUG
@@ -308,7 +298,6 @@ static struct cfi_fixup cfi_fixup_table[] = {
 	{ CFI_MFR_AMD, 0x1301, fixup_s29gl064n_sectors, NULL, },
 	{ CFI_MFR_AMD, 0x1a00, fixup_s29gl032n_sectors, NULL, },
 	{ CFI_MFR_AMD, 0x1a01, fixup_s29gl032n_sectors, NULL, },
-	{ CFI_MFR_ST,  0x227E, fixup_M29W128G_write_buffer, NULL, },
 #if !FORCE_WORD_WRITE
 	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, },
 #endif
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
old mode 100644
new mode 100755
index 34d40e2..c5a84fd
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -81,6 +81,10 @@ void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
 {
 	cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+	/* M29W128G flashes require an additional reset command
+	   when exit qry mode */
+	if ((cfi->mfr == CFI_MFR_ST) && (cfi->id == 0x227E || cfi->id == 0x7E))
+		cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
 }
 EXPORT_SYMBOL_GPL(cfi_qry_mode_off);
 
-- 
cgit v0.10.2


From be2f092bfc4f6a415bb4c3e2dcbf521a1f2a0fe5 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Sat, 5 Sep 2009 01:20:43 +0900
Subject: mtd: nand: add __nand_correct_data helper function

Split nand_correct_data() into two part, a pure calculation function
and a wrapper for mtd interface.

The tmio_nand driver can implement its ecc.correct function easily
using this __nand_correct_data helper.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Acked-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Acked-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index c0cb87d..db7ae9d 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -417,22 +417,22 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 EXPORT_SYMBOL(nand_calculate_ecc);
 
 /**
- * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
- * @mtd:	MTD block structure
+ * __nand_correct_data - [NAND Interface] Detect and correct bit error(s)
  * @buf:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
+ * @eccsize:	data bytes per ecc step (256 or 512)
  *
- * Detect and correct a 1 bit error for 256/512 byte block
+ * Detect and correct a 1 bit error for eccsize byte block
  */
-int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
-		      unsigned char *read_ecc, unsigned char *calc_ecc)
+int __nand_correct_data(unsigned char *buf,
+			unsigned char *read_ecc, unsigned char *calc_ecc,
+			unsigned int eccsize)
 {
 	unsigned char b0, b1, b2, bit_addr;
 	unsigned int byte_addr;
 	/* 256 or 512 bytes/ecc  */
-	const uint32_t eccsize_mult =
-			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+	const uint32_t eccsize_mult = eccsize >> 8;
 
 	/*
 	 * b0 to b2 indicate which bit is faulty (if any)
@@ -495,6 +495,23 @@ int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
 	printk(KERN_ERR "uncorrectable error : ");
 	return -1;
 }
+EXPORT_SYMBOL(__nand_correct_data);
+
+/**
+ * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
+ * @mtd:	MTD block structure
+ * @buf:	raw data read from the chip
+ * @read_ecc:	ECC from the chip
+ * @calc_ecc:	the ECC calculated from raw data
+ *
+ * Detect and correct a 1 bit error for 256/512 byte block
+ */
+int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+		      unsigned char *read_ecc, unsigned char *calc_ecc)
+{
+	return __nand_correct_data(buf, read_ecc, calc_ecc,
+				   ((struct nand_chip *)mtd->priv)->ecc.size);
+}
 EXPORT_SYMBOL(nand_correct_data);
 
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h
index 090da50..052ea8c 100644
--- a/include/linux/mtd/nand_ecc.h
+++ b/include/linux/mtd/nand_ecc.h
@@ -21,6 +21,12 @@ struct mtd_info;
 int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code);
 
 /*
+ * Detect and correct a 1 bit error for eccsize byte block
+ */
+int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc,
+			unsigned int eccsize);
+
+/*
  * Detect and correct a 1 bit error for 256 byte block
  */
 int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc);
-- 
cgit v0.10.2


From 0f777fb9318739baf517c4f4ef66347d8898643d Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Sat, 5 Sep 2009 01:20:44 +0900
Subject: mtd: nand: fix tmio_nand ecc correction

This driver may be reading 512 bytes at a times, but still calculates
256-byte sector ECC.  So the nand_correct_data() is not appropriate
for this driver.  Implement its ecc.correct function calling
__nand_correct_data() twice.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Acked-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Acked-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/tmio_nand.c b/drivers/mtd/nand/tmio_nand.c
index daa6a4c..92c7334 100644
--- a/drivers/mtd/nand/tmio_nand.c
+++ b/drivers/mtd/nand/tmio_nand.c
@@ -301,6 +301,21 @@ static int tmio_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
 	return 0;
 }
 
+static int tmio_nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+		unsigned char *read_ecc, unsigned char *calc_ecc)
+{
+	int r0, r1;
+
+	/* assume ecc.size = 512 and ecc.bytes = 6 */
+	r0 = __nand_correct_data(buf, read_ecc, calc_ecc, 256);
+	if (r0 < 0)
+		return r0;
+	r1 = __nand_correct_data(buf + 256, read_ecc + 3, calc_ecc + 3, 256);
+	if (r1 < 0)
+		return r1;
+	return r0 + r1;
+}
+
 static int tmio_hw_init(struct platform_device *dev, struct tmio_nand *tmio)
 {
 	struct mfd_cell *cell = (struct mfd_cell *)dev->dev.platform_data;
@@ -424,7 +439,7 @@ static int tmio_probe(struct platform_device *dev)
 	nand_chip->ecc.bytes = 6;
 	nand_chip->ecc.hwctl = tmio_nand_enable_hwecc;
 	nand_chip->ecc.calculate = tmio_nand_calculate_ecc;
-	nand_chip->ecc.correct = nand_correct_data;
+	nand_chip->ecc.correct = tmio_nand_correct_data;
 
 	if (data)
 		nand_chip->badblock_pattern = data->badblock_pattern;
-- 
cgit v0.10.2


From c0cbfd0e81d879a950ba6f0df3f75ea30c5ab16e Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Sat, 5 Sep 2009 01:20:45 +0900
Subject: mtd: nand: txx9ndfmc: transfer 512 byte at a time if possible

Using __nand_correct_data() helper function, this driver can read 512
byte (with 6 byte ECC) at a time.  This results minor performance
improvement.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/txx9ndfmc.c b/drivers/mtd/nand/txx9ndfmc.c
index 488088e..73af832 100644
--- a/drivers/mtd/nand/txx9ndfmc.c
+++ b/drivers/mtd/nand/txx9ndfmc.c
@@ -189,18 +189,43 @@ static int txx9ndfmc_calculate_ecc(struct mtd_info *mtd, const uint8_t *dat,
 				   uint8_t *ecc_code)
 {
 	struct platform_device *dev = mtd_to_platdev(mtd);
+	struct nand_chip *chip = mtd->priv;
+	int eccbytes;
 	u32 mcr = txx9ndfmc_read(dev, TXX9_NDFMCR);
 
 	mcr &= ~TXX9_NDFMCR_ECC_ALL;
 	txx9ndfmc_write(dev, mcr | TXX9_NDFMCR_ECC_OFF, TXX9_NDFMCR);
 	txx9ndfmc_write(dev, mcr | TXX9_NDFMCR_ECC_READ, TXX9_NDFMCR);
-	ecc_code[1] = txx9ndfmc_read(dev, TXX9_NDFDTR);
-	ecc_code[0] = txx9ndfmc_read(dev, TXX9_NDFDTR);
-	ecc_code[2] = txx9ndfmc_read(dev, TXX9_NDFDTR);
+	for (eccbytes = chip->ecc.bytes; eccbytes > 0; eccbytes -= 3) {
+		ecc_code[1] = txx9ndfmc_read(dev, TXX9_NDFDTR);
+		ecc_code[0] = txx9ndfmc_read(dev, TXX9_NDFDTR);
+		ecc_code[2] = txx9ndfmc_read(dev, TXX9_NDFDTR);
+		ecc_code += 3;
+	}
 	txx9ndfmc_write(dev, mcr | TXX9_NDFMCR_ECC_OFF, TXX9_NDFMCR);
 	return 0;
 }
 
+static int txx9ndfmc_correct_data(struct mtd_info *mtd, unsigned char *buf,
+		unsigned char *read_ecc, unsigned char *calc_ecc)
+{
+	struct nand_chip *chip = mtd->priv;
+	int eccsize;
+	int corrected = 0;
+	int stat;
+
+	for (eccsize = chip->ecc.size; eccsize > 0; eccsize -= 256) {
+		stat = __nand_correct_data(buf, read_ecc, calc_ecc, 256);
+		if (stat < 0)
+			return stat;
+		corrected += stat;
+		buf += 256;
+		read_ecc += 3;
+		calc_ecc += 3;
+	}
+	return corrected;
+}
+
 static void txx9ndfmc_enable_hwecc(struct mtd_info *mtd, int mode)
 {
 	struct platform_device *dev = mtd_to_platdev(mtd);
@@ -244,6 +269,22 @@ static void txx9ndfmc_initialize(struct platform_device *dev)
 #define TXX9NDFMC_NS_TO_CYC(gbusclk, ns) \
 	DIV_ROUND_UP((ns) * DIV_ROUND_UP(gbusclk, 1000), 1000000)
 
+static int txx9ndfmc_nand_scan(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	int ret;
+
+	ret = nand_scan_ident(mtd, 1);
+	if (!ret) {
+		if (mtd->writesize >= 512) {
+			chip->ecc.size = mtd->writesize;
+			chip->ecc.bytes = 3 * (mtd->writesize / 256);
+		}
+		ret = nand_scan_tail(mtd);
+	}
+	return ret;
+}
+
 static int __init txx9ndfmc_probe(struct platform_device *dev)
 {
 	struct txx9ndfmc_platform_data *plat = dev->dev.platform_data;
@@ -321,9 +362,10 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 		chip->cmd_ctrl = txx9ndfmc_cmd_ctrl;
 		chip->dev_ready = txx9ndfmc_dev_ready;
 		chip->ecc.calculate = txx9ndfmc_calculate_ecc;
-		chip->ecc.correct = nand_correct_data;
+		chip->ecc.correct = txx9ndfmc_correct_data;
 		chip->ecc.hwctl = txx9ndfmc_enable_hwecc;
 		chip->ecc.mode = NAND_ECC_HW;
+		/* txx9ndfmc_nand_scan will overwrite ecc.size and ecc.bytes */
 		chip->ecc.size = 256;
 		chip->ecc.bytes = 3;
 		chip->chip_delay = 100;
@@ -349,7 +391,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 		if (plat->wide_mask & (1 << i))
 			chip->options |= NAND_BUSWIDTH_16;
 
-		if (nand_scan(mtd, 1)) {
+		if (txx9ndfmc_nand_scan(mtd)) {
 			kfree(txx9_priv->mtdname);
 			kfree(txx9_priv);
 			continue;
-- 
cgit v0.10.2


From b94996c99c8befed9cbbb8804a4625e203913318 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 19 Sep 2009 15:28:12 -0700
Subject: intel-iommu: Disable PMRs after we enable translation, not before

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 52026d1..601c327 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2403,11 +2403,12 @@ int __init init_dmars(void)
 
 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
-		iommu_disable_protect_mem_regions(iommu);
 
 		ret = iommu_enable_translation(iommu);
 		if (ret)
 			goto error;
+
+		iommu_disable_protect_mem_regions(iommu);
 	}
 
 	return 0;
@@ -3066,8 +3067,8 @@ static int init_iommu_hw(void)
 					   DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 					 DMA_TLB_GLOBAL_FLUSH);
-		iommu_disable_protect_mem_regions(iommu);
 		iommu_enable_translation(iommu);
+		iommu_disable_protect_mem_regions(iommu);
 	}
 
 	return 0;
-- 
cgit v0.10.2


From 894572a363539dc2e8ddde83056bd22fadb30748 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 19 Sep 2009 16:07:34 -0700
Subject: mtd: sh_flctl: register sh_flctl using platform_driver_probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As with orion_nand in commit f33dabbe79fdf7a8568c65faa1db7794c87ac4d3
("register orion_nand using platform_driver_probe()"), avoid .init.text
problems by using platform_device_probe(). This isn't going to be
hotplugged anyway.

Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index c5df285..02bef21 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -857,7 +857,6 @@ static int __exit flctl_remove(struct platform_device *pdev)
 }
 
 static struct platform_driver flctl_driver = {
-	.probe		= flctl_probe,
 	.remove		= flctl_remove,
 	.driver = {
 		.name	= "sh_flctl",
@@ -867,7 +866,7 @@ static struct platform_driver flctl_driver = {
 
 static int __init flctl_nand_init(void)
 {
-	return platform_driver_register(&flctl_driver);
+	return platform_driver_probe(&flctl_driver, flctl_probe);
 }
 
 static void __exit flctl_nand_cleanup(void)
-- 
cgit v0.10.2


From dd799983e947539bf3b5c0a502eba650d3dcc29a Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 19 Sep 2009 16:14:01 -0700
Subject: jffs2: Use SLAB_HWCACHE_ALIGN for jffs2_raw_{dirent,inode} slabs

We may end up doing DMA to/from these. Until the new MTD API fixes the
issues, this should stop things from falling over.

Original idea from Gilles Casse <list@gcasse.net>

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 9eff2bd..c082868 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -39,13 +39,13 @@ int __init jffs2_create_slab_caches(void)
 
 	raw_dirent_slab = kmem_cache_create("jffs2_raw_dirent",
 					    sizeof(struct jffs2_raw_dirent),
-					    0, 0, NULL);
+					    0, SLAB_HWCACHE_ALIGN, NULL);
 	if (!raw_dirent_slab)
 		goto err;
 
 	raw_inode_slab = kmem_cache_create("jffs2_raw_inode",
 					   sizeof(struct jffs2_raw_inode),
-					   0, 0, NULL);
+					   0, SLAB_HWCACHE_ALIGN, NULL);
 	if (!raw_inode_slab)
 		goto err;
 
-- 
cgit v0.10.2


From 304e6d5fe294b80e6d3107f99ec241816390ebcc Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Fri, 18 Sep 2009 19:36:42 -0700
Subject: m25p80: Add Spansion S25FL129P serial flashes

Tested 64KiB block size only.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 9b78869..0035586 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -633,8 +633,10 @@ static struct flash_info __devinitdata m25p_data [] = {
 	{ "s25sl016a", 0x010214, 0, 64 * 1024, 32, },
 	{ "s25sl032a", 0x010215, 0, 64 * 1024, 64, },
 	{ "s25sl064a", 0x010216, 0, 64 * 1024, 128, },
-        { "s25sl12800", 0x012018, 0x0300, 256 * 1024, 64, },
+	{ "s25sl12800", 0x012018, 0x0300, 256 * 1024, 64, },
 	{ "s25sl12801", 0x012018, 0x0301, 64 * 1024, 256, },
+	{ "s25fl129p0", 0x012018, 0x4d00, 256 * 1024, 64, },
+	{ "s25fl129p1", 0x012018, 0x4d01, 64 * 1024, 256, },
 
 	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
 	{ "sst25vf040b", 0xbf258d, 0, 64 * 1024, 8, SECT_4K, },
-- 
cgit v0.10.2


From 63234717d170d39ee9cc873f29930b0fb142a114 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@unipv.it>
Date: Wed, 29 Jul 2009 18:51:56 +0200
Subject: mtd: nand: driver for Nomadik 8815 SoC (on NHK8815 board)

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Acked-by: Andrea Gallo <andrea.gallo@stericsson.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index 9bb45b9..600cb27 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -498,7 +498,7 @@ CONFIG_MTD_CFI_I2=y
 # CONFIG_MTD_DOC2001PLUS is not set
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_VERIFY_WRITE=y
-# CONFIG_MTD_NAND_ECC_SMC is not set
+CONFIG_MTD_NAND_ECC_SMC=y
 # CONFIG_MTD_NAND_MUSEUM_IDS is not set
 # CONFIG_MTD_NAND_GPIO is not set
 CONFIG_MTD_NAND_IDS=y
diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 79bdea9..59c1dbb 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -16,12 +16,103 @@
 #include <linux/amba/bus.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/io.h>
+#include <asm/sizes.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
 #include <mach/setup.h>
+#include <mach/nand.h>
+#include <mach/fsmc.h>
 #include "clock.h"
 
+/* These adresses span 16MB, so use three individual pages */
+static struct resource nhk8815_nand_resources[] = {
+	{
+		.name = "nand_addr",
+		.start = NAND_IO_ADDR,
+		.end = NAND_IO_ADDR + 0xfff,
+		.flags = IORESOURCE_MEM,
+	}, {
+		.name = "nand_cmd",
+		.start = NAND_IO_CMD,
+		.end = NAND_IO_CMD + 0xfff,
+		.flags = IORESOURCE_MEM,
+	}, {
+		.name = "nand_data",
+		.start = NAND_IO_DATA,
+		.end = NAND_IO_DATA + 0xfff,
+		.flags = IORESOURCE_MEM,
+	}
+};
+
+static int nhk8815_nand_init(void)
+{
+	/* FSMC setup for nand chip select (8-bit nand in 8815NHK) */
+	writel(0x0000000E, FSMC_PCR(0));
+	writel(0x000D0A00, FSMC_PMEM(0));
+	writel(0x00100A00, FSMC_PATT(0));
+
+	/* enable access to the chip select area */
+	writel(readl(FSMC_PCR(0)) | 0x04, FSMC_PCR(0));
+
+	return 0;
+}
+
+/*
+ * These partitions are the same as those used in the 2.6.20 release
+ * shipped by the vendor; the first two partitions are mandated
+ * by the boot ROM, and the bootloader area is somehow oversized...
+ */
+static struct mtd_partition nhk8815_partitions[] = {
+	{
+		.name	= "X-Loader(NAND)",
+		.offset = 0,
+		.size	= SZ_256K,
+	}, {
+		.name	= "MemInit(NAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= SZ_256K,
+	}, {
+		.name	= "BootLoader(NAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= SZ_2M,
+	}, {
+		.name	= "Kernel zImage(NAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= 3 * SZ_1M,
+	}, {
+		.name	= "Root Filesystem(NAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= 22 * SZ_1M,
+	}, {
+		.name	= "User Filesystem(NAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= MTDPART_SIZ_FULL,
+	}
+};
+
+static struct nomadik_nand_platform_data nhk8815_nand_data = {
+	.parts		= nhk8815_partitions,
+	.nparts		= ARRAY_SIZE(nhk8815_partitions),
+	.options	= NAND_COPYBACK | NAND_CACHEPRG | NAND_NO_PADDING \
+			| NAND_NO_READRDY | NAND_NO_AUTOINCR,
+	.init		= nhk8815_nand_init,
+};
+
+static struct platform_device nhk8815_nand_device = {
+	.name		= "nomadik_nand",
+	.dev		= {
+		.platform_data = &nhk8815_nand_data,
+	},
+	.resource	= nhk8815_nand_resources,
+	.num_resources	= ARRAY_SIZE(nhk8815_nand_resources),
+};
+
+
 #define __MEM_4K_RESOURCE(x) \
 	.res = {.start = (x), .end = (x) + SZ_4K - 1, .flags = IORESOURCE_MEM}
 
@@ -81,6 +172,7 @@ static int __init nhk8815_eth_init(void)
 device_initcall(nhk8815_eth_init);
 
 static struct platform_device *nhk8815_platform_devices[] __initdata = {
+	&nhk8815_nand_device,
 	&nhk8815_eth_device,
 	/* will add more devices */
 };
diff --git a/arch/arm/mach-nomadik/include/mach/fsmc.h b/arch/arm/mach-nomadik/include/mach/fsmc.h
new file mode 100644
index 0000000..8c2c051
--- /dev/null
+++ b/arch/arm/mach-nomadik/include/mach/fsmc.h
@@ -0,0 +1,29 @@
+
+/* Definitions for the Nomadik FSMC "Flexible Static Memory controller" */
+
+#ifndef __ASM_ARCH_FSMC_H
+#define __ASM_ARCH_FSMC_H
+
+#include <mach/hardware.h>
+/*
+ * Register list
+ */
+
+/* bus control reg. and bus timing reg. for CS0..CS3 */
+#define FSMC_BCR(x)     (NOMADIK_FSMC_VA + (x << 3))
+#define FSMC_BTR(x)     (NOMADIK_FSMC_VA + (x << 3) + 0x04)
+
+/* PC-card and NAND:
+ * PCR = control register
+ * PMEM = memory timing
+ * PATT = attribute timing
+ * PIO = I/O timing
+ * PECCR = ECC result
+ */
+#define FSMC_PCR(x)     (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x00)
+#define FSMC_PMEM(x)    (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x08)
+#define FSMC_PATT(x)    (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x0c)
+#define FSMC_PIO(x)     (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x10)
+#define FSMC_PECCR(x)   (NOMADIK_FSMC_VA + ((2 + x) << 5) + 0x14)
+
+#endif /* __ASM_ARCH_FSMC_H */
diff --git a/arch/arm/mach-nomadik/include/mach/nand.h b/arch/arm/mach-nomadik/include/mach/nand.h
new file mode 100644
index 0000000..c3c8254
--- /dev/null
+++ b/arch/arm/mach-nomadik/include/mach/nand.h
@@ -0,0 +1,16 @@
+#ifndef __ASM_ARCH_NAND_H
+#define __ASM_ARCH_NAND_H
+
+struct nomadik_nand_platform_data {
+	struct mtd_partition *parts;
+	int nparts;
+	int options;
+	int (*init) (void);
+	int (*exit) (void);
+};
+
+#define NAND_IO_DATA	0x40000000
+#define NAND_IO_CMD	0x40800000
+#define NAND_IO_ADDR	0x41000000
+
+#endif				/* __ASM_ARCH_NAND_H */
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 2c9a0ed..2fda0b6 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -443,6 +443,12 @@ config MTD_NAND_MXC
 	  This enables the driver for the NAND flash controller on the
 	  MXC processors.
 
+config MTD_NAND_NOMADIK
+	tristate "ST Nomadik 8815 NAND support"
+	depends on ARCH_NOMADIK
+	help
+	  Driver for the NAND flash controller on the Nomadik, with ECC.
+
 config MTD_NAND_SH_FLCTL
 	tristate "Support for NAND on Renesas SuperH FLCTL"
 	depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 4f7b189..6950d3d 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -41,5 +41,6 @@ obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 obj-$(CONFIG_MTD_NAND_SOCRATES)		+= socrates_nand.o
 obj-$(CONFIG_MTD_NAND_TXX9NDFMC)	+= txx9ndfmc.o
 obj-$(CONFIG_MTD_NAND_W90P910)		+= w90p910_nand.o
+obj-$(CONFIG_MTD_NAND_NOMADIK)		+= nomadik_nand.o
 
 nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
new file mode 100644
index 0000000..7c302d5
--- /dev/null
+++ b/drivers/mtd/nand/nomadik_nand.c
@@ -0,0 +1,250 @@
+/*
+ *  drivers/mtd/nand/nomadik_nand.c
+ *
+ *  Overview:
+ *  	Driver for on-board NAND flash on Nomadik Platforms
+ *
+ * Copyright © 2007 STMicroelectronics Pvt. Ltd.
+ * Author: Sachin Verma <sachin.verma@st.com>
+ *
+ * Copyright © 2009 Alessandro Rubini
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/nand_ecc.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/partitions.h>
+#include <linux/io.h>
+#include <mach/nand.h>
+#include <mach/fsmc.h>
+
+#include <mtd/mtd-abi.h>
+
+struct nomadik_nand_host {
+	struct mtd_info		mtd;
+	struct nand_chip	nand;
+	void __iomem *data_va;
+	void __iomem *cmd_va;
+	void __iomem *addr_va;
+	struct nand_bbt_descr *bbt_desc;
+};
+
+static struct nand_ecclayout nomadik_ecc_layout = {
+	.eccbytes = 3 * 4,
+	.eccpos = { /* each subpage has 16 bytes: pos 2,3,4 hosts ECC */
+		0x02, 0x03, 0x04,
+		0x12, 0x13, 0x14,
+		0x22, 0x23, 0x24,
+		0x32, 0x33, 0x34},
+	/* let's keep bytes 5,6,7 for us, just in case we change ECC algo */
+	.oobfree = { {0x08, 0x08}, {0x18, 0x08}, {0x28, 0x08}, {0x38, 0x08} },
+};
+
+static void nomadik_ecc_control(struct mtd_info *mtd, int mode)
+{
+	/* No need to enable hw ecc, it's on by default */
+}
+
+static void nomadik_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+{
+	struct nand_chip *nand = mtd->priv;
+	struct nomadik_nand_host *host = nand->priv;
+
+	if (cmd == NAND_CMD_NONE)
+		return;
+
+	if (ctrl & NAND_CLE)
+		writeb(cmd, host->cmd_va);
+	else
+		writeb(cmd, host->addr_va);
+}
+
+static int nomadik_nand_probe(struct platform_device *pdev)
+{
+	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct nomadik_nand_host *host;
+	struct mtd_info *mtd;
+	struct nand_chip *nand;
+	struct resource *res;
+	int ret = 0;
+
+	/* Allocate memory for the device structure (and zero it) */
+	host = kzalloc(sizeof(struct nomadik_nand_host), GFP_KERNEL);
+	if (!host) {
+		dev_err(&pdev->dev, "Failed to allocate device structure.\n");
+		return -ENOMEM;
+	}
+
+	/* Call the client's init function, if any */
+	if (pdata->init)
+		ret = pdata->init();
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Init function failed\n");
+		goto err;
+	}
+
+	/* ioremap three regions */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
+	if (!res) {
+		ret = -EIO;
+		goto err_unmap;
+	}
+	host->addr_va = ioremap(res->start, res->end - res->start + 1);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
+	if (!res) {
+		ret = -EIO;
+		goto err_unmap;
+	}
+	host->data_va = ioremap(res->start, res->end - res->start + 1);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
+	if (!res) {
+		ret = -EIO;
+		goto err_unmap;
+	}
+	host->cmd_va = ioremap(res->start, res->end - res->start + 1);
+
+	if (!host->addr_va || !host->data_va || !host->cmd_va) {
+		ret = -ENOMEM;
+		goto err_unmap;
+	}
+
+	/* Link all private pointers */
+	mtd = &host->mtd;
+	nand = &host->nand;
+	mtd->priv = nand;
+	nand->priv = host;
+
+	host->mtd.owner = THIS_MODULE;
+	nand->IO_ADDR_R = host->data_va;
+	nand->IO_ADDR_W = host->data_va;
+	nand->cmd_ctrl = nomadik_cmd_ctrl;
+
+	/*
+	 * This stanza declares ECC_HW but uses soft routines. It's because
+	 * HW claims to make the calculation but not the correction. However,
+	 * I haven't managed to get the desired data out of it until now.
+	 */
+	nand->ecc.mode = NAND_ECC_SOFT;
+	nand->ecc.layout = &nomadik_ecc_layout;
+	nand->ecc.hwctl = nomadik_ecc_control;
+	nand->ecc.size = 512;
+	nand->ecc.bytes = 3;
+
+	nand->options = pdata->options;
+
+	/*
+	 * Scan to find existance of the device
+	 */
+	if (nand_scan(&host->mtd, 1)) {
+		ret = -ENXIO;
+		goto err_unmap;
+	}
+
+#ifdef CONFIG_MTD_PARTITIONS
+	add_mtd_partitions(&host->mtd, pdata->parts, pdata->nparts);
+#else
+	pr_info("Registering %s as whole device\n", mtd->name);
+	add_mtd_device(mtd);
+#endif
+
+	platform_set_drvdata(pdev, host);
+	return 0;
+
+ err_unmap:
+	if (host->cmd_va)
+		iounmap(host->cmd_va);
+	if (host->data_va)
+		iounmap(host->data_va);
+	if (host->addr_va)
+		iounmap(host->addr_va);
+ err:
+	kfree(host);
+	return ret;
+}
+
+/*
+ * Clean up routine
+ */
+static int nomadik_nand_remove(struct platform_device *pdev)
+{
+	struct nomadik_nand_host *host = platform_get_drvdata(pdev);
+	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
+
+	if (pdata->exit)
+		pdata->exit();
+
+	if (host) {
+		iounmap(host->cmd_va);
+		iounmap(host->data_va);
+		iounmap(host->addr_va);
+		kfree(host);
+	}
+	return 0;
+}
+
+static int nomadik_nand_suspend(struct device *dev)
+{
+	struct nomadik_nand_host *host = dev_get_drvdata(dev);
+	int ret = 0;
+	if (host)
+		ret = host->mtd.suspend(&host->mtd);
+	return ret;
+}
+
+static int nomadik_nand_resume(struct device *dev)
+{
+	struct nomadik_nand_host *host = dev_get_drvdata(dev);
+	if (host)
+		host->mtd.resume(&host->mtd);
+	return 0;
+}
+
+static struct dev_pm_ops nomadik_nand_pm_ops = {
+	.suspend = nomadik_nand_suspend,
+	.resume = nomadik_nand_resume,
+};
+
+static struct platform_driver nomadik_nand_driver = {
+	.probe = nomadik_nand_probe,
+	.remove = nomadik_nand_remove,
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "nomadik_nand",
+		.pm = &nomadik_nand_pm_ops,
+	},
+};
+
+static int __init nand_nomadik_init(void)
+{
+	pr_info("Nomadik NAND driver\n");
+	return platform_driver_register(&nomadik_nand_driver);
+}
+
+static void __exit nand_nomadik_exit(void)
+{
+	platform_driver_unregister(&nomadik_nand_driver);
+}
+
+module_init(nand_nomadik_init);
+module_exit(nand_nomadik_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ST Microelectronics (sachin.verma@st.com)");
+MODULE_DESCRIPTION("NAND driver for Nomadik Platform");
-- 
cgit v0.10.2


From 8b85e7cbcfedfcdc2fa1bcc8945f23fc6ad3d07f Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@unipv.it>
Date: Fri, 24 Jul 2009 13:30:04 +0200
Subject: mtd: OneNand support for Nomadik 8815 SoC (on NHK8815 board)

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Acked-by: Andrea Gallo <andrea.gallo@stericsson.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 59c1dbb..6bfd537 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -24,6 +24,7 @@
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
+#include <asm/mach/flash.h>
 #include <mach/setup.h>
 #include <mach/nand.h>
 #include <mach/fsmc.h>
@@ -112,6 +113,66 @@ static struct platform_device nhk8815_nand_device = {
 	.num_resources	= ARRAY_SIZE(nhk8815_nand_resources),
 };
 
+/* These are the partitions for the OneNand device, different from above */
+static struct mtd_partition nhk8815_onenand_partitions[] = {
+	{
+		.name	= "X-Loader(OneNAND)",
+		.offset = 0,
+		.size	= SZ_256K,
+	}, {
+		.name	= "MemInit(OneNAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= SZ_256K,
+	}, {
+		.name	= "BootLoader(OneNAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= SZ_2M-SZ_256K,
+	}, {
+		.name	= "SysImage(OneNAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= 4 * SZ_1M,
+	}, {
+		.name	= "Root Filesystem(OneNAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= 22 * SZ_1M,
+	}, {
+		.name	= "User Filesystem(OneNAND)",
+		.offset	= MTDPART_OFS_APPEND,
+		.size	= MTDPART_SIZ_FULL,
+	}
+};
+
+static struct flash_platform_data nhk8815_onenand_data = {
+	.parts		= nhk8815_onenand_partitions,
+	.nr_parts	= ARRAY_SIZE(nhk8815_onenand_partitions),
+};
+
+static struct resource nhk8815_onenand_resource[] = {
+	{
+		.start		= 0x30000000,
+		.end		= 0x30000000 + SZ_128K - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device nhk8815_onenand_device = {
+	.name		= "onenand",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &nhk8815_onenand_data,
+	},
+	.resource	= nhk8815_onenand_resource,
+	.num_resources	= ARRAY_SIZE(nhk8815_onenand_resource),
+};
+
+static void __init nhk8815_onenand_init(void)
+{
+#ifdef CONFIG_ONENAND
+       /* Set up SMCS0 for OneNand */
+       writel(0x000030db, FSMC_BCR0);
+       writel(0x02100551, FSMC_BTR0);
+#endif
+}
 
 #define __MEM_4K_RESOURCE(x) \
 	.res = {.start = (x), .end = (x) + SZ_4K - 1, .flags = IORESOURCE_MEM}
@@ -173,6 +234,7 @@ device_initcall(nhk8815_eth_init);
 
 static struct platform_device *nhk8815_platform_devices[] __initdata = {
 	&nhk8815_nand_device,
+	&nhk8815_onenand_device,
 	&nhk8815_eth_device,
 	/* will add more devices */
 };
@@ -182,6 +244,7 @@ static void __init nhk8815_platform_init(void)
 	int i;
 
 	cpu8815_platform_init();
+	nhk8815_onenand_init();
 	platform_add_devices(nhk8815_platform_devices,
 			     ARRAY_SIZE(nhk8815_platform_devices));
 
-- 
cgit v0.10.2


From 65abc8653c282ded3dbdb9ec1227784140ba28cd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 21 Sep 2009 10:18:27 +0200
Subject: perf_counter: Rename list_entry -> group_entry, counter_list ->
 group_list

This is in preparation of the big rename, but also makes sense
in a standalone way: 'list_entry' is a bad name as we already
have a list_entry() in list.h.

Also, the 'counter list' is too vague, it doesnt tell us the
purpose of that list.

Clarify these names to show that it's all about the group
hiearchy.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 740caad..f648627 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -543,7 +543,7 @@ struct perf_pending_entry {
  */
 struct perf_counter {
 #ifdef CONFIG_PERF_COUNTERS
-	struct list_head		list_entry;
+	struct list_head		group_entry;
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
 	int				nr_siblings;
@@ -649,7 +649,7 @@ struct perf_counter_context {
 	 */
 	struct mutex			mutex;
 
-	struct list_head		counter_list;
+	struct list_head		group_list;
 	struct list_head		event_list;
 	int				nr_counters;
 	int				nr_active;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index cc768ab..13ad73a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -258,9 +258,9 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	 * leader's sibling list:
 	 */
 	if (group_leader == counter)
-		list_add_tail(&counter->list_entry, &ctx->counter_list);
+		list_add_tail(&counter->group_entry, &ctx->group_list);
 	else {
-		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+		list_add_tail(&counter->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
 
@@ -279,13 +279,13 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
 	struct perf_counter *sibling, *tmp;
 
-	if (list_empty(&counter->list_entry))
+	if (list_empty(&counter->group_entry))
 		return;
 	ctx->nr_counters--;
 	if (counter->attr.inherit_stat)
 		ctx->nr_stat--;
 
-	list_del_init(&counter->list_entry);
+	list_del_init(&counter->group_entry);
 	list_del_rcu(&counter->event_entry);
 
 	if (counter->group_leader != counter)
@@ -296,10 +296,9 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	 * upgrade the siblings to singleton counters by adding them
 	 * to the context list directly:
 	 */
-	list_for_each_entry_safe(sibling, tmp,
-				 &counter->sibling_list, list_entry) {
+	list_for_each_entry_safe(sibling, tmp, &counter->sibling_list, group_entry) {
 
-		list_move_tail(&sibling->list_entry, &ctx->counter_list);
+		list_move_tail(&sibling->group_entry, &ctx->group_list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -343,7 +342,7 @@ group_sched_out(struct perf_counter *group_counter,
 	/*
 	 * Schedule out siblings (if any):
 	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+	list_for_each_entry(counter, &group_counter->sibling_list, group_entry)
 		counter_sched_out(counter, cpuctx, ctx);
 
 	if (group_counter->attr.exclusive)
@@ -435,7 +434,7 @@ retry:
 	/*
 	 * If the context is active we need to retry the smp call.
 	 */
-	if (ctx->nr_active && !list_empty(&counter->list_entry)) {
+	if (ctx->nr_active && !list_empty(&counter->group_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
@@ -445,7 +444,7 @@ retry:
 	 * can remove the counter safely, if the call above did not
 	 * succeed.
 	 */
-	if (!list_empty(&counter->list_entry)) {
+	if (!list_empty(&counter->group_entry)) {
 		list_del_counter(counter, ctx);
 	}
 	spin_unlock_irq(&ctx->lock);
@@ -497,7 +496,7 @@ static void update_group_times(struct perf_counter *leader)
 	struct perf_counter *counter;
 
 	update_counter_times(leader);
-	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+	list_for_each_entry(counter, &leader->sibling_list, group_entry)
 		update_counter_times(counter);
 }
 
@@ -643,7 +642,7 @@ group_sched_in(struct perf_counter *group_counter,
 	/*
 	 * Schedule in siblings as one group (if any):
 	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+	list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -657,7 +656,7 @@ group_error:
 	 * Groups can be scheduled in as one unit only, so undo any
 	 * partial group before returning:
 	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+	list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
 		if (counter == partial_group)
 			break;
 		counter_sched_out(counter, cpuctx, ctx);
@@ -678,7 +677,7 @@ static int is_software_only_group(struct perf_counter *leader)
 	if (!is_software_counter(leader))
 		return 0;
 
-	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+	list_for_each_entry(counter, &leader->sibling_list, group_entry)
 		if (!is_software_counter(counter))
 			return 0;
 
@@ -842,7 +841,7 @@ retry:
 	/*
 	 * we need to retry the smp call.
 	 */
-	if (ctx->is_active && list_empty(&counter->list_entry)) {
+	if (ctx->is_active && list_empty(&counter->group_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
@@ -852,7 +851,7 @@ retry:
 	 * can add the counter safely, if it the call above did not
 	 * succeed.
 	 */
-	if (list_empty(&counter->list_entry))
+	if (list_empty(&counter->group_entry))
 		add_counter_to_ctx(counter, ctx);
 	spin_unlock_irq(&ctx->lock);
 }
@@ -872,7 +871,7 @@ static void __perf_counter_mark_enabled(struct perf_counter *counter,
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
-	list_for_each_entry(sub, &counter->sibling_list, list_entry)
+	list_for_each_entry(sub, &counter->sibling_list, group_entry)
 		if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
 			sub->tstamp_enabled =
 				ctx->time - sub->total_time_enabled;
@@ -1032,7 +1031,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 
 	perf_disable();
 	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		list_for_each_entry(counter, &ctx->group_list, group_entry) {
 			if (counter != counter->group_leader)
 				counter_sched_out(counter, cpuctx, ctx);
 			else
@@ -1252,7 +1251,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+	list_for_each_entry(counter, &ctx->group_list, group_entry) {
 		if (counter->state <= PERF_COUNTER_STATE_OFF ||
 		    !counter->attr.pinned)
 			continue;
@@ -1276,7 +1275,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		}
 	}
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+	list_for_each_entry(counter, &ctx->group_list, group_entry) {
 		/*
 		 * Ignore counters in OFF or ERROR state, and
 		 * ignore pinned counters since we did them already.
@@ -1369,7 +1368,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 	u64 interrupts, freq;
 
 	spin_lock(&ctx->lock);
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+	list_for_each_entry(counter, &ctx->group_list, group_entry) {
 		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 			continue;
 
@@ -1441,8 +1440,8 @@ static void rotate_ctx(struct perf_counter_context *ctx)
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
 	perf_disable();
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		list_move_tail(&counter->list_entry, &ctx->counter_list);
+	list_for_each_entry(counter, &ctx->group_list, group_entry) {
+		list_move_tail(&counter->group_entry, &ctx->group_list);
 		break;
 	}
 	perf_enable();
@@ -1498,7 +1497,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
 
 	spin_lock(&ctx->lock);
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+	list_for_each_entry(counter, &ctx->group_list, group_entry) {
 		if (!counter->attr.enable_on_exec)
 			continue;
 		counter->attr.enable_on_exec = 0;
@@ -1575,7 +1574,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	memset(ctx, 0, sizeof(*ctx));
 	spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->counter_list);
+	INIT_LIST_HEAD(&ctx->group_list);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -1818,7 +1817,7 @@ static int perf_counter_read_group(struct perf_counter *counter,
 
 	size += err;
 
-	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		err = perf_counter_read_entry(sub, read_format,
 				buf + size);
 		if (err < 0)
@@ -1948,7 +1947,7 @@ static void perf_counter_for_each(struct perf_counter *counter,
 
 	perf_counter_for_each_child(counter, func);
 	func(counter);
-	list_for_each_entry(sibling, &counter->sibling_list, list_entry)
+	list_for_each_entry(sibling, &counter->sibling_list, group_entry)
 		perf_counter_for_each_child(counter, func);
 	mutex_unlock(&ctx->mutex);
 }
@@ -2832,7 +2831,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 
 	perf_output_copy(handle, values, n * sizeof(u64));
 
-	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		n = 0;
 
 		if (sub != counter)
@@ -4118,7 +4117,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	mutex_init(&counter->child_mutex);
 	INIT_LIST_HEAD(&counter->child_list);
 
-	INIT_LIST_HEAD(&counter->list_entry);
+	INIT_LIST_HEAD(&counter->group_entry);
 	INIT_LIST_HEAD(&counter->event_entry);
 	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
@@ -4544,7 +4543,7 @@ static int inherit_group(struct perf_counter *parent_counter,
 				 child, NULL, child_ctx);
 	if (IS_ERR(leader))
 		return PTR_ERR(leader);
-	list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
+	list_for_each_entry(sub, &parent_counter->sibling_list, group_entry) {
 		child_ctr = inherit_counter(sub, parent, parent_ctx,
 					    child, leader, child_ctx);
 		if (IS_ERR(child_ctr))
@@ -4670,8 +4669,8 @@ void perf_counter_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
-				 list_entry)
+	list_for_each_entry_safe(child_counter, tmp, &child_ctx->group_list,
+				 group_entry)
 		__perf_counter_exit_task(child_counter, child_ctx, child);
 
 	/*
@@ -4679,7 +4678,7 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->counter_list))
+	if (!list_empty(&child_ctx->group_list))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -4701,7 +4700,7 @@ void perf_counter_free_task(struct task_struct *task)
 
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) {
+	list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry) {
 		struct perf_counter *parent = counter->parent;
 
 		if (WARN_ON_ONCE(!parent))
@@ -4717,7 +4716,7 @@ again:
 		free_counter(counter);
 	}
 
-	if (!list_empty(&ctx->counter_list))
+	if (!list_empty(&ctx->group_list))
 		goto again;
 
 	mutex_unlock(&ctx->mutex);
@@ -4847,7 +4846,7 @@ static void __perf_counter_exit_cpu(void *info)
 	struct perf_counter_context *ctx = &cpuctx->ctx;
 	struct perf_counter *counter, *tmp;
 
-	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
+	list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry)
 		__perf_counter_remove_from_context(counter);
 }
 static void perf_counter_exit_cpu(int cpu)
-- 
cgit v0.10.2


From dfc65094d0313cc48969fa60bcf33d693aeb05a7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 21 Sep 2009 11:31:35 +0200
Subject: perf_counter: Rename 'event' to event_id/hw_event

In preparation to the renames, to avoid a namespace clash.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a6c8b27..b1f1156 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -124,9 +124,9 @@ static const u64 p6_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
 };
 
-static u64 p6_pmu_event_map(int event)
+static u64 p6_pmu_event_map(int hw_event)
 {
-	return p6_perfmon_event_map[event];
+	return p6_perfmon_event_map[hw_event];
 }
 
 /*
@@ -137,7 +137,7 @@ static u64 p6_pmu_event_map(int event)
  */
 #define P6_NOP_COUNTER			0x0000002EULL
 
-static u64 p6_pmu_raw_event(u64 event)
+static u64 p6_pmu_raw_event(u64 hw_event)
 {
 #define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
 #define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
@@ -152,7 +152,7 @@ static u64 p6_pmu_raw_event(u64 event)
 	 P6_EVNTSEL_INV_MASK   |	\
 	 P6_EVNTSEL_COUNTER_MASK)
 
-	return event & P6_EVNTSEL_MASK;
+	return hw_event & P6_EVNTSEL_MASK;
 }
 
 
@@ -170,16 +170,16 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static u64 intel_pmu_event_map(int event)
+static u64 intel_pmu_event_map(int hw_event)
 {
-	return intel_perfmon_event_map[event];
+	return intel_perfmon_event_map[hw_event];
 }
 
 /*
- * Generalized hw caching related event table, filled
+ * Generalized hw caching related hw_event table, filled
  * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'event makes no sense on
- * this CPU', any other value means the raw event
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
  * ID.
  */
 
@@ -463,7 +463,7 @@ static const u64 atom_hw_cache_event_ids
  },
 };
 
-static u64 intel_pmu_raw_event(u64 event)
+static u64 intel_pmu_raw_event(u64 hw_event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
 #define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
@@ -478,7 +478,7 @@ static u64 intel_pmu_raw_event(u64 event)
 	 CORE_EVNTSEL_INV_MASK  |	\
 	 CORE_EVNTSEL_COUNTER_MASK)
 
-	return event & CORE_EVNTSEL_MASK;
+	return hw_event & CORE_EVNTSEL_MASK;
 }
 
 static const u64 amd_hw_cache_event_ids
@@ -585,12 +585,12 @@ static const u64 amd_perfmon_event_map[] =
   [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
 };
 
-static u64 amd_pmu_event_map(int event)
+static u64 amd_pmu_event_map(int hw_event)
 {
-	return amd_perfmon_event_map[event];
+	return amd_perfmon_event_map[hw_event];
 }
 
-static u64 amd_pmu_raw_event(u64 event)
+static u64 amd_pmu_raw_event(u64 hw_event)
 {
 #define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
 #define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
@@ -605,7 +605,7 @@ static u64 amd_pmu_raw_event(u64 event)
 	 K7_EVNTSEL_INV_MASK   |	\
 	 K7_EVNTSEL_COUNTER_MASK)
 
-	return event & K7_EVNTSEL_MASK;
+	return hw_event & K7_EVNTSEL_MASK;
 }
 
 /*
@@ -956,7 +956,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	}
 
 	/*
-	 * Raw event type provide the config in the event structure
+	 * Raw hw_event type provide the config in the hw_event structure
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
@@ -1245,7 +1245,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 		ret = 1;
 	}
 	/*
-	 * Quirk: certain CPUs dont like it if just 1 event is left:
+	 * Quirk: certain CPUs dont like it if just 1 hw_event is left:
 	 */
 	if (unlikely(left < 2))
 		left = 2;
@@ -1337,11 +1337,11 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 static int
 fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 {
-	unsigned int event;
+	unsigned int hw_event;
 
-	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
-	if (unlikely((event ==
+	if (unlikely((hw_event ==
 		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
 		     (hwc->sample_period == 1)))
 		return X86_PMC_IDX_FIXED_BTS;
@@ -1349,11 +1349,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 	if (!x86_pmu.num_counters_fixed)
 		return -1;
 
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
 		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
 		return X86_PMC_IDX_FIXED_BUS_CYCLES;
 
 	return -1;
@@ -1970,7 +1970,7 @@ static int intel_pmu_init(void)
 
 	/*
 	 * Check whether the Architectural PerfMon supports
-	 * Branch Misses Retired Event or not.
+	 * Branch Misses Retired hw_event or not.
 	 */
 	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 13ad73a..62de0db 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3044,22 +3044,22 @@ perf_counter_read_event(struct perf_counter *counter,
 			struct task_struct *task)
 {
 	struct perf_output_handle handle;
-	struct perf_read_event event = {
+	struct perf_read_event read_event = {
 		.header = {
 			.type = PERF_EVENT_READ,
 			.misc = 0,
-			.size = sizeof(event) + perf_counter_read_size(counter),
+			.size = sizeof(read_event) + perf_counter_read_size(counter),
 		},
 		.pid = perf_counter_pid(counter, task),
 		.tid = perf_counter_tid(counter, task),
 	};
 	int ret;
 
-	ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
+	ret = perf_output_begin(&handle, counter, read_event.header.size, 0, 0);
 	if (ret)
 		return;
 
-	perf_output_put(&handle, event);
+	perf_output_put(&handle, read_event);
 	perf_output_read(&handle, counter);
 
 	perf_output_end(&handle);
@@ -3698,14 +3698,14 @@ static int perf_swcounter_is_counting(struct perf_counter *counter)
 
 static int perf_swcounter_match(struct perf_counter *counter,
 				enum perf_type_id type,
-				u32 event, struct pt_regs *regs)
+				u32 event_id, struct pt_regs *regs)
 {
 	if (!perf_swcounter_is_counting(counter))
 		return 0;
 
 	if (counter->attr.type != type)
 		return 0;
-	if (counter->attr.config != event)
+	if (counter->attr.config != event_id)
 		return 0;
 
 	if (regs) {
@@ -3721,7 +3721,7 @@ static int perf_swcounter_match(struct perf_counter *counter,
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum perf_type_id type,
-				     u32 event, u64 nr, int nmi,
+				     u32 event_id, u64 nr, int nmi,
 				     struct perf_sample_data *data,
 				     struct pt_regs *regs)
 {
@@ -3732,7 +3732,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_swcounter_match(counter, type, event, regs))
+		if (perf_swcounter_match(counter, type, event_id, regs))
 			perf_swcounter_add(counter, nr, nmi, data, regs);
 	}
 	rcu_read_unlock();
@@ -4036,17 +4036,17 @@ atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_counter_destroy(struct perf_counter *counter)
 {
-	u64 event = counter->attr.config;
+	u64 event_id = counter->attr.config;
 
 	WARN_ON(counter->parent);
 
-	atomic_dec(&perf_swcounter_enabled[event]);
+	atomic_dec(&perf_swcounter_enabled[event_id]);
 }
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
 	const struct pmu *pmu = NULL;
-	u64 event = counter->attr.config;
+	u64 event_id = counter->attr.config;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -4055,7 +4055,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (event) {
+	switch (event_id) {
 	case PERF_COUNT_SW_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
@@ -4077,7 +4077,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_SW_CONTEXT_SWITCHES:
 	case PERF_COUNT_SW_CPU_MIGRATIONS:
 		if (!counter->parent) {
-			atomic_inc(&perf_swcounter_enabled[event]);
+			atomic_inc(&perf_swcounter_enabled[event_id]);
 			counter->destroy = sw_perf_counter_destroy;
 		}
 		pmu = &perf_ops_generic;
-- 
cgit v0.10.2


From cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 21 Sep 2009 12:02:48 +0200
Subject: perf: Do the big rename: Performance Counters -> Performance Events

Bye-bye Performance Counters, welcome Performance Events!

In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.

Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.

All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)

The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.

Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.

User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)

This patch has been generated via the following script:

  FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')

  sed -i \
    -e 's/PERF_EVENT_/PERF_RECORD_/g' \
    -e 's/PERF_COUNTER/PERF_EVENT/g' \
    -e 's/perf_counter/perf_event/g' \
    -e 's/nb_counters/nb_events/g' \
    -e 's/swcounter/swevent/g' \
    -e 's/tpcounter_event/tp_event/g' \
    $FILES

  for N in $(find . -name perf_counter.[ch]); do
    M=$(echo $N | sed 's/perf_counter/perf_event/g')
    mv $N $M
  done

  FILES=$(find . -name perf_event.*)

  sed -i \
    -e 's/COUNTER_MASK/REG_MASK/g' \
    -e 's/COUNTER/EVENT/g' \
    -e 's/\<event\>/event_id/g' \
    -e 's/counter/event/g' \
    -e 's/Counter/Event/g' \
    $FILES

... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.

Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.

( NOTE: 'counters' are still the proper terminology when we deal
  with hardware registers - and these sed scripts are a bit
  over-eager in renaming them. I've undone some of that, but
  in case there's something left where 'counter' would be
  better than 'event' we can undo that on an individual basis
  instead of touching an otherwise nicely automated patch. )

Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 9122c9e..89f7ead 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -390,7 +390,7 @@
 #define __NR_preadv			(__NR_SYSCALL_BASE+361)
 #define __NR_pwritev			(__NR_SYSCALL_BASE+362)
 #define __NR_rt_tgsigqueueinfo		(__NR_SYSCALL_BASE+363)
-#define __NR_perf_counter_open		(__NR_SYSCALL_BASE+364)
+#define __NR_perf_event_open		(__NR_SYSCALL_BASE+364)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index ecfa989..fafce1b 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -373,7 +373,7 @@
 		CALL(sys_preadv)
 		CALL(sys_pwritev)
 		CALL(sys_rt_tgsigqueueinfo)
-		CALL(sys_perf_counter_open)
+		CALL(sys_perf_event_open)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index c8e7ee4..02b1529 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -381,7 +381,7 @@
 #define __NR_preadv		366
 #define __NR_pwritev		367
 #define __NR_rt_tgsigqueueinfo	368
-#define __NR_perf_counter_open	369
+#define __NR_perf_event_open	369
 
 #define __NR_syscall		370
 #define NR_syscalls		__NR_syscall
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 01af24c..1e7cac2 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1620,7 +1620,7 @@ ENTRY(_sys_call_table)
 	.long _sys_preadv
 	.long _sys_pwritev
 	.long _sys_rt_tgsigqueueinfo
-	.long _sys_perf_counter_open
+	.long _sys_perf_event_open
 
 	.rept NR_syscalls-(.-_sys_call_table)/4
 	.long _sys_ni_syscall
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index b86e19c..4b5830b 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,7 +7,7 @@ config FRV
 	default y
 	select HAVE_IDE
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_EVENTS
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/perf_counter.h b/arch/frv/include/asm/perf_counter.h
deleted file mode 100644
index ccf726e..0000000
--- a/arch/frv/include/asm/perf_counter.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* FRV performance counter support
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef _ASM_PERF_COUNTER_H
-#define _ASM_PERF_COUNTER_H
-
-#define PERF_COUNTER_INDEX_OFFSET	0
-
-#endif /* _ASM_PERF_COUNTER_H */
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
new file mode 100644
index 0000000..a69e015
--- /dev/null
+++ b/arch/frv/include/asm/perf_event.h
@@ -0,0 +1,17 @@
+/* FRV performance event support
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_PERF_EVENT_H
+#define _ASM_PERF_EVENT_H
+
+#define PERF_EVENT_INDEX_OFFSET	0
+
+#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index 4a8fb42..be6ef0f 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -342,7 +342,7 @@
 #define __NR_preadv		333
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
-#define __NR_perf_counter_open	336
+#define __NR_perf_event_open	336
 
 #ifdef __KERNEL__
 
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index fde1e44..189397e 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1525,6 +1525,6 @@ sys_call_table:
 	.long sys_preadv
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
 
 syscall_table_size = (. - sys_call_table)
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index 0a37721..f470975 100644
--- a/arch/frv/lib/Makefile
+++ b/arch/frv/lib/Makefile
@@ -5,4 +5,4 @@
 lib-y := \
 	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
 	checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
-	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_counter.o
+	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o
diff --git a/arch/frv/lib/perf_counter.c b/arch/frv/lib/perf_counter.c
deleted file mode 100644
index 2000fee..0000000
--- a/arch/frv/lib/perf_counter.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Performance counter handling
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/perf_counter.h>
-
-/*
- * mark the performance counter as pending
- */
-void set_perf_counter_pending(void)
-{
-}
diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c
new file mode 100644
index 0000000..9ac5acf
--- /dev/null
+++ b/arch/frv/lib/perf_event.c
@@ -0,0 +1,19 @@
+/* Performance event handling
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/perf_event.h>
+
+/*
+ * mark the performance event as pending
+ */
+void set_perf_event_pending(void)
+{
+}
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 946d869..48b87f5 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -335,7 +335,7 @@
 #define __NR_preadv		329
 #define __NR_pwritev		330
 #define __NR_rt_tgsigqueueinfo	331
-#define __NR_perf_counter_open	332
+#define __NR_perf_event_open	332
 
 #ifdef __KERNEL__
 
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 922f52e..c5b3363 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -756,5 +756,5 @@ sys_call_table:
 	.long sys_preadv
 	.long sys_pwritev		/* 330 */
 	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
 
diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S
index 0ae123e..23535cc 100644
--- a/arch/m68knommu/kernel/syscalltable.S
+++ b/arch/m68knommu/kernel/syscalltable.S
@@ -350,7 +350,7 @@ ENTRY(sys_call_table)
 	.long sys_preadv
 	.long sys_pwritev		/* 330 */
 	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 0b85232..cb05a07 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -381,7 +381,7 @@
 #define __NR_preadv		363 /* new */
 #define __NR_pwritev		364 /* new */
 #define __NR_rt_tgsigqueueinfo	365 /* new */
-#define __NR_perf_counter_open	366 /* new */
+#define __NR_perf_event_open	366 /* new */
 
 #define __NR_syscalls		367
 
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 4572160..ecec191 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -370,4 +370,4 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall
 	.long sys_ni_syscall
 	.long sys_rt_tgsigqueueinfo	/* 365 */
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index e753a77..8c9dfa9 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -353,7 +353,7 @@
 #define __NR_preadv			(__NR_Linux + 330)
 #define __NR_pwritev			(__NR_Linux + 331)
 #define __NR_rt_tgsigqueueinfo		(__NR_Linux + 332)
-#define __NR_perf_counter_open		(__NR_Linux + 333)
+#define __NR_perf_event_open		(__NR_Linux + 333)
 #define __NR_accept4			(__NR_Linux + 334)
 
 /*
@@ -664,7 +664,7 @@
 #define __NR_preadv			(__NR_Linux + 289)
 #define __NR_pwritev			(__NR_Linux + 290)
 #define __NR_rt_tgsigqueueinfo		(__NR_Linux + 291)
-#define __NR_perf_counter_open		(__NR_Linux + 292)
+#define __NR_perf_event_open		(__NR_Linux + 292)
 #define __NR_accept4			(__NR_Linux + 293)
 
 /*
@@ -979,7 +979,7 @@
 #define __NR_preadv			(__NR_Linux + 293)
 #define __NR_pwritev			(__NR_Linux + 294)
 #define __NR_rt_tgsigqueueinfo		(__NR_Linux + 295)
-#define __NR_perf_counter_open		(__NR_Linux + 296)
+#define __NR_perf_event_open		(__NR_Linux + 296)
 #define __NR_accept4			(__NR_Linux + 297)
 
 /*
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7c2de4f..fd2a9bb 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -581,7 +581,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_preadv		6	/* 4330 */
 	sys	sys_pwritev		6
 	sys	sys_rt_tgsigqueueinfo	4
-	sys	sys_perf_counter_open	5
+	sys	sys_perf_event_open	5
 	sys	sys_accept4		4
 	.endm
 
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index b97b993..18bf7f3 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -418,6 +418,6 @@ sys_call_table:
 	PTR	sys_preadv
 	PTR	sys_pwritev			/* 5390 */
 	PTR	sys_rt_tgsigqueueinfo
-	PTR	sys_perf_counter_open
+	PTR	sys_perf_event_open
 	PTR	sys_accept4
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 1a6ae12..6ebc079 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -416,6 +416,6 @@ EXPORT(sysn32_call_table)
 	PTR	sys_preadv
 	PTR	sys_pwritev
 	PTR	compat_sys_rt_tgsigqueueinfo	/* 5295 */
-	PTR	sys_perf_counter_open
+	PTR	sys_perf_event_open
 	PTR	sys_accept4
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index cd31087..9bbf977 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -536,6 +536,6 @@ sys_call_table:
 	PTR	compat_sys_preadv		/* 4330 */
 	PTR	compat_sys_pwritev
 	PTR	compat_sys_rt_tgsigqueueinfo
-	PTR	sys_perf_counter_open
+	PTR	sys_perf_event_open
 	PTR	sys_accept4
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index fad6861..2a98393 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -347,7 +347,7 @@
 #define __NR_preadv		334
 #define __NR_pwritev		335
 #define __NR_rt_tgsigqueueinfo	336
-#define __NR_perf_counter_open	337
+#define __NR_perf_event_open	337
 
 #ifdef __KERNEL__
 
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index e0d2563..a94e7ea 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -723,7 +723,7 @@ ENTRY(sys_call_table)
 	.long sys_preadv
 	.long sys_pwritev		/* 335 */
 	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
 
 
 nr_syscalls=(.-sys_call_table)/4
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 06f8d5b..f388dc6 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,7 +16,7 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select BUG
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/include/asm/perf_counter.h b/arch/parisc/include/asm/perf_counter.h
deleted file mode 100644
index dc9e829..0000000
--- a/arch/parisc/include/asm/perf_counter.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_PARISC_PERF_COUNTER_H
-#define __ASM_PARISC_PERF_COUNTER_H
-
-/* parisc only supports software counters through this interface. */
-static inline void set_perf_counter_pending(void) { }
-
-#endif /* __ASM_PARISC_PERF_COUNTER_H */
diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h
new file mode 100644
index 0000000..cc146427
--- /dev/null
+++ b/arch/parisc/include/asm/perf_event.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_PARISC_PERF_EVENT_H
+#define __ASM_PARISC_PERF_EVENT_H
+
+/* parisc only supports software events through this interface. */
+static inline void set_perf_event_pending(void) { }
+
+#endif /* __ASM_PARISC_PERF_EVENT_H */
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index f3d3b8b..cda1583 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -810,9 +810,9 @@
 #define __NR_preadv		(__NR_Linux + 315)
 #define __NR_pwritev		(__NR_Linux + 316)
 #define __NR_rt_tgsigqueueinfo	(__NR_Linux + 317)
-#define __NR_perf_counter_open	(__NR_Linux + 318)
+#define __NR_perf_event_open	(__NR_Linux + 318)
 
-#define __NR_Linux_syscalls	(__NR_perf_counter_open + 1)
+#define __NR_Linux_syscalls	(__NR_perf_event_open + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index cf145eb..843f423 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -416,7 +416,7 @@
 	ENTRY_COMP(preadv)		/* 315 */
 	ENTRY_COMP(pwritev)
 	ENTRY_COMP(rt_tgsigqueueinfo)
-	ENTRY_SAME(perf_counter_open)
+	ENTRY_SAME(perf_event_open)
 
 	/* Nothing yet */
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8250902..4fd4790 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -129,7 +129,7 @@ config PPC
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_EVENTS
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index e73d554..abbc2aa 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -135,43 +135,43 @@ static inline int irqs_disabled_flags(unsigned long flags)
  */
 struct irq_chip;
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 
 #ifdef CONFIG_PPC64
-static inline unsigned long test_perf_counter_pending(void)
+static inline unsigned long test_perf_event_pending(void)
 {
 	unsigned long x;
 
 	asm volatile("lbz %0,%1(13)"
 		: "=r" (x)
-		: "i" (offsetof(struct paca_struct, perf_counter_pending)));
+		: "i" (offsetof(struct paca_struct, perf_event_pending)));
 	return x;
 }
 
-static inline void set_perf_counter_pending(void)
+static inline void set_perf_event_pending(void)
 {
 	asm volatile("stb %0,%1(13)" : :
 		"r" (1),
-		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+		"i" (offsetof(struct paca_struct, perf_event_pending)));
 }
 
-static inline void clear_perf_counter_pending(void)
+static inline void clear_perf_event_pending(void)
 {
 	asm volatile("stb %0,%1(13)" : :
 		"r" (0),
-		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+		"i" (offsetof(struct paca_struct, perf_event_pending)));
 }
 #endif /* CONFIG_PPC64 */
 
-#else  /* CONFIG_PERF_COUNTERS */
+#else  /* CONFIG_PERF_EVENTS */
 
-static inline unsigned long test_perf_counter_pending(void)
+static inline unsigned long test_perf_event_pending(void)
 {
 	return 0;
 }
 
-static inline void clear_perf_counter_pending(void) {}
-#endif /* CONFIG_PERF_COUNTERS */
+static inline void clear_perf_event_pending(void) {}
+#endif /* CONFIG_PERF_EVENTS */
 
 #endif	/* __KERNEL__ */
 #endif	/* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b634456..154f405 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -122,7 +122,7 @@ struct paca_struct {
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
-	u8 perf_counter_pending;	/* PM interrupt while soft-disabled */
+	u8 perf_event_pending;	/* PM interrupt while soft-disabled */
 
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
deleted file mode 100644
index 0ea0639..0000000
--- a/arch/powerpc/include/asm/perf_counter.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Performance counter support - PowerPC-specific definitions.
- *
- * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/types.h>
-
-#include <asm/hw_irq.h>
-
-#define MAX_HWCOUNTERS		8
-#define MAX_EVENT_ALTERNATIVES	8
-#define MAX_LIMITED_HWCOUNTERS	2
-
-/*
- * This struct provides the constants and functions needed to
- * describe the PMU on a particular POWER-family CPU.
- */
-struct power_pmu {
-	const char	*name;
-	int		n_counter;
-	int		max_alternatives;
-	unsigned long	add_fields;
-	unsigned long	test_adder;
-	int		(*compute_mmcr)(u64 events[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[]);
-	int		(*get_constraint)(u64 event, unsigned long *mskp,
-				unsigned long *valp);
-	int		(*get_alternatives)(u64 event, unsigned int flags,
-				u64 alt[]);
-	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
-	int		(*limited_pmc_event)(u64 event);
-	u32		flags;
-	int		n_generic;
-	int		*generic_events;
-	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
-			       [PERF_COUNT_HW_CACHE_OP_MAX]
-			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
-};
-
-/*
- * Values for power_pmu.flags
- */
-#define PPMU_LIMITED_PMC5_6	1	/* PMC5/6 have limited function */
-#define PPMU_ALT_SIPR		2	/* uses alternate posn for SIPR/HV */
-
-/*
- * Values for flags to get_alternatives()
- */
-#define PPMU_LIMITED_PMC_OK	1	/* can put this on a limited PMC */
-#define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
-#define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
-
-extern int register_power_pmu(struct power_pmu *);
-
-struct pt_regs;
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-
-#define PERF_COUNTER_INDEX_OFFSET	1
-
-/*
- * Only override the default definitions in include/linux/perf_counter.h
- * if we have hardware PMU support.
- */
-#ifdef CONFIG_PPC_PERF_CTRS
-#define perf_misc_flags(regs)	perf_misc_flags(regs)
-#endif
-
-/*
- * The power_pmu.get_constraint function returns a 32/64-bit value and
- * a 32/64-bit mask that express the constraints between this event and
- * other events.
- *
- * The value and mask are divided up into (non-overlapping) bitfields
- * of three different types:
- *
- * Select field: this expresses the constraint that some set of bits
- * in MMCR* needs to be set to a specific value for this event.  For a
- * select field, the mask contains 1s in every bit of the field, and
- * the value contains a unique value for each possible setting of the
- * MMCR* bits.  The constraint checking code will ensure that two events
- * that set the same field in their masks have the same value in their
- * value dwords.
- *
- * Add field: this expresses the constraint that there can be at most
- * N events in a particular class.  A field of k bits can be used for
- * N <= 2^(k-1) - 1.  The mask has the most significant bit of the field
- * set (and the other bits 0), and the value has only the least significant
- * bit of the field set.  In addition, the 'add_fields' and 'test_adder'
- * in the struct power_pmu for this processor come into play.  The
- * add_fields value contains 1 in the LSB of the field, and the
- * test_adder contains 2^(k-1) - 1 - N in the field.
- *
- * NAND field: this expresses the constraint that you may not have events
- * in all of a set of classes.  (For example, on PPC970, you can't select
- * events from the FPU, ISU and IDU simultaneously, although any two are
- * possible.)  For N classes, the field is N+1 bits wide, and each class
- * is assigned one bit from the least-significant N bits.  The mask has
- * only the most-significant bit set, and the value has only the bit
- * for the event's class set.  The test_adder has the least significant
- * bit set in the field.
- *
- * If an event is not subject to the constraint expressed by a particular
- * field, then it will have 0 in both the mask and value for that field.
- */
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
new file mode 100644
index 0000000..2499aaa
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -0,0 +1,110 @@
+/*
+ * Performance event support - PowerPC-specific definitions.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+
+#include <asm/hw_irq.h>
+
+#define MAX_HWEVENTS		8
+#define MAX_EVENT_ALTERNATIVES	8
+#define MAX_LIMITED_HWEVENTS	2
+
+/*
+ * This struct provides the constants and functions needed to
+ * describe the PMU on a particular POWER-family CPU.
+ */
+struct power_pmu {
+	const char	*name;
+	int		n_event;
+	int		max_alternatives;
+	unsigned long	add_fields;
+	unsigned long	test_adder;
+	int		(*compute_mmcr)(u64 events[], int n_ev,
+				unsigned int hwc[], unsigned long mmcr[]);
+	int		(*get_constraint)(u64 event_id, unsigned long *mskp,
+				unsigned long *valp);
+	int		(*get_alternatives)(u64 event_id, unsigned int flags,
+				u64 alt[]);
+	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
+	int		(*limited_pmc_event)(u64 event_id);
+	u32		flags;
+	int		n_generic;
+	int		*generic_events;
+	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+			       [PERF_COUNT_HW_CACHE_OP_MAX]
+			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+/*
+ * Values for power_pmu.flags
+ */
+#define PPMU_LIMITED_PMC5_6	1	/* PMC5/6 have limited function */
+#define PPMU_ALT_SIPR		2	/* uses alternate posn for SIPR/HV */
+
+/*
+ * Values for flags to get_alternatives()
+ */
+#define PPMU_LIMITED_PMC_OK	1	/* can put this on a limited PMC */
+#define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
+#define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
+
+extern int register_power_pmu(struct power_pmu *);
+
+struct pt_regs;
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+
+#define PERF_EVENT_INDEX_OFFSET	1
+
+/*
+ * Only override the default definitions in include/linux/perf_event.h
+ * if we have hardware PMU support.
+ */
+#ifdef CONFIG_PPC_PERF_CTRS
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+#endif
+
+/*
+ * The power_pmu.get_constraint function returns a 32/64-bit value and
+ * a 32/64-bit mask that express the constraints between this event_id and
+ * other events.
+ *
+ * The value and mask are divided up into (non-overlapping) bitfields
+ * of three different types:
+ *
+ * Select field: this expresses the constraint that some set of bits
+ * in MMCR* needs to be set to a specific value for this event_id.  For a
+ * select field, the mask contains 1s in every bit of the field, and
+ * the value contains a unique value for each possible setting of the
+ * MMCR* bits.  The constraint checking code will ensure that two events
+ * that set the same field in their masks have the same value in their
+ * value dwords.
+ *
+ * Add field: this expresses the constraint that there can be at most
+ * N events in a particular class.  A field of k bits can be used for
+ * N <= 2^(k-1) - 1.  The mask has the most significant bit of the field
+ * set (and the other bits 0), and the value has only the least significant
+ * bit of the field set.  In addition, the 'add_fields' and 'test_adder'
+ * in the struct power_pmu for this processor come into play.  The
+ * add_fields value contains 1 in the LSB of the field, and the
+ * test_adder contains 2^(k-1) - 1 - N in the field.
+ *
+ * NAND field: this expresses the constraint that you may not have events
+ * in all of a set of classes.  (For example, on PPC970, you can't select
+ * events from the FPU, ISU and IDU simultaneously, although any two are
+ * possible.)  For N classes, the field is N+1 bits wide, and each class
+ * is assigned one bit from the least-significant N bits.  The mask has
+ * only the most-significant bit set, and the value has only the bit
+ * for the event_id's class set.  The test_adder has the least significant
+ * bit set in the field.
+ *
+ * If an event_id is not subject to the constraint expressed by a particular
+ * field, then it will have 0 in both the mask and value for that field.
+ */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index ed24bd9..c7d671a 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,7 +322,7 @@ SYSCALL_SPU(epoll_create1)
 SYSCALL_SPU(dup3)
 SYSCALL_SPU(pipe2)
 SYSCALL(inotify_init1)
-SYSCALL_SPU(perf_counter_open)
+SYSCALL_SPU(perf_event_open)
 COMPAT_SYS_SPU(preadv)
 COMPAT_SYS_SPU(pwritev)
 COMPAT_SYS(rt_tgsigqueueinfo)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index cef080b..f6ca761 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,7 +341,7 @@
 #define __NR_dup3		316
 #define __NR_pipe2		317
 #define __NR_inotify_init1	318
-#define __NR_perf_counter_open	319
+#define __NR_perf_event_open	319
 #define __NR_preadv		320
 #define __NR_pwritev		321
 #define __NR_rt_tgsigqueueinfo	322
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 569f79c..b23664a 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
-obj-$(CONFIG_PPC_PERF_CTRS)	+= perf_counter.o perf_callchain.o
+obj-$(CONFIG_PPC_PERF_CTRS)	+= perf_event.o perf_callchain.o
 obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
 				   power5+-pmu.o power6-pmu.o power7-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f0df285..0812b0f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,7 @@ int main(void)
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
-	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
+	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 66bcda3..900e0ee 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,14 +556,14 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
-#ifdef CONFIG_PERF_COUNTERS
-	/* check paca->perf_counter_pending if we're enabling ints */
+#ifdef CONFIG_PERF_EVENTS
+	/* check paca->perf_event_pending if we're enabling ints */
 	lbz	r3,PACAPERFPEND(r13)
 	and.	r3,r3,r5
 	beq	27f
-	bl	.perf_counter_do_pending
+	bl	.perf_event_do_pending
 27:
-#endif /* CONFIG_PERF_COUNTERS */
+#endif /* CONFIG_PERF_EVENTS */
 
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f7f376e..e5d1211 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,7 +53,7 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -138,9 +138,9 @@ notrace void raw_local_irq_restore(unsigned long en)
 	}
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
-	if (test_perf_counter_pending()) {
-		clear_perf_counter_pending();
-		perf_counter_do_pending();
+	if (test_perf_event_pending()) {
+		clear_perf_event_pending();
+		perf_event_do_pending();
 	}
 
 	/*
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index cc466d0..09d7202 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/string.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index f74b62c..0a03cf7 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -10,7 +10,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/percpu.h>
 #include <linux/uaccess.h>
 #include <linux/mm.h>
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
deleted file mode 100644
index 5ccf9bc..0000000
--- a/arch/powerpc/kernel/perf_counter.c
+++ /dev/null
@@ -1,1315 +0,0 @@
-/*
- * Performance counter support - powerpc architecture code
- *
- * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/perf_counter.h>
-#include <linux/percpu.h>
-#include <linux/hardirq.h>
-#include <asm/reg.h>
-#include <asm/pmc.h>
-#include <asm/machdep.h>
-#include <asm/firmware.h>
-#include <asm/ptrace.h>
-
-struct cpu_hw_counters {
-	int n_counters;
-	int n_percpu;
-	int disabled;
-	int n_added;
-	int n_limited;
-	u8  pmcs_enabled;
-	struct perf_counter *counter[MAX_HWCOUNTERS];
-	u64 events[MAX_HWCOUNTERS];
-	unsigned int flags[MAX_HWCOUNTERS];
-	unsigned long mmcr[3];
-	struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
-	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
-	u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-};
-DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
-
-struct power_pmu *ppmu;
-
-/*
- * Normally, to ignore kernel events we set the FCS (freeze counters
- * in supervisor mode) bit in MMCR0, but if the kernel runs with the
- * hypervisor bit set in the MSR, or if we are running on a processor
- * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
- * then we need to use the FCHV bit to ignore kernel events.
- */
-static unsigned int freeze_counters_kernel = MMCR0_FCS;
-
-/*
- * 32-bit doesn't have MMCRA but does have an MMCR2,
- * and a few other names are different.
- */
-#ifdef CONFIG_PPC32
-
-#define MMCR0_FCHV		0
-#define MMCR0_PMCjCE		MMCR0_PMCnCE
-
-#define SPRN_MMCRA		SPRN_MMCR2
-#define MMCRA_SAMPLE_ENABLE	0
-
-static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
-{
-	return 0;
-}
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
-static inline u32 perf_get_misc_flags(struct pt_regs *regs)
-{
-	return 0;
-}
-static inline void perf_read_regs(struct pt_regs *regs) { }
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PPC32 */
-
-/*
- * Things that are specific to 64-bit implementations.
- */
-#ifdef CONFIG_PPC64
-
-static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
-{
-	unsigned long mmcra = regs->dsisr;
-
-	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
-		unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
-		if (slot > 1)
-			return 4 * (slot - 1);
-	}
-	return 0;
-}
-
-/*
- * The user wants a data address recorded.
- * If we're not doing instruction sampling, give them the SDAR
- * (sampled data address).  If we are doing instruction sampling, then
- * only give them the SDAR if it corresponds to the instruction
- * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
- * bit in MMCRA.
- */
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
-{
-	unsigned long mmcra = regs->dsisr;
-	unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
-		POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
-
-	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
-		*addrp = mfspr(SPRN_SDAR);
-}
-
-static inline u32 perf_get_misc_flags(struct pt_regs *regs)
-{
-	unsigned long mmcra = regs->dsisr;
-
-	if (TRAP(regs) != 0xf00)
-		return 0;	/* not a PMU interrupt */
-
-	if (ppmu->flags & PPMU_ALT_SIPR) {
-		if (mmcra & POWER6_MMCRA_SIHV)
-			return PERF_EVENT_MISC_HYPERVISOR;
-		return (mmcra & POWER6_MMCRA_SIPR) ?
-			PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
-	}
-	if (mmcra & MMCRA_SIHV)
-		return PERF_EVENT_MISC_HYPERVISOR;
-	return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
-		PERF_EVENT_MISC_KERNEL;
-}
-
-/*
- * Overload regs->dsisr to store MMCRA so we only need to read it once
- * on each interrupt.
- */
-static inline void perf_read_regs(struct pt_regs *regs)
-{
-	regs->dsisr = mfspr(SPRN_MMCRA);
-}
-
-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-	return !regs->softe;
-}
-
-#endif /* CONFIG_PPC64 */
-
-static void perf_counter_interrupt(struct pt_regs *regs);
-
-void perf_counter_print_debug(void)
-{
-}
-
-/*
- * Read one performance monitor counter (PMC).
- */
-static unsigned long read_pmc(int idx)
-{
-	unsigned long val;
-
-	switch (idx) {
-	case 1:
-		val = mfspr(SPRN_PMC1);
-		break;
-	case 2:
-		val = mfspr(SPRN_PMC2);
-		break;
-	case 3:
-		val = mfspr(SPRN_PMC3);
-		break;
-	case 4:
-		val = mfspr(SPRN_PMC4);
-		break;
-	case 5:
-		val = mfspr(SPRN_PMC5);
-		break;
-	case 6:
-		val = mfspr(SPRN_PMC6);
-		break;
-#ifdef CONFIG_PPC64
-	case 7:
-		val = mfspr(SPRN_PMC7);
-		break;
-	case 8:
-		val = mfspr(SPRN_PMC8);
-		break;
-#endif /* CONFIG_PPC64 */
-	default:
-		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
-		val = 0;
-	}
-	return val;
-}
-
-/*
- * Write one PMC.
- */
-static void write_pmc(int idx, unsigned long val)
-{
-	switch (idx) {
-	case 1:
-		mtspr(SPRN_PMC1, val);
-		break;
-	case 2:
-		mtspr(SPRN_PMC2, val);
-		break;
-	case 3:
-		mtspr(SPRN_PMC3, val);
-		break;
-	case 4:
-		mtspr(SPRN_PMC4, val);
-		break;
-	case 5:
-		mtspr(SPRN_PMC5, val);
-		break;
-	case 6:
-		mtspr(SPRN_PMC6, val);
-		break;
-#ifdef CONFIG_PPC64
-	case 7:
-		mtspr(SPRN_PMC7, val);
-		break;
-	case 8:
-		mtspr(SPRN_PMC8, val);
-		break;
-#endif /* CONFIG_PPC64 */
-	default:
-		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
-	}
-}
-
-/*
- * Check if a set of events can all go on the PMU at once.
- * If they can't, this will look at alternative codes for the events
- * and see if any combination of alternative codes is feasible.
- * The feasible set is returned in event[].
- */
-static int power_check_constraints(struct cpu_hw_counters *cpuhw,
-				   u64 event[], unsigned int cflags[],
-				   int n_ev)
-{
-	unsigned long mask, value, nv;
-	unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
-	int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
-	int i, j;
-	unsigned long addf = ppmu->add_fields;
-	unsigned long tadd = ppmu->test_adder;
-
-	if (n_ev > ppmu->n_counter)
-		return -1;
-
-	/* First see if the events will go on as-is */
-	for (i = 0; i < n_ev; ++i) {
-		if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
-		    && !ppmu->limited_pmc_event(event[i])) {
-			ppmu->get_alternatives(event[i], cflags[i],
-					       cpuhw->alternatives[i]);
-			event[i] = cpuhw->alternatives[i][0];
-		}
-		if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0],
-					 &cpuhw->avalues[i][0]))
-			return -1;
-	}
-	value = mask = 0;
-	for (i = 0; i < n_ev; ++i) {
-		nv = (value | cpuhw->avalues[i][0]) +
-			(value & cpuhw->avalues[i][0] & addf);
-		if ((((nv + tadd) ^ value) & mask) != 0 ||
-		    (((nv + tadd) ^ cpuhw->avalues[i][0]) &
-		     cpuhw->amasks[i][0]) != 0)
-			break;
-		value = nv;
-		mask |= cpuhw->amasks[i][0];
-	}
-	if (i == n_ev)
-		return 0;	/* all OK */
-
-	/* doesn't work, gather alternatives... */
-	if (!ppmu->get_alternatives)
-		return -1;
-	for (i = 0; i < n_ev; ++i) {
-		choice[i] = 0;
-		n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
-						  cpuhw->alternatives[i]);
-		for (j = 1; j < n_alt[i]; ++j)
-			ppmu->get_constraint(cpuhw->alternatives[i][j],
-					     &cpuhw->amasks[i][j],
-					     &cpuhw->avalues[i][j]);
-	}
-
-	/* enumerate all possibilities and see if any will work */
-	i = 0;
-	j = -1;
-	value = mask = nv = 0;
-	while (i < n_ev) {
-		if (j >= 0) {
-			/* we're backtracking, restore context */
-			value = svalues[i];
-			mask = smasks[i];
-			j = choice[i];
-		}
-		/*
-		 * See if any alternative k for event i,
-		 * where k > j, will satisfy the constraints.
-		 */
-		while (++j < n_alt[i]) {
-			nv = (value | cpuhw->avalues[i][j]) +
-				(value & cpuhw->avalues[i][j] & addf);
-			if ((((nv + tadd) ^ value) & mask) == 0 &&
-			    (((nv + tadd) ^ cpuhw->avalues[i][j])
-			     & cpuhw->amasks[i][j]) == 0)
-				break;
-		}
-		if (j >= n_alt[i]) {
-			/*
-			 * No feasible alternative, backtrack
-			 * to event i-1 and continue enumerating its
-			 * alternatives from where we got up to.
-			 */
-			if (--i < 0)
-				return -1;
-		} else {
-			/*
-			 * Found a feasible alternative for event i,
-			 * remember where we got up to with this event,
-			 * go on to the next event, and start with
-			 * the first alternative for it.
-			 */
-			choice[i] = j;
-			svalues[i] = value;
-			smasks[i] = mask;
-			value = nv;
-			mask |= cpuhw->amasks[i][j];
-			++i;
-			j = -1;
-		}
-	}
-
-	/* OK, we have a feasible combination, tell the caller the solution */
-	for (i = 0; i < n_ev; ++i)
-		event[i] = cpuhw->alternatives[i][choice[i]];
-	return 0;
-}
-
-/*
- * Check if newly-added counters have consistent settings for
- * exclude_{user,kernel,hv} with each other and any previously
- * added counters.
- */
-static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
-			  int n_prev, int n_new)
-{
-	int eu = 0, ek = 0, eh = 0;
-	int i, n, first;
-	struct perf_counter *counter;
-
-	n = n_prev + n_new;
-	if (n <= 1)
-		return 0;
-
-	first = 1;
-	for (i = 0; i < n; ++i) {
-		if (cflags[i] & PPMU_LIMITED_PMC_OK) {
-			cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
-			continue;
-		}
-		counter = ctrs[i];
-		if (first) {
-			eu = counter->attr.exclude_user;
-			ek = counter->attr.exclude_kernel;
-			eh = counter->attr.exclude_hv;
-			first = 0;
-		} else if (counter->attr.exclude_user != eu ||
-			   counter->attr.exclude_kernel != ek ||
-			   counter->attr.exclude_hv != eh) {
-			return -EAGAIN;
-		}
-	}
-
-	if (eu || ek || eh)
-		for (i = 0; i < n; ++i)
-			if (cflags[i] & PPMU_LIMITED_PMC_OK)
-				cflags[i] |= PPMU_LIMITED_PMC_REQD;
-
-	return 0;
-}
-
-static void power_pmu_read(struct perf_counter *counter)
-{
-	s64 val, delta, prev;
-
-	if (!counter->hw.idx)
-		return;
-	/*
-	 * Performance monitor interrupts come even when interrupts
-	 * are soft-disabled, as long as interrupts are hard-enabled.
-	 * Therefore we treat them like NMIs.
-	 */
-	do {
-		prev = atomic64_read(&counter->hw.prev_count);
-		barrier();
-		val = read_pmc(counter->hw.idx);
-	} while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
-
-	/* The counters are only 32 bits wide */
-	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &counter->count);
-	atomic64_sub(delta, &counter->hw.period_left);
-}
-
-/*
- * On some machines, PMC5 and PMC6 can't be written, don't respect
- * the freeze conditions, and don't generate interrupts.  This tells
- * us if `counter' is using such a PMC.
- */
-static int is_limited_pmc(int pmcnum)
-{
-	return (ppmu->flags & PPMU_LIMITED_PMC5_6)
-		&& (pmcnum == 5 || pmcnum == 6);
-}
-
-static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
-				    unsigned long pmc5, unsigned long pmc6)
-{
-	struct perf_counter *counter;
-	u64 val, prev, delta;
-	int i;
-
-	for (i = 0; i < cpuhw->n_limited; ++i) {
-		counter = cpuhw->limited_counter[i];
-		if (!counter->hw.idx)
-			continue;
-		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
-		prev = atomic64_read(&counter->hw.prev_count);
-		counter->hw.idx = 0;
-		delta = (val - prev) & 0xfffffffful;
-		atomic64_add(delta, &counter->count);
-	}
-}
-
-static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
-				  unsigned long pmc5, unsigned long pmc6)
-{
-	struct perf_counter *counter;
-	u64 val;
-	int i;
-
-	for (i = 0; i < cpuhw->n_limited; ++i) {
-		counter = cpuhw->limited_counter[i];
-		counter->hw.idx = cpuhw->limited_hwidx[i];
-		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
-		atomic64_set(&counter->hw.prev_count, val);
-		perf_counter_update_userpage(counter);
-	}
-}
-
-/*
- * Since limited counters don't respect the freeze conditions, we
- * have to read them immediately after freezing or unfreezing the
- * other counters.  We try to keep the values from the limited
- * counters as consistent as possible by keeping the delay (in
- * cycles and instructions) between freezing/unfreezing and reading
- * the limited counters as small and consistent as possible.
- * Therefore, if any limited counters are in use, we read them
- * both, and always in the same order, to minimize variability,
- * and do it inside the same asm that writes MMCR0.
- */
-static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
-{
-	unsigned long pmc5, pmc6;
-
-	if (!cpuhw->n_limited) {
-		mtspr(SPRN_MMCR0, mmcr0);
-		return;
-	}
-
-	/*
-	 * Write MMCR0, then read PMC5 and PMC6 immediately.
-	 * To ensure we don't get a performance monitor interrupt
-	 * between writing MMCR0 and freezing/thawing the limited
-	 * counters, we first write MMCR0 with the counter overflow
-	 * interrupt enable bits turned off.
-	 */
-	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
-		     : "=&r" (pmc5), "=&r" (pmc6)
-		     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
-		       "i" (SPRN_MMCR0),
-		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
-
-	if (mmcr0 & MMCR0_FC)
-		freeze_limited_counters(cpuhw, pmc5, pmc6);
-	else
-		thaw_limited_counters(cpuhw, pmc5, pmc6);
-
-	/*
-	 * Write the full MMCR0 including the counter overflow interrupt
-	 * enable bits, if necessary.
-	 */
-	if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
-		mtspr(SPRN_MMCR0, mmcr0);
-}
-
-/*
- * Disable all counters to prevent PMU interrupts and to allow
- * counters to be added or removed.
- */
-void hw_perf_disable(void)
-{
-	struct cpu_hw_counters *cpuhw;
-	unsigned long flags;
-
-	if (!ppmu)
-		return;
-	local_irq_save(flags);
-	cpuhw = &__get_cpu_var(cpu_hw_counters);
-
-	if (!cpuhw->disabled) {
-		cpuhw->disabled = 1;
-		cpuhw->n_added = 0;
-
-		/*
-		 * Check if we ever enabled the PMU on this cpu.
-		 */
-		if (!cpuhw->pmcs_enabled) {
-			ppc_enable_pmcs();
-			cpuhw->pmcs_enabled = 1;
-		}
-
-		/*
-		 * Disable instruction sampling if it was enabled
-		 */
-		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
-			mtspr(SPRN_MMCRA,
-			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
-			mb();
-		}
-
-		/*
-		 * Set the 'freeze counters' bit.
-		 * The barrier is to make sure the mtspr has been
-		 * executed and the PMU has frozen the counters
-		 * before we return.
-		 */
-		write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
-		mb();
-	}
-	local_irq_restore(flags);
-}
-
-/*
- * Re-enable all counters if disable == 0.
- * If we were previously disabled and counters were added, then
- * put the new config on the PMU.
- */
-void hw_perf_enable(void)
-{
-	struct perf_counter *counter;
-	struct cpu_hw_counters *cpuhw;
-	unsigned long flags;
-	long i;
-	unsigned long val;
-	s64 left;
-	unsigned int hwc_index[MAX_HWCOUNTERS];
-	int n_lim;
-	int idx;
-
-	if (!ppmu)
-		return;
-	local_irq_save(flags);
-	cpuhw = &__get_cpu_var(cpu_hw_counters);
-	if (!cpuhw->disabled) {
-		local_irq_restore(flags);
-		return;
-	}
-	cpuhw->disabled = 0;
-
-	/*
-	 * If we didn't change anything, or only removed counters,
-	 * no need to recalculate MMCR* settings and reset the PMCs.
-	 * Just reenable the PMU with the current MMCR* settings
-	 * (possibly updated for removal of counters).
-	 */
-	if (!cpuhw->n_added) {
-		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
-		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
-		if (cpuhw->n_counters == 0)
-			ppc_set_pmu_inuse(0);
-		goto out_enable;
-	}
-
-	/*
-	 * Compute MMCR* values for the new set of counters
-	 */
-	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
-			       cpuhw->mmcr)) {
-		/* shouldn't ever get here */
-		printk(KERN_ERR "oops compute_mmcr failed\n");
-		goto out;
-	}
-
-	/*
-	 * Add in MMCR0 freeze bits corresponding to the
-	 * attr.exclude_* bits for the first counter.
-	 * We have already checked that all counters have the
-	 * same values for these bits as the first counter.
-	 */
-	counter = cpuhw->counter[0];
-	if (counter->attr.exclude_user)
-		cpuhw->mmcr[0] |= MMCR0_FCP;
-	if (counter->attr.exclude_kernel)
-		cpuhw->mmcr[0] |= freeze_counters_kernel;
-	if (counter->attr.exclude_hv)
-		cpuhw->mmcr[0] |= MMCR0_FCHV;
-
-	/*
-	 * Write the new configuration to MMCR* with the freeze
-	 * bit set and set the hardware counters to their initial values.
-	 * Then unfreeze the counters.
-	 */
-	ppc_set_pmu_inuse(1);
-	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
-	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
-	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
-				| MMCR0_FC);
-
-	/*
-	 * Read off any pre-existing counters that need to move
-	 * to another PMC.
-	 */
-	for (i = 0; i < cpuhw->n_counters; ++i) {
-		counter = cpuhw->counter[i];
-		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
-			power_pmu_read(counter);
-			write_pmc(counter->hw.idx, 0);
-			counter->hw.idx = 0;
-		}
-	}
-
-	/*
-	 * Initialize the PMCs for all the new and moved counters.
-	 */
-	cpuhw->n_limited = n_lim = 0;
-	for (i = 0; i < cpuhw->n_counters; ++i) {
-		counter = cpuhw->counter[i];
-		if (counter->hw.idx)
-			continue;
-		idx = hwc_index[i] + 1;
-		if (is_limited_pmc(idx)) {
-			cpuhw->limited_counter[n_lim] = counter;
-			cpuhw->limited_hwidx[n_lim] = idx;
-			++n_lim;
-			continue;
-		}
-		val = 0;
-		if (counter->hw.sample_period) {
-			left = atomic64_read(&counter->hw.period_left);
-			if (left < 0x80000000L)
-				val = 0x80000000L - left;
-		}
-		atomic64_set(&counter->hw.prev_count, val);
-		counter->hw.idx = idx;
-		write_pmc(idx, val);
-		perf_counter_update_userpage(counter);
-	}
-	cpuhw->n_limited = n_lim;
-	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
-
- out_enable:
-	mb();
-	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
-
-	/*
-	 * Enable instruction sampling if necessary
-	 */
-	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
-		mb();
-		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
-	}
-
- out:
-	local_irq_restore(flags);
-}
-
-static int collect_events(struct perf_counter *group, int max_count,
-			  struct perf_counter *ctrs[], u64 *events,
-			  unsigned int *flags)
-{
-	int n = 0;
-	struct perf_counter *counter;
-
-	if (!is_software_counter(group)) {
-		if (n >= max_count)
-			return -1;
-		ctrs[n] = group;
-		flags[n] = group->hw.counter_base;
-		events[n++] = group->hw.config;
-	}
-	list_for_each_entry(counter, &group->sibling_list, list_entry) {
-		if (!is_software_counter(counter) &&
-		    counter->state != PERF_COUNTER_STATE_OFF) {
-			if (n >= max_count)
-				return -1;
-			ctrs[n] = counter;
-			flags[n] = counter->hw.counter_base;
-			events[n++] = counter->hw.config;
-		}
-	}
-	return n;
-}
-
-static void counter_sched_in(struct perf_counter *counter, int cpu)
-{
-	counter->state = PERF_COUNTER_STATE_ACTIVE;
-	counter->oncpu = cpu;
-	counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
-	if (is_software_counter(counter))
-		counter->pmu->enable(counter);
-}
-
-/*
- * Called to enable a whole group of counters.
- * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
- * Assumes the caller has disabled interrupts and has
- * frozen the PMU with hw_perf_save_disable.
- */
-int hw_perf_group_sched_in(struct perf_counter *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_counter_context *ctx, int cpu)
-{
-	struct cpu_hw_counters *cpuhw;
-	long i, n, n0;
-	struct perf_counter *sub;
-
-	if (!ppmu)
-		return 0;
-	cpuhw = &__get_cpu_var(cpu_hw_counters);
-	n0 = cpuhw->n_counters;
-	n = collect_events(group_leader, ppmu->n_counter - n0,
-			   &cpuhw->counter[n0], &cpuhw->events[n0],
-			   &cpuhw->flags[n0]);
-	if (n < 0)
-		return -EAGAIN;
-	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
-		return -EAGAIN;
-	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0);
-	if (i < 0)
-		return -EAGAIN;
-	cpuhw->n_counters = n0 + n;
-	cpuhw->n_added += n;
-
-	/*
-	 * OK, this group can go on; update counter states etc.,
-	 * and enable any software counters
-	 */
-	for (i = n0; i < n0 + n; ++i)
-		cpuhw->counter[i]->hw.config = cpuhw->events[i];
-	cpuctx->active_oncpu += n;
-	n = 1;
-	counter_sched_in(group_leader, cpu);
-	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
-		if (sub->state != PERF_COUNTER_STATE_OFF) {
-			counter_sched_in(sub, cpu);
-			++n;
-		}
-	}
-	ctx->nr_active += n;
-
-	return 1;
-}
-
-/*
- * Add a counter to the PMU.
- * If all counters are not already frozen, then we disable and
- * re-enable the PMU in order to get hw_perf_enable to do the
- * actual work of reconfiguring the PMU.
- */
-static int power_pmu_enable(struct perf_counter *counter)
-{
-	struct cpu_hw_counters *cpuhw;
-	unsigned long flags;
-	int n0;
-	int ret = -EAGAIN;
-
-	local_irq_save(flags);
-	perf_disable();
-
-	/*
-	 * Add the counter to the list (if there is room)
-	 * and check whether the total set is still feasible.
-	 */
-	cpuhw = &__get_cpu_var(cpu_hw_counters);
-	n0 = cpuhw->n_counters;
-	if (n0 >= ppmu->n_counter)
-		goto out;
-	cpuhw->counter[n0] = counter;
-	cpuhw->events[n0] = counter->hw.config;
-	cpuhw->flags[n0] = counter->hw.counter_base;
-	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
-		goto out;
-	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
-		goto out;
-
-	counter->hw.config = cpuhw->events[n0];
-	++cpuhw->n_counters;
-	++cpuhw->n_added;
-
-	ret = 0;
- out:
-	perf_enable();
-	local_irq_restore(flags);
-	return ret;
-}
-
-/*
- * Remove a counter from the PMU.
- */
-static void power_pmu_disable(struct perf_counter *counter)
-{
-	struct cpu_hw_counters *cpuhw;
-	long i;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	perf_disable();
-
-	power_pmu_read(counter);
-
-	cpuhw = &__get_cpu_var(cpu_hw_counters);
-	for (i = 0; i < cpuhw->n_counters; ++i) {
-		if (counter == cpuhw->counter[i]) {
-			while (++i < cpuhw->n_counters)
-				cpuhw->counter[i-1] = cpuhw->counter[i];
-			--cpuhw->n_counters;
-			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
-			if (counter->hw.idx) {
-				write_pmc(counter->hw.idx, 0);
-				counter->hw.idx = 0;
-			}
-			perf_counter_update_userpage(counter);
-			break;
-		}
-	}
-	for (i = 0; i < cpuhw->n_limited; ++i)
-		if (counter == cpuhw->limited_counter[i])
-			break;
-	if (i < cpuhw->n_limited) {
-		while (++i < cpuhw->n_limited) {
-			cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
-			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
-		}
-		--cpuhw->n_limited;
-	}
-	if (cpuhw->n_counters == 0) {
-		/* disable exceptions if no counters are running */
-		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
-	}
-
-	perf_enable();
-	local_irq_restore(flags);
-}
-
-/*
- * Re-enable interrupts on a counter after they were throttled
- * because they were coming too fast.
- */
-static void power_pmu_unthrottle(struct perf_counter *counter)
-{
-	s64 val, left;
-	unsigned long flags;
-
-	if (!counter->hw.idx || !counter->hw.sample_period)
-		return;
-	local_irq_save(flags);
-	perf_disable();
-	power_pmu_read(counter);
-	left = counter->hw.sample_period;
-	counter->hw.last_period = left;
-	val = 0;
-	if (left < 0x80000000L)
-		val = 0x80000000L - left;
-	write_pmc(counter->hw.idx, val);
-	atomic64_set(&counter->hw.prev_count, val);
-	atomic64_set(&counter->hw.period_left, left);
-	perf_counter_update_userpage(counter);
-	perf_enable();
-	local_irq_restore(flags);
-}
-
-struct pmu power_pmu = {
-	.enable		= power_pmu_enable,
-	.disable	= power_pmu_disable,
-	.read		= power_pmu_read,
-	.unthrottle	= power_pmu_unthrottle,
-};
-
-/*
- * Return 1 if we might be able to put counter on a limited PMC,
- * or 0 if not.
- * A counter can only go on a limited PMC if it counts something
- * that a limited PMC can count, doesn't require interrupts, and
- * doesn't exclude any processor mode.
- */
-static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
-				 unsigned int flags)
-{
-	int n;
-	u64 alt[MAX_EVENT_ALTERNATIVES];
-
-	if (counter->attr.exclude_user
-	    || counter->attr.exclude_kernel
-	    || counter->attr.exclude_hv
-	    || counter->attr.sample_period)
-		return 0;
-
-	if (ppmu->limited_pmc_event(ev))
-		return 1;
-
-	/*
-	 * The requested event isn't on a limited PMC already;
-	 * see if any alternative code goes on a limited PMC.
-	 */
-	if (!ppmu->get_alternatives)
-		return 0;
-
-	flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
-	n = ppmu->get_alternatives(ev, flags, alt);
-
-	return n > 0;
-}
-
-/*
- * Find an alternative event that goes on a normal PMC, if possible,
- * and return the event code, or 0 if there is no such alternative.
- * (Note: event code 0 is "don't count" on all machines.)
- */
-static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
-{
-	u64 alt[MAX_EVENT_ALTERNATIVES];
-	int n;
-
-	flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
-	n = ppmu->get_alternatives(ev, flags, alt);
-	if (!n)
-		return 0;
-	return alt[0];
-}
-
-/* Number of perf_counters counting hardware events */
-static atomic_t num_counters;
-/* Used to avoid races in calling reserve/release_pmc_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-/*
- * Release the PMU if this is the last perf_counter.
- */
-static void hw_perf_counter_destroy(struct perf_counter *counter)
-{
-	if (!atomic_add_unless(&num_counters, -1, 1)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_dec_return(&num_counters) == 0)
-			release_pmc_hardware();
-		mutex_unlock(&pmc_reserve_mutex);
-	}
-}
-
-/*
- * Translate a generic cache event config to a raw event code.
- */
-static int hw_perf_cache_event(u64 config, u64 *eventp)
-{
-	unsigned long type, op, result;
-	int ev;
-
-	if (!ppmu->cache_events)
-		return -EINVAL;
-
-	/* unpack config */
-	type = config & 0xff;
-	op = (config >> 8) & 0xff;
-	result = (config >> 16) & 0xff;
-
-	if (type >= PERF_COUNT_HW_CACHE_MAX ||
-	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
-	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	ev = (*ppmu->cache_events)[type][op][result];
-	if (ev == 0)
-		return -EOPNOTSUPP;
-	if (ev == -1)
-		return -EINVAL;
-	*eventp = ev;
-	return 0;
-}
-
-const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
-{
-	u64 ev;
-	unsigned long flags;
-	struct perf_counter *ctrs[MAX_HWCOUNTERS];
-	u64 events[MAX_HWCOUNTERS];
-	unsigned int cflags[MAX_HWCOUNTERS];
-	int n;
-	int err;
-	struct cpu_hw_counters *cpuhw;
-
-	if (!ppmu)
-		return ERR_PTR(-ENXIO);
-	switch (counter->attr.type) {
-	case PERF_TYPE_HARDWARE:
-		ev = counter->attr.config;
-		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-			return ERR_PTR(-EOPNOTSUPP);
-		ev = ppmu->generic_events[ev];
-		break;
-	case PERF_TYPE_HW_CACHE:
-		err = hw_perf_cache_event(counter->attr.config, &ev);
-		if (err)
-			return ERR_PTR(err);
-		break;
-	case PERF_TYPE_RAW:
-		ev = counter->attr.config;
-		break;
-	default:
-		return ERR_PTR(-EINVAL);
-	}
-	counter->hw.config_base = ev;
-	counter->hw.idx = 0;
-
-	/*
-	 * If we are not running on a hypervisor, force the
-	 * exclude_hv bit to 0 so that we don't care what
-	 * the user set it to.
-	 */
-	if (!firmware_has_feature(FW_FEATURE_LPAR))
-		counter->attr.exclude_hv = 0;
-
-	/*
-	 * If this is a per-task counter, then we can use
-	 * PM_RUN_* events interchangeably with their non RUN_*
-	 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
-	 * XXX we should check if the task is an idle task.
-	 */
-	flags = 0;
-	if (counter->ctx->task)
-		flags |= PPMU_ONLY_COUNT_RUN;
-
-	/*
-	 * If this machine has limited counters, check whether this
-	 * event could go on a limited counter.
-	 */
-	if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
-		if (can_go_on_limited_pmc(counter, ev, flags)) {
-			flags |= PPMU_LIMITED_PMC_OK;
-		} else if (ppmu->limited_pmc_event(ev)) {
-			/*
-			 * The requested event is on a limited PMC,
-			 * but we can't use a limited PMC; see if any
-			 * alternative goes on a normal PMC.
-			 */
-			ev = normal_pmc_alternative(ev, flags);
-			if (!ev)
-				return ERR_PTR(-EINVAL);
-		}
-	}
-
-	/*
-	 * If this is in a group, check if it can go on with all the
-	 * other hardware counters in the group.  We assume the counter
-	 * hasn't been linked into its leader's sibling list at this point.
-	 */
-	n = 0;
-	if (counter->group_leader != counter) {
-		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
-				   ctrs, events, cflags);
-		if (n < 0)
-			return ERR_PTR(-EINVAL);
-	}
-	events[n] = ev;
-	ctrs[n] = counter;
-	cflags[n] = flags;
-	if (check_excludes(ctrs, cflags, n, 1))
-		return ERR_PTR(-EINVAL);
-
-	cpuhw = &get_cpu_var(cpu_hw_counters);
-	err = power_check_constraints(cpuhw, events, cflags, n + 1);
-	put_cpu_var(cpu_hw_counters);
-	if (err)
-		return ERR_PTR(-EINVAL);
-
-	counter->hw.config = events[n];
-	counter->hw.counter_base = cflags[n];
-	counter->hw.last_period = counter->hw.sample_period;
-	atomic64_set(&counter->hw.period_left, counter->hw.last_period);
-
-	/*
-	 * See if we need to reserve the PMU.
-	 * If no counters are currently in use, then we have to take a
-	 * mutex to ensure that we don't race with another task doing
-	 * reserve_pmc_hardware or release_pmc_hardware.
-	 */
-	err = 0;
-	if (!atomic_inc_not_zero(&num_counters)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&num_counters) == 0 &&
-		    reserve_pmc_hardware(perf_counter_interrupt))
-			err = -EBUSY;
-		else
-			atomic_inc(&num_counters);
-		mutex_unlock(&pmc_reserve_mutex);
-	}
-	counter->destroy = hw_perf_counter_destroy;
-
-	if (err)
-		return ERR_PTR(err);
-	return &power_pmu;
-}
-
-/*
- * A counter has overflowed; update its count and record
- * things if requested.  Note that interrupts are hard-disabled
- * here so there is no possibility of being interrupted.
- */
-static void record_and_restart(struct perf_counter *counter, unsigned long val,
-			       struct pt_regs *regs, int nmi)
-{
-	u64 period = counter->hw.sample_period;
-	s64 prev, delta, left;
-	int record = 0;
-
-	/* we don't have to worry about interrupts here */
-	prev = atomic64_read(&counter->hw.prev_count);
-	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &counter->count);
-
-	/*
-	 * See if the total period for this counter has expired,
-	 * and update for the next period.
-	 */
-	val = 0;
-	left = atomic64_read(&counter->hw.period_left) - delta;
-	if (period) {
-		if (left <= 0) {
-			left += period;
-			if (left <= 0)
-				left = period;
-			record = 1;
-		}
-		if (left < 0x80000000LL)
-			val = 0x80000000LL - left;
-	}
-
-	/*
-	 * Finally record data if requested.
-	 */
-	if (record) {
-		struct perf_sample_data data = {
-			.addr	= 0,
-			.period	= counter->hw.last_period,
-		};
-
-		if (counter->attr.sample_type & PERF_SAMPLE_ADDR)
-			perf_get_data_addr(regs, &data.addr);
-
-		if (perf_counter_overflow(counter, nmi, &data, regs)) {
-			/*
-			 * Interrupts are coming too fast - throttle them
-			 * by setting the counter to 0, so it will be
-			 * at least 2^30 cycles until the next interrupt
-			 * (assuming each counter counts at most 2 counts
-			 * per cycle).
-			 */
-			val = 0;
-			left = ~0ULL >> 1;
-		}
-	}
-
-	write_pmc(counter->hw.idx, val);
-	atomic64_set(&counter->hw.prev_count, val);
-	atomic64_set(&counter->hw.period_left, left);
-	perf_counter_update_userpage(counter);
-}
-
-/*
- * Called from generic code to get the misc flags (i.e. processor mode)
- * for an event.
- */
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
-	u32 flags = perf_get_misc_flags(regs);
-
-	if (flags)
-		return flags;
-	return user_mode(regs) ? PERF_EVENT_MISC_USER :
-		PERF_EVENT_MISC_KERNEL;
-}
-
-/*
- * Called from generic code to get the instruction pointer
- * for an event.
- */
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
-	unsigned long ip;
-
-	if (TRAP(regs) != 0xf00)
-		return regs->nip;	/* not a PMU interrupt */
-
-	ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
-	return ip;
-}
-
-/*
- * Performance monitor interrupt stuff
- */
-static void perf_counter_interrupt(struct pt_regs *regs)
-{
-	int i;
-	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
-	struct perf_counter *counter;
-	unsigned long val;
-	int found = 0;
-	int nmi;
-
-	if (cpuhw->n_limited)
-		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
-					mfspr(SPRN_PMC6));
-
-	perf_read_regs(regs);
-
-	nmi = perf_intr_is_nmi(regs);
-	if (nmi)
-		nmi_enter();
-	else
-		irq_enter();
-
-	for (i = 0; i < cpuhw->n_counters; ++i) {
-		counter = cpuhw->counter[i];
-		if (!counter->hw.idx || is_limited_pmc(counter->hw.idx))
-			continue;
-		val = read_pmc(counter->hw.idx);
-		if ((int)val < 0) {
-			/* counter has overflowed */
-			found = 1;
-			record_and_restart(counter, val, regs, nmi);
-		}
-	}
-
-	/*
-	 * In case we didn't find and reset the counter that caused
-	 * the interrupt, scan all counters and reset any that are
-	 * negative, to avoid getting continual interrupts.
-	 * Any that we processed in the previous loop will not be negative.
-	 */
-	if (!found) {
-		for (i = 0; i < ppmu->n_counter; ++i) {
-			if (is_limited_pmc(i + 1))
-				continue;
-			val = read_pmc(i + 1);
-			if ((int)val < 0)
-				write_pmc(i + 1, 0);
-		}
-	}
-
-	/*
-	 * Reset MMCR0 to its normal value.  This will set PMXE and
-	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
-	 * and thus allow interrupts to occur again.
-	 * XXX might want to use MSR.PM to keep the counters frozen until
-	 * we get back out of this interrupt.
-	 */
-	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
-
-	if (nmi)
-		nmi_exit();
-	else
-		irq_exit();
-}
-
-void hw_perf_counter_setup(int cpu)
-{
-	struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
-
-	if (!ppmu)
-		return;
-	memset(cpuhw, 0, sizeof(*cpuhw));
-	cpuhw->mmcr[0] = MMCR0_FC;
-}
-
-int register_power_pmu(struct power_pmu *pmu)
-{
-	if (ppmu)
-		return -EBUSY;		/* something's already registered */
-
-	ppmu = pmu;
-	pr_info("%s performance monitor hardware support registered\n",
-		pmu->name);
-
-#ifdef MSR_HV
-	/*
-	 * Use FCHV to ignore kernel events if MSR.HV is set.
-	 */
-	if (mfmsr() & MSR_HV)
-		freeze_counters_kernel = MMCR0_FCHV;
-#endif /* CONFIG_PPC64 */
-
-	return 0;
-}
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
new file mode 100644
index 0000000..c98321f
--- /dev/null
+++ b/arch/powerpc/kernel/perf_event.c
@@ -0,0 +1,1315 @@
+/*
+ * Performance event support - powerpc architecture code
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/reg.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/ptrace.h>
+
+struct cpu_hw_events {
+	int n_events;
+	int n_percpu;
+	int disabled;
+	int n_added;
+	int n_limited;
+	u8  pmcs_enabled;
+	struct perf_event *event[MAX_HWEVENTS];
+	u64 events[MAX_HWEVENTS];
+	unsigned int flags[MAX_HWEVENTS];
+	unsigned long mmcr[3];
+	struct perf_event *limited_event[MAX_LIMITED_HWEVENTS];
+	u8  limited_hwidx[MAX_LIMITED_HWEVENTS];
+	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
+	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
+	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
+};
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+struct power_pmu *ppmu;
+
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze events
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_events_kernel = MMCR0_FCS;
+
+/*
+ * 32-bit doesn't have MMCRA but does have an MMCR2,
+ * and a few other names are different.
+ */
+#ifdef CONFIG_PPC32
+
+#define MMCR0_FCHV		0
+#define MMCR0_PMCjCE		MMCR0_PMCnCE
+
+#define SPRN_MMCRA		SPRN_MMCR2
+#define MMCRA_SAMPLE_ENABLE	0
+
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_read_regs(struct pt_regs *regs) { }
+static inline int perf_intr_is_nmi(struct pt_regs *regs)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PPC32 */
+
+/*
+ * Things that are specific to 64-bit implementations.
+ */
+#ifdef CONFIG_PPC64
+
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;
+
+	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
+		unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
+		if (slot > 1)
+			return 4 * (slot - 1);
+	}
+	return 0;
+}
+
+/*
+ * The user wants a data address recorded.
+ * If we're not doing instruction sampling, give them the SDAR
+ * (sampled data address).  If we are doing instruction sampling, then
+ * only give them the SDAR if it corresponds to the instruction
+ * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
+ * bit in MMCRA.
+ */
+static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+{
+	unsigned long mmcra = regs->dsisr;
+	unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
+		POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
+
+	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
+		*addrp = mfspr(SPRN_SDAR);
+}
+
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;
+
+	if (TRAP(regs) != 0xf00)
+		return 0;	/* not a PMU interrupt */
+
+	if (ppmu->flags & PPMU_ALT_SIPR) {
+		if (mmcra & POWER6_MMCRA_SIHV)
+			return PERF_RECORD_MISC_HYPERVISOR;
+		return (mmcra & POWER6_MMCRA_SIPR) ?
+			PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL;
+	}
+	if (mmcra & MMCRA_SIHV)
+		return PERF_RECORD_MISC_HYPERVISOR;
+	return (mmcra & MMCRA_SIPR) ? PERF_RECORD_MISC_USER :
+		PERF_RECORD_MISC_KERNEL;
+}
+
+/*
+ * Overload regs->dsisr to store MMCRA so we only need to read it once
+ * on each interrupt.
+ */
+static inline void perf_read_regs(struct pt_regs *regs)
+{
+	regs->dsisr = mfspr(SPRN_MMCRA);
+}
+
+/*
+ * If interrupts were soft-disabled when a PMU interrupt occurs, treat
+ * it as an NMI.
+ */
+static inline int perf_intr_is_nmi(struct pt_regs *regs)
+{
+	return !regs->softe;
+}
+
+#endif /* CONFIG_PPC64 */
+
+static void perf_event_interrupt(struct pt_regs *regs);
+
+void perf_event_print_debug(void)
+{
+}
+
+/*
+ * Read one performance monitor event (PMC).
+ */
+static unsigned long read_pmc(int idx)
+{
+	unsigned long val;
+
+	switch (idx) {
+	case 1:
+		val = mfspr(SPRN_PMC1);
+		break;
+	case 2:
+		val = mfspr(SPRN_PMC2);
+		break;
+	case 3:
+		val = mfspr(SPRN_PMC3);
+		break;
+	case 4:
+		val = mfspr(SPRN_PMC4);
+		break;
+	case 5:
+		val = mfspr(SPRN_PMC5);
+		break;
+	case 6:
+		val = mfspr(SPRN_PMC6);
+		break;
+#ifdef CONFIG_PPC64
+	case 7:
+		val = mfspr(SPRN_PMC7);
+		break;
+	case 8:
+		val = mfspr(SPRN_PMC8);
+		break;
+#endif /* CONFIG_PPC64 */
+	default:
+		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
+		val = 0;
+	}
+	return val;
+}
+
+/*
+ * Write one PMC.
+ */
+static void write_pmc(int idx, unsigned long val)
+{
+	switch (idx) {
+	case 1:
+		mtspr(SPRN_PMC1, val);
+		break;
+	case 2:
+		mtspr(SPRN_PMC2, val);
+		break;
+	case 3:
+		mtspr(SPRN_PMC3, val);
+		break;
+	case 4:
+		mtspr(SPRN_PMC4, val);
+		break;
+	case 5:
+		mtspr(SPRN_PMC5, val);
+		break;
+	case 6:
+		mtspr(SPRN_PMC6, val);
+		break;
+#ifdef CONFIG_PPC64
+	case 7:
+		mtspr(SPRN_PMC7, val);
+		break;
+	case 8:
+		mtspr(SPRN_PMC8, val);
+		break;
+#endif /* CONFIG_PPC64 */
+	default:
+		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
+	}
+}
+
+/*
+ * Check if a set of events can all go on the PMU at once.
+ * If they can't, this will look at alternative codes for the events
+ * and see if any combination of alternative codes is feasible.
+ * The feasible set is returned in event_id[].
+ */
+static int power_check_constraints(struct cpu_hw_events *cpuhw,
+				   u64 event_id[], unsigned int cflags[],
+				   int n_ev)
+{
+	unsigned long mask, value, nv;
+	unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
+	int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];
+	int i, j;
+	unsigned long addf = ppmu->add_fields;
+	unsigned long tadd = ppmu->test_adder;
+
+	if (n_ev > ppmu->n_event)
+		return -1;
+
+	/* First see if the events will go on as-is */
+	for (i = 0; i < n_ev; ++i) {
+		if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
+		    && !ppmu->limited_pmc_event(event_id[i])) {
+			ppmu->get_alternatives(event_id[i], cflags[i],
+					       cpuhw->alternatives[i]);
+			event_id[i] = cpuhw->alternatives[i][0];
+		}
+		if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
+					 &cpuhw->avalues[i][0]))
+			return -1;
+	}
+	value = mask = 0;
+	for (i = 0; i < n_ev; ++i) {
+		nv = (value | cpuhw->avalues[i][0]) +
+			(value & cpuhw->avalues[i][0] & addf);
+		if ((((nv + tadd) ^ value) & mask) != 0 ||
+		    (((nv + tadd) ^ cpuhw->avalues[i][0]) &
+		     cpuhw->amasks[i][0]) != 0)
+			break;
+		value = nv;
+		mask |= cpuhw->amasks[i][0];
+	}
+	if (i == n_ev)
+		return 0;	/* all OK */
+
+	/* doesn't work, gather alternatives... */
+	if (!ppmu->get_alternatives)
+		return -1;
+	for (i = 0; i < n_ev; ++i) {
+		choice[i] = 0;
+		n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
+						  cpuhw->alternatives[i]);
+		for (j = 1; j < n_alt[i]; ++j)
+			ppmu->get_constraint(cpuhw->alternatives[i][j],
+					     &cpuhw->amasks[i][j],
+					     &cpuhw->avalues[i][j]);
+	}
+
+	/* enumerate all possibilities and see if any will work */
+	i = 0;
+	j = -1;
+	value = mask = nv = 0;
+	while (i < n_ev) {
+		if (j >= 0) {
+			/* we're backtracking, restore context */
+			value = svalues[i];
+			mask = smasks[i];
+			j = choice[i];
+		}
+		/*
+		 * See if any alternative k for event_id i,
+		 * where k > j, will satisfy the constraints.
+		 */
+		while (++j < n_alt[i]) {
+			nv = (value | cpuhw->avalues[i][j]) +
+				(value & cpuhw->avalues[i][j] & addf);
+			if ((((nv + tadd) ^ value) & mask) == 0 &&
+			    (((nv + tadd) ^ cpuhw->avalues[i][j])
+			     & cpuhw->amasks[i][j]) == 0)
+				break;
+		}
+		if (j >= n_alt[i]) {
+			/*
+			 * No feasible alternative, backtrack
+			 * to event_id i-1 and continue enumerating its
+			 * alternatives from where we got up to.
+			 */
+			if (--i < 0)
+				return -1;
+		} else {
+			/*
+			 * Found a feasible alternative for event_id i,
+			 * remember where we got up to with this event_id,
+			 * go on to the next event_id, and start with
+			 * the first alternative for it.
+			 */
+			choice[i] = j;
+			svalues[i] = value;
+			smasks[i] = mask;
+			value = nv;
+			mask |= cpuhw->amasks[i][j];
+			++i;
+			j = -1;
+		}
+	}
+
+	/* OK, we have a feasible combination, tell the caller the solution */
+	for (i = 0; i < n_ev; ++i)
+		event_id[i] = cpuhw->alternatives[i][choice[i]];
+	return 0;
+}
+
+/*
+ * Check if newly-added events have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added events.
+ */
+static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
+			  int n_prev, int n_new)
+{
+	int eu = 0, ek = 0, eh = 0;
+	int i, n, first;
+	struct perf_event *event;
+
+	n = n_prev + n_new;
+	if (n <= 1)
+		return 0;
+
+	first = 1;
+	for (i = 0; i < n; ++i) {
+		if (cflags[i] & PPMU_LIMITED_PMC_OK) {
+			cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
+			continue;
+		}
+		event = ctrs[i];
+		if (first) {
+			eu = event->attr.exclude_user;
+			ek = event->attr.exclude_kernel;
+			eh = event->attr.exclude_hv;
+			first = 0;
+		} else if (event->attr.exclude_user != eu ||
+			   event->attr.exclude_kernel != ek ||
+			   event->attr.exclude_hv != eh) {
+			return -EAGAIN;
+		}
+	}
+
+	if (eu || ek || eh)
+		for (i = 0; i < n; ++i)
+			if (cflags[i] & PPMU_LIMITED_PMC_OK)
+				cflags[i] |= PPMU_LIMITED_PMC_REQD;
+
+	return 0;
+}
+
+static void power_pmu_read(struct perf_event *event)
+{
+	s64 val, delta, prev;
+
+	if (!event->hw.idx)
+		return;
+	/*
+	 * Performance monitor interrupts come even when interrupts
+	 * are soft-disabled, as long as interrupts are hard-enabled.
+	 * Therefore we treat them like NMIs.
+	 */
+	do {
+		prev = atomic64_read(&event->hw.prev_count);
+		barrier();
+		val = read_pmc(event->hw.idx);
+	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+
+	/* The events are only 32 bits wide */
+	delta = (val - prev) & 0xfffffffful;
+	atomic64_add(delta, &event->count);
+	atomic64_sub(delta, &event->hw.period_left);
+}
+
+/*
+ * On some machines, PMC5 and PMC6 can't be written, don't respect
+ * the freeze conditions, and don't generate interrupts.  This tells
+ * us if `event' is using such a PMC.
+ */
+static int is_limited_pmc(int pmcnum)
+{
+	return (ppmu->flags & PPMU_LIMITED_PMC5_6)
+		&& (pmcnum == 5 || pmcnum == 6);
+}
+
+static void freeze_limited_events(struct cpu_hw_events *cpuhw,
+				    unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_event *event;
+	u64 val, prev, delta;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		event = cpuhw->limited_event[i];
+		if (!event->hw.idx)
+			continue;
+		val = (event->hw.idx == 5) ? pmc5 : pmc6;
+		prev = atomic64_read(&event->hw.prev_count);
+		event->hw.idx = 0;
+		delta = (val - prev) & 0xfffffffful;
+		atomic64_add(delta, &event->count);
+	}
+}
+
+static void thaw_limited_events(struct cpu_hw_events *cpuhw,
+				  unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_event *event;
+	u64 val;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		event = cpuhw->limited_event[i];
+		event->hw.idx = cpuhw->limited_hwidx[i];
+		val = (event->hw.idx == 5) ? pmc5 : pmc6;
+		atomic64_set(&event->hw.prev_count, val);
+		perf_event_update_userpage(event);
+	}
+}
+
+/*
+ * Since limited events don't respect the freeze conditions, we
+ * have to read them immediately after freezing or unfreezing the
+ * other events.  We try to keep the values from the limited
+ * events as consistent as possible by keeping the delay (in
+ * cycles and instructions) between freezing/unfreezing and reading
+ * the limited events as small and consistent as possible.
+ * Therefore, if any limited events are in use, we read them
+ * both, and always in the same order, to minimize variability,
+ * and do it inside the same asm that writes MMCR0.
+ */
+static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
+{
+	unsigned long pmc5, pmc6;
+
+	if (!cpuhw->n_limited) {
+		mtspr(SPRN_MMCR0, mmcr0);
+		return;
+	}
+
+	/*
+	 * Write MMCR0, then read PMC5 and PMC6 immediately.
+	 * To ensure we don't get a performance monitor interrupt
+	 * between writing MMCR0 and freezing/thawing the limited
+	 * events, we first write MMCR0 with the event overflow
+	 * interrupt enable bits turned off.
+	 */
+	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
+		     : "=&r" (pmc5), "=&r" (pmc6)
+		     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
+		       "i" (SPRN_MMCR0),
+		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
+
+	if (mmcr0 & MMCR0_FC)
+		freeze_limited_events(cpuhw, pmc5, pmc6);
+	else
+		thaw_limited_events(cpuhw, pmc5, pmc6);
+
+	/*
+	 * Write the full MMCR0 including the event overflow interrupt
+	 * enable bits, if necessary.
+	 */
+	if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
+		mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/*
+ * Disable all events to prevent PMU interrupts and to allow
+ * events to be added or removed.
+ */
+void hw_perf_disable(void)
+{
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags;
+
+	if (!ppmu)
+		return;
+	local_irq_save(flags);
+	cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	if (!cpuhw->disabled) {
+		cpuhw->disabled = 1;
+		cpuhw->n_added = 0;
+
+		/*
+		 * Check if we ever enabled the PMU on this cpu.
+		 */
+		if (!cpuhw->pmcs_enabled) {
+			ppc_enable_pmcs();
+			cpuhw->pmcs_enabled = 1;
+		}
+
+		/*
+		 * Disable instruction sampling if it was enabled
+		 */
+		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+			mtspr(SPRN_MMCRA,
+			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+			mb();
+		}
+
+		/*
+		 * Set the 'freeze events' bit.
+		 * The barrier is to make sure the mtspr has been
+		 * executed and the PMU has frozen the events
+		 * before we return.
+		 */
+		write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
+		mb();
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Re-enable all events if disable == 0.
+ * If we were previously disabled and events were added, then
+ * put the new config on the PMU.
+ */
+void hw_perf_enable(void)
+{
+	struct perf_event *event;
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags;
+	long i;
+	unsigned long val;
+	s64 left;
+	unsigned int hwc_index[MAX_HWEVENTS];
+	int n_lim;
+	int idx;
+
+	if (!ppmu)
+		return;
+	local_irq_save(flags);
+	cpuhw = &__get_cpu_var(cpu_hw_events);
+	if (!cpuhw->disabled) {
+		local_irq_restore(flags);
+		return;
+	}
+	cpuhw->disabled = 0;
+
+	/*
+	 * If we didn't change anything, or only removed events,
+	 * no need to recalculate MMCR* settings and reset the PMCs.
+	 * Just reenable the PMU with the current MMCR* settings
+	 * (possibly updated for removal of events).
+	 */
+	if (!cpuhw->n_added) {
+		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+		if (cpuhw->n_events == 0)
+			ppc_set_pmu_inuse(0);
+		goto out_enable;
+	}
+
+	/*
+	 * Compute MMCR* values for the new set of events
+	 */
+	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
+			       cpuhw->mmcr)) {
+		/* shouldn't ever get here */
+		printk(KERN_ERR "oops compute_mmcr failed\n");
+		goto out;
+	}
+
+	/*
+	 * Add in MMCR0 freeze bits corresponding to the
+	 * attr.exclude_* bits for the first event.
+	 * We have already checked that all events have the
+	 * same values for these bits as the first event.
+	 */
+	event = cpuhw->event[0];
+	if (event->attr.exclude_user)
+		cpuhw->mmcr[0] |= MMCR0_FCP;
+	if (event->attr.exclude_kernel)
+		cpuhw->mmcr[0] |= freeze_events_kernel;
+	if (event->attr.exclude_hv)
+		cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+	/*
+	 * Write the new configuration to MMCR* with the freeze
+	 * bit set and set the hardware events to their initial values.
+	 * Then unfreeze the events.
+	 */
+	ppc_set_pmu_inuse(1);
+	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+				| MMCR0_FC);
+
+	/*
+	 * Read off any pre-existing events that need to move
+	 * to another PMC.
+	 */
+	for (i = 0; i < cpuhw->n_events; ++i) {
+		event = cpuhw->event[i];
+		if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
+			power_pmu_read(event);
+			write_pmc(event->hw.idx, 0);
+			event->hw.idx = 0;
+		}
+	}
+
+	/*
+	 * Initialize the PMCs for all the new and moved events.
+	 */
+	cpuhw->n_limited = n_lim = 0;
+	for (i = 0; i < cpuhw->n_events; ++i) {
+		event = cpuhw->event[i];
+		if (event->hw.idx)
+			continue;
+		idx = hwc_index[i] + 1;
+		if (is_limited_pmc(idx)) {
+			cpuhw->limited_event[n_lim] = event;
+			cpuhw->limited_hwidx[n_lim] = idx;
+			++n_lim;
+			continue;
+		}
+		val = 0;
+		if (event->hw.sample_period) {
+			left = atomic64_read(&event->hw.period_left);
+			if (left < 0x80000000L)
+				val = 0x80000000L - left;
+		}
+		atomic64_set(&event->hw.prev_count, val);
+		event->hw.idx = idx;
+		write_pmc(idx, val);
+		perf_event_update_userpage(event);
+	}
+	cpuhw->n_limited = n_lim;
+	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+	mb();
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+
+	/*
+	 * Enable instruction sampling if necessary
+	 */
+	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+		mb();
+		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+	}
+
+ out:
+	local_irq_restore(flags);
+}
+
+static int collect_events(struct perf_event *group, int max_count,
+			  struct perf_event *ctrs[], u64 *events,
+			  unsigned int *flags)
+{
+	int n = 0;
+	struct perf_event *event;
+
+	if (!is_software_event(group)) {
+		if (n >= max_count)
+			return -1;
+		ctrs[n] = group;
+		flags[n] = group->hw.event_base;
+		events[n++] = group->hw.config;
+	}
+	list_for_each_entry(event, &group->sibling_list, list_entry) {
+		if (!is_software_event(event) &&
+		    event->state != PERF_EVENT_STATE_OFF) {
+			if (n >= max_count)
+				return -1;
+			ctrs[n] = event;
+			flags[n] = event->hw.event_base;
+			events[n++] = event->hw.config;
+		}
+	}
+	return n;
+}
+
+static void event_sched_in(struct perf_event *event, int cpu)
+{
+	event->state = PERF_EVENT_STATE_ACTIVE;
+	event->oncpu = cpu;
+	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+	if (is_software_event(event))
+		event->pmu->enable(event);
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ */
+int hw_perf_group_sched_in(struct perf_event *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_event_context *ctx, int cpu)
+{
+	struct cpu_hw_events *cpuhw;
+	long i, n, n0;
+	struct perf_event *sub;
+
+	if (!ppmu)
+		return 0;
+	cpuhw = &__get_cpu_var(cpu_hw_events);
+	n0 = cpuhw->n_events;
+	n = collect_events(group_leader, ppmu->n_event - n0,
+			   &cpuhw->event[n0], &cpuhw->events[n0],
+			   &cpuhw->flags[n0]);
+	if (n < 0)
+		return -EAGAIN;
+	if (check_excludes(cpuhw->event, cpuhw->flags, n0, n))
+		return -EAGAIN;
+	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0);
+	if (i < 0)
+		return -EAGAIN;
+	cpuhw->n_events = n0 + n;
+	cpuhw->n_added += n;
+
+	/*
+	 * OK, this group can go on; update event states etc.,
+	 * and enable any software events
+	 */
+	for (i = n0; i < n0 + n; ++i)
+		cpuhw->event[i]->hw.config = cpuhw->events[i];
+	cpuctx->active_oncpu += n;
+	n = 1;
+	event_sched_in(group_leader, cpu);
+	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
+		if (sub->state != PERF_EVENT_STATE_OFF) {
+			event_sched_in(sub, cpu);
+			++n;
+		}
+	}
+	ctx->nr_active += n;
+
+	return 1;
+}
+
+/*
+ * Add a event to the PMU.
+ * If all events are not already frozen, then we disable and
+ * re-enable the PMU in order to get hw_perf_enable to do the
+ * actual work of reconfiguring the PMU.
+ */
+static int power_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags;
+	int n0;
+	int ret = -EAGAIN;
+
+	local_irq_save(flags);
+	perf_disable();
+
+	/*
+	 * Add the event to the list (if there is room)
+	 * and check whether the total set is still feasible.
+	 */
+	cpuhw = &__get_cpu_var(cpu_hw_events);
+	n0 = cpuhw->n_events;
+	if (n0 >= ppmu->n_event)
+		goto out;
+	cpuhw->event[n0] = event;
+	cpuhw->events[n0] = event->hw.config;
+	cpuhw->flags[n0] = event->hw.event_base;
+	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
+		goto out;
+	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+		goto out;
+
+	event->hw.config = cpuhw->events[n0];
+	++cpuhw->n_events;
+	++cpuhw->n_added;
+
+	ret = 0;
+ out:
+	perf_enable();
+	local_irq_restore(flags);
+	return ret;
+}
+
+/*
+ * Remove a event from the PMU.
+ */
+static void power_pmu_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuhw;
+	long i;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	perf_disable();
+
+	power_pmu_read(event);
+
+	cpuhw = &__get_cpu_var(cpu_hw_events);
+	for (i = 0; i < cpuhw->n_events; ++i) {
+		if (event == cpuhw->event[i]) {
+			while (++i < cpuhw->n_events)
+				cpuhw->event[i-1] = cpuhw->event[i];
+			--cpuhw->n_events;
+			ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
+			if (event->hw.idx) {
+				write_pmc(event->hw.idx, 0);
+				event->hw.idx = 0;
+			}
+			perf_event_update_userpage(event);
+			break;
+		}
+	}
+	for (i = 0; i < cpuhw->n_limited; ++i)
+		if (event == cpuhw->limited_event[i])
+			break;
+	if (i < cpuhw->n_limited) {
+		while (++i < cpuhw->n_limited) {
+			cpuhw->limited_event[i-1] = cpuhw->limited_event[i];
+			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
+		}
+		--cpuhw->n_limited;
+	}
+	if (cpuhw->n_events == 0) {
+		/* disable exceptions if no events are running */
+		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+	}
+
+	perf_enable();
+	local_irq_restore(flags);
+}
+
+/*
+ * Re-enable interrupts on a event after they were throttled
+ * because they were coming too fast.
+ */
+static void power_pmu_unthrottle(struct perf_event *event)
+{
+	s64 val, left;
+	unsigned long flags;
+
+	if (!event->hw.idx || !event->hw.sample_period)
+		return;
+	local_irq_save(flags);
+	perf_disable();
+	power_pmu_read(event);
+	left = event->hw.sample_period;
+	event->hw.last_period = left;
+	val = 0;
+	if (left < 0x80000000L)
+		val = 0x80000000L - left;
+	write_pmc(event->hw.idx, val);
+	atomic64_set(&event->hw.prev_count, val);
+	atomic64_set(&event->hw.period_left, left);
+	perf_event_update_userpage(event);
+	perf_enable();
+	local_irq_restore(flags);
+}
+
+struct pmu power_pmu = {
+	.enable		= power_pmu_enable,
+	.disable	= power_pmu_disable,
+	.read		= power_pmu_read,
+	.unthrottle	= power_pmu_unthrottle,
+};
+
+/*
+ * Return 1 if we might be able to put event on a limited PMC,
+ * or 0 if not.
+ * A event can only go on a limited PMC if it counts something
+ * that a limited PMC can count, doesn't require interrupts, and
+ * doesn't exclude any processor mode.
+ */
+static int can_go_on_limited_pmc(struct perf_event *event, u64 ev,
+				 unsigned int flags)
+{
+	int n;
+	u64 alt[MAX_EVENT_ALTERNATIVES];
+
+	if (event->attr.exclude_user
+	    || event->attr.exclude_kernel
+	    || event->attr.exclude_hv
+	    || event->attr.sample_period)
+		return 0;
+
+	if (ppmu->limited_pmc_event(ev))
+		return 1;
+
+	/*
+	 * The requested event_id isn't on a limited PMC already;
+	 * see if any alternative code goes on a limited PMC.
+	 */
+	if (!ppmu->get_alternatives)
+		return 0;
+
+	flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
+	n = ppmu->get_alternatives(ev, flags, alt);
+
+	return n > 0;
+}
+
+/*
+ * Find an alternative event_id that goes on a normal PMC, if possible,
+ * and return the event_id code, or 0 if there is no such alternative.
+ * (Note: event_id code 0 is "don't count" on all machines.)
+ */
+static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
+{
+	u64 alt[MAX_EVENT_ALTERNATIVES];
+	int n;
+
+	flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
+	n = ppmu->get_alternatives(ev, flags, alt);
+	if (!n)
+		return 0;
+	return alt[0];
+}
+
+/* Number of perf_events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_event.
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+/*
+ * Translate a generic cache event_id config to a raw event_id code.
+ */
+static int hw_perf_cache_event(u64 config, u64 *eventp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!ppmu->cache_events)
+		return -EINVAL;
+
+	/* unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*ppmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*eventp = ev;
+	return 0;
+}
+
+const struct pmu *hw_perf_event_init(struct perf_event *event)
+{
+	u64 ev;
+	unsigned long flags;
+	struct perf_event *ctrs[MAX_HWEVENTS];
+	u64 events[MAX_HWEVENTS];
+	unsigned int cflags[MAX_HWEVENTS];
+	int n;
+	int err;
+	struct cpu_hw_events *cpuhw;
+
+	if (!ppmu)
+		return ERR_PTR(-ENXIO);
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		ev = event->attr.config;
+		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
+			return ERR_PTR(-EOPNOTSUPP);
+		ev = ppmu->generic_events[ev];
+		break;
+	case PERF_TYPE_HW_CACHE:
+		err = hw_perf_cache_event(event->attr.config, &ev);
+		if (err)
+			return ERR_PTR(err);
+		break;
+	case PERF_TYPE_RAW:
+		ev = event->attr.config;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+	event->hw.config_base = ev;
+	event->hw.idx = 0;
+
+	/*
+	 * If we are not running on a hypervisor, force the
+	 * exclude_hv bit to 0 so that we don't care what
+	 * the user set it to.
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		event->attr.exclude_hv = 0;
+
+	/*
+	 * If this is a per-task event, then we can use
+	 * PM_RUN_* events interchangeably with their non RUN_*
+	 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
+	 * XXX we should check if the task is an idle task.
+	 */
+	flags = 0;
+	if (event->ctx->task)
+		flags |= PPMU_ONLY_COUNT_RUN;
+
+	/*
+	 * If this machine has limited events, check whether this
+	 * event_id could go on a limited event.
+	 */
+	if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
+		if (can_go_on_limited_pmc(event, ev, flags)) {
+			flags |= PPMU_LIMITED_PMC_OK;
+		} else if (ppmu->limited_pmc_event(ev)) {
+			/*
+			 * The requested event_id is on a limited PMC,
+			 * but we can't use a limited PMC; see if any
+			 * alternative goes on a normal PMC.
+			 */
+			ev = normal_pmc_alternative(ev, flags);
+			if (!ev)
+				return ERR_PTR(-EINVAL);
+		}
+	}
+
+	/*
+	 * If this is in a group, check if it can go on with all the
+	 * other hardware events in the group.  We assume the event
+	 * hasn't been linked into its leader's sibling list at this point.
+	 */
+	n = 0;
+	if (event->group_leader != event) {
+		n = collect_events(event->group_leader, ppmu->n_event - 1,
+				   ctrs, events, cflags);
+		if (n < 0)
+			return ERR_PTR(-EINVAL);
+	}
+	events[n] = ev;
+	ctrs[n] = event;
+	cflags[n] = flags;
+	if (check_excludes(ctrs, cflags, n, 1))
+		return ERR_PTR(-EINVAL);
+
+	cpuhw = &get_cpu_var(cpu_hw_events);
+	err = power_check_constraints(cpuhw, events, cflags, n + 1);
+	put_cpu_var(cpu_hw_events);
+	if (err)
+		return ERR_PTR(-EINVAL);
+
+	event->hw.config = events[n];
+	event->hw.event_base = cflags[n];
+	event->hw.last_period = event->hw.sample_period;
+	atomic64_set(&event->hw.period_left, event->hw.last_period);
+
+	/*
+	 * See if we need to reserve the PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * reserve_pmc_hardware or release_pmc_hardware.
+	 */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 &&
+		    reserve_pmc_hardware(perf_event_interrupt))
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	event->destroy = hw_perf_event_destroy;
+
+	if (err)
+		return ERR_PTR(err);
+	return &power_pmu;
+}
+
+/*
+ * A event has overflowed; update its count and record
+ * things if requested.  Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ */
+static void record_and_restart(struct perf_event *event, unsigned long val,
+			       struct pt_regs *regs, int nmi)
+{
+	u64 period = event->hw.sample_period;
+	s64 prev, delta, left;
+	int record = 0;
+
+	/* we don't have to worry about interrupts here */
+	prev = atomic64_read(&event->hw.prev_count);
+	delta = (val - prev) & 0xfffffffful;
+	atomic64_add(delta, &event->count);
+
+	/*
+	 * See if the total period for this event has expired,
+	 * and update for the next period.
+	 */
+	val = 0;
+	left = atomic64_read(&event->hw.period_left) - delta;
+	if (period) {
+		if (left <= 0) {
+			left += period;
+			if (left <= 0)
+				left = period;
+			record = 1;
+		}
+		if (left < 0x80000000LL)
+			val = 0x80000000LL - left;
+	}
+
+	/*
+	 * Finally record data if requested.
+	 */
+	if (record) {
+		struct perf_sample_data data = {
+			.addr	= 0,
+			.period	= event->hw.last_period,
+		};
+
+		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+			perf_get_data_addr(regs, &data.addr);
+
+		if (perf_event_overflow(event, nmi, &data, regs)) {
+			/*
+			 * Interrupts are coming too fast - throttle them
+			 * by setting the event to 0, so it will be
+			 * at least 2^30 cycles until the next interrupt
+			 * (assuming each event counts at most 2 counts
+			 * per cycle).
+			 */
+			val = 0;
+			left = ~0ULL >> 1;
+		}
+	}
+
+	write_pmc(event->hw.idx, val);
+	atomic64_set(&event->hw.prev_count, val);
+	atomic64_set(&event->hw.period_left, left);
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Called from generic code to get the misc flags (i.e. processor mode)
+ * for an event_id.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	u32 flags = perf_get_misc_flags(regs);
+
+	if (flags)
+		return flags;
+	return user_mode(regs) ? PERF_RECORD_MISC_USER :
+		PERF_RECORD_MISC_KERNEL;
+}
+
+/*
+ * Called from generic code to get the instruction pointer
+ * for an event_id.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	unsigned long ip;
+
+	if (TRAP(regs) != 0xf00)
+		return regs->nip;	/* not a PMU interrupt */
+
+	ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
+	return ip;
+}
+
+/*
+ * Performance monitor interrupt stuff
+ */
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+	int i;
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event;
+	unsigned long val;
+	int found = 0;
+	int nmi;
+
+	if (cpuhw->n_limited)
+		freeze_limited_events(cpuhw, mfspr(SPRN_PMC5),
+					mfspr(SPRN_PMC6));
+
+	perf_read_regs(regs);
+
+	nmi = perf_intr_is_nmi(regs);
+	if (nmi)
+		nmi_enter();
+	else
+		irq_enter();
+
+	for (i = 0; i < cpuhw->n_events; ++i) {
+		event = cpuhw->event[i];
+		if (!event->hw.idx || is_limited_pmc(event->hw.idx))
+			continue;
+		val = read_pmc(event->hw.idx);
+		if ((int)val < 0) {
+			/* event has overflowed */
+			found = 1;
+			record_and_restart(event, val, regs, nmi);
+		}
+	}
+
+	/*
+	 * In case we didn't find and reset the event that caused
+	 * the interrupt, scan all events and reset any that are
+	 * negative, to avoid getting continual interrupts.
+	 * Any that we processed in the previous loop will not be negative.
+	 */
+	if (!found) {
+		for (i = 0; i < ppmu->n_event; ++i) {
+			if (is_limited_pmc(i + 1))
+				continue;
+			val = read_pmc(i + 1);
+			if ((int)val < 0)
+				write_pmc(i + 1, 0);
+		}
+	}
+
+	/*
+	 * Reset MMCR0 to its normal value.  This will set PMXE and
+	 * clear FC (freeze events) and PMAO (perf mon alert occurred)
+	 * and thus allow interrupts to occur again.
+	 * XXX might want to use MSR.PM to keep the events frozen until
+	 * we get back out of this interrupt.
+	 */
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+
+	if (nmi)
+		nmi_exit();
+	else
+		irq_exit();
+}
+
+void hw_perf_event_setup(int cpu)
+{
+	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	if (!ppmu)
+		return;
+	memset(cpuhw, 0, sizeof(*cpuhw));
+	cpuhw->mmcr[0] = MMCR0_FC;
+}
+
+int register_power_pmu(struct power_pmu *pmu)
+{
+	if (ppmu)
+		return -EBUSY;		/* something's already registered */
+
+	ppmu = pmu;
+	pr_info("%s performance monitor hardware support registered\n",
+		pmu->name);
+
+#ifdef MSR_HV
+	/*
+	 * Use FCHV to ignore kernel events if MSR.HV is set.
+	 */
+	if (mfmsr() & MSR_HV)
+		freeze_events_kernel = MMCR0_FCHV;
+#endif /* CONFIG_PPC64 */
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 3c90a3d..2a361cd 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/string.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 31918af..0f4c1c7 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/string.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 867f6f6..c351b3a 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/string.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index fa21890..ca399ba 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/string.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 018d094..28a4daa 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/string.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 75dccb7..4795744 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -9,7 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/string.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 465e498..df45a74 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -53,7 +53,7 @@
 #include <linux/posix-timers.h>
 #include <linux/irq.h>
 #include <linux/delay.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -527,25 +527,25 @@ void __init iSeries_time_init_early(void)
 }
 #endif /* CONFIG_PPC_ISERIES */
 
-#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_counter_pending);
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
+DEFINE_PER_CPU(u8, perf_event_pending);
 
-void set_perf_counter_pending(void)
+void set_perf_event_pending(void)
 {
-	get_cpu_var(perf_counter_pending) = 1;
+	get_cpu_var(perf_event_pending) = 1;
 	set_dec(1);
-	put_cpu_var(perf_counter_pending);
+	put_cpu_var(perf_event_pending);
 }
 
-#define test_perf_counter_pending()	__get_cpu_var(perf_counter_pending)
-#define clear_perf_counter_pending()	__get_cpu_var(perf_counter_pending) = 0
+#define test_perf_event_pending()	__get_cpu_var(perf_event_pending)
+#define clear_perf_event_pending()	__get_cpu_var(perf_event_pending) = 0
 
-#else  /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */
+#else  /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
 
-#define test_perf_counter_pending()	0
-#define clear_perf_counter_pending()
+#define test_perf_event_pending()	0
+#define clear_perf_event_pending()
 
-#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
 
 /*
  * For iSeries shared processors, we have to let the hypervisor
@@ -573,9 +573,9 @@ void timer_interrupt(struct pt_regs * regs)
 	set_dec(DECREMENTER_MAX);
 
 #ifdef CONFIG_PPC32
-	if (test_perf_counter_pending()) {
-		clear_perf_counter_pending();
-		perf_counter_do_pending();
+	if (test_perf_event_pending()) {
+		clear_perf_event_pending();
+		perf_event_do_pending();
 	}
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 830bef0..e7dae82 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,7 +29,7 @@
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 
 #include <asm/firmware.h>
 #include <asm/page.h>
@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -312,7 +312,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -323,7 +323,7 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 	up_read(&mm->mmap_sem);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 9efc8bd..e382cae 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -280,9 +280,9 @@ config PPC_HAVE_PMU_SUPPORT
 
 config PPC_PERF_CTRS
        def_bool y
-       depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT
+       depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
        help
-         This enables the powerpc-specific perf_counter back-end.
+         This enables the powerpc-specific perf_event back-end.
 
 config SMP
 	depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1c866ef..43c0aca 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -94,7 +94,7 @@ config S390
 	select HAVE_KVM if 64BIT
 	select HAVE_ARCH_TRACEHOOK
 	select INIT_ALL_POSSIBLE
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_EVENTS
 
 config SCHED_OMIT_FRAME_POINTER
 	bool
diff --git a/arch/s390/include/asm/perf_counter.h b/arch/s390/include/asm/perf_counter.h
deleted file mode 100644
index 7015188..0000000
--- a/arch/s390/include/asm/perf_counter.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * Performance counter support - s390 specific definitions.
- *
- * Copyright 2009 Martin Schwidefsky, IBM Corporation.
- */
-
-static inline void set_perf_counter_pending(void) {}
-static inline void clear_perf_counter_pending(void) {}
-
-#define PERF_COUNTER_INDEX_OFFSET 0
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
new file mode 100644
index 0000000..3840cbe
--- /dev/null
+++ b/arch/s390/include/asm/perf_event.h
@@ -0,0 +1,10 @@
+/*
+ * Performance event support - s390 specific definitions.
+ *
+ * Copyright 2009 Martin Schwidefsky, IBM Corporation.
+ */
+
+static inline void set_perf_event_pending(void) {}
+static inline void clear_perf_event_pending(void) {}
+
+#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index c80602d..cb5232d 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -268,7 +268,7 @@
 #define	__NR_preadv		328
 #define	__NR_pwritev		329
 #define __NR_rt_tgsigqueueinfo	330
-#define __NR_perf_counter_open	331
+#define __NR_perf_event_open	331
 #define NR_syscalls 332
 
 /* 
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 88a83366..6247900 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1832,11 +1832,11 @@ compat_sys_rt_tgsigqueueinfo_wrapper:
 	llgtr	%r5,%r5			# struct compat_siginfo *
 	jg	compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call
 
-	.globl	sys_perf_counter_open_wrapper
-sys_perf_counter_open_wrapper:
-	llgtr	%r2,%r2			# const struct perf_counter_attr *
+	.globl	sys_perf_event_open_wrapper
+sys_perf_event_open_wrapper:
+	llgtr	%r2,%r2			# const struct perf_event_attr *
 	lgfr	%r3,%r3			# pid_t
 	lgfr	%r4,%r4			# int
 	lgfr	%r5,%r5			# int
 	llgfr	%r6,%r6			# unsigned long
-	jg	sys_perf_counter_open	# branch to system call
+	jg	sys_perf_event_open	# branch to system call
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index ad1acd2..0b50836 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -339,4 +339,4 @@ SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper)
 SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper)
 SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper)
 SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */
-SYSCALL(sys_perf_counter_open,sys_perf_counter_open,sys_perf_counter_open_wrapper)
+SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 1abbadd..6d50746 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -10,7 +10,7 @@
  *    Copyright (C) 1995  Linus Torvalds
  */
 
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -306,7 +306,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write)
 	 * interrupts again and then search the VMAs
 	 */
 	local_irq_enable();
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 	down_read(&mm->mmap_sem);
 
 	si_code = SEGV_MAPERR;
@@ -366,11 +366,11 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
         up_read(&mm->mmap_sem);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 4df3570..b940424 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -16,7 +16,7 @@ config SUPERH
 	select HAVE_IOREMAP_PROT if MMU
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_EVENTS
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sh/include/asm/perf_counter.h b/arch/sh/include/asm/perf_counter.h
deleted file mode 100644
index d8e6bb9..0000000
--- a/arch/sh/include/asm/perf_counter.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __ASM_SH_PERF_COUNTER_H
-#define __ASM_SH_PERF_COUNTER_H
-
-/* SH only supports software counters through this interface. */
-static inline void set_perf_counter_pending(void) {}
-
-#define PERF_COUNTER_INDEX_OFFSET	0
-
-#endif /* __ASM_SH_PERF_COUNTER_H */
diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h
new file mode 100644
index 0000000..11a3022
--- /dev/null
+++ b/arch/sh/include/asm/perf_event.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_SH_PERF_EVENT_H
+#define __ASM_SH_PERF_EVENT_H
+
+/* SH only supports software events through this interface. */
+static inline void set_perf_event_pending(void) {}
+
+#define PERF_EVENT_INDEX_OFFSET	0
+
+#endif /* __ASM_SH_PERF_EVENT_H */
diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index 925dd40..f3fd1b9 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -344,7 +344,7 @@
 #define __NR_preadv		333
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
-#define __NR_perf_counter_open	336
+#define __NR_perf_event_open	336
 
 #define NR_syscalls 337
 
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index 2b84bc9..343ce8f 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -384,7 +384,7 @@
 #define __NR_preadv		361
 #define __NR_pwritev		362
 #define __NR_rt_tgsigqueueinfo	363
-#define __NR_perf_counter_open	364
+#define __NR_perf_event_open	364
 
 #ifdef __KERNEL__
 
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 16ba225..19fd11d 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -352,4 +352,4 @@ ENTRY(sys_call_table)
 	.long sys_preadv
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index af6fb74..5bfde6c 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -390,4 +390,4 @@ sys_call_table:
 	.long sys_preadv
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 781b413..4753010 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -15,7 +15,7 @@
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <asm/io_trapped.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
@@ -157,7 +157,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if ((regs->sr & SR_IMASK) != SR_IMASK)
 		local_irq_enable();
 
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -208,11 +208,11 @@ survive:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 2dcc485..de0b0e88 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -20,7 +20,7 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/interrupt.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 	/* Not an IO address, so reenable interrupts */
 	local_irq_enable();
 
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -201,11 +201,11 @@ survive:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 86b8234..97fca46 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -25,7 +25,7 @@ config SPARC
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select RTC_CLASS
 	select RTC_DRV_M48T59
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_EVENTS
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 
@@ -47,7 +47,7 @@ config SPARC64
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
 	select RTC_DRV_STARFIRE
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_EVENTS
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h
deleted file mode 100644
index 5d7a8ca..0000000
--- a/arch/sparc/include/asm/perf_counter.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __ASM_SPARC_PERF_COUNTER_H
-#define __ASM_SPARC_PERF_COUNTER_H
-
-extern void set_perf_counter_pending(void);
-
-#define	PERF_COUNTER_INDEX_OFFSET	0
-
-#ifdef CONFIG_PERF_COUNTERS
-extern void init_hw_perf_counters(void);
-#else
-static inline void init_hw_perf_counters(void)	{ }
-#endif
-
-#endif
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
new file mode 100644
index 0000000..7e26698
--- /dev/null
+++ b/arch/sparc/include/asm/perf_event.h
@@ -0,0 +1,14 @@
+#ifndef __ASM_SPARC_PERF_EVENT_H
+#define __ASM_SPARC_PERF_EVENT_H
+
+extern void set_perf_event_pending(void);
+
+#define	PERF_EVENT_INDEX_OFFSET	0
+
+#ifdef CONFIG_PERF_EVENTS
+extern void init_hw_perf_events(void);
+#else
+static inline void init_hw_perf_events(void)	{ }
+#endif
+
+#endif
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 706df66..42f2316 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -395,7 +395,7 @@
 #define __NR_preadv		324
 #define __NR_pwritev		325
 #define __NR_rt_tgsigqueueinfo	326
-#define __NR_perf_counter_open	327
+#define __NR_perf_event_open	327
 
 #define NR_SYSCALLS		328
 
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 247cc62..3a048fa 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -104,5 +104,5 @@ obj-$(CONFIG_AUDIT)     += audit.o
 audit--$(CONFIG_AUDIT)  := compat_audit.o
 obj-$(CONFIG_COMPAT)    += $(audit--y)
 
-pc--$(CONFIG_PERF_COUNTERS) := perf_counter.o
+pc--$(CONFIG_PERF_EVENTS) := perf_event.o
 obj-$(CONFIG_SPARC64)	+= $(pc--y)
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 378eb53..b129611 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -19,7 +19,7 @@
 #include <linux/delay.h>
 #include <linux/smp.h>
 
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 #include <asm/ptrace.h>
 #include <asm/local.h>
 #include <asm/pcr.h>
@@ -265,7 +265,7 @@ int __init nmi_init(void)
 		}
 	}
 	if (!err)
-		init_hw_perf_counters();
+		init_hw_perf_events();
 
 	return err;
 }
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 68ff001..2d94e7a 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -7,7 +7,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 
 #include <asm/pil.h>
 #include <asm/pcr.h>
@@ -15,7 +15,7 @@
 
 /* This code is shared between various users of the performance
  * counters.  Users will be oprofile, pseudo-NMI watchdog, and the
- * perf_counter support layer.
+ * perf_event support layer.
  */
 
 #define PCR_SUN4U_ENABLE	(PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE)
@@ -42,14 +42,14 @@ void deferred_pcr_work_irq(int irq, struct pt_regs *regs)
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
-#ifdef CONFIG_PERF_COUNTERS
-	perf_counter_do_pending();
+#ifdef CONFIG_PERF_EVENTS
+	perf_event_do_pending();
 #endif
 	irq_exit();
 	set_irq_regs(old_regs);
 }
 
-void set_perf_counter_pending(void)
+void set_perf_event_pending(void)
 {
 	set_softint(1 << PIL_DEFERRED_PCR_WORK);
 }
diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c
deleted file mode 100644
index b1265ce..0000000
--- a/arch/sparc/kernel/perf_counter.c
+++ /dev/null
@@ -1,556 +0,0 @@
-/* Performance counter support for sparc64.
- *
- * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
- *
- * This code is based almost entirely upon the x86 perf counter
- * code, which is:
- *
- *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2009 Jaswinder Singh Rajput
- *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
- */
-
-#include <linux/perf_counter.h>
-#include <linux/kprobes.h>
-#include <linux/kernel.h>
-#include <linux/kdebug.h>
-#include <linux/mutex.h>
-
-#include <asm/cpudata.h>
-#include <asm/atomic.h>
-#include <asm/nmi.h>
-#include <asm/pcr.h>
-
-/* Sparc64 chips have two performance counters, 32-bits each, with
- * overflow interrupts generated on transition from 0xffffffff to 0.
- * The counters are accessed in one go using a 64-bit register.
- *
- * Both counters are controlled using a single control register.  The
- * only way to stop all sampling is to clear all of the context (user,
- * supervisor, hypervisor) sampling enable bits.  But these bits apply
- * to both counters, thus the two counters can't be enabled/disabled
- * individually.
- *
- * The control register has two event fields, one for each of the two
- * counters.  It's thus nearly impossible to have one counter going
- * while keeping the other one stopped.  Therefore it is possible to
- * get overflow interrupts for counters not currently "in use" and
- * that condition must be checked in the overflow interrupt handler.
- *
- * So we use a hack, in that we program inactive counters with the
- * "sw_count0" and "sw_count1" events.  These count how many times
- * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
- * unusual way to encode a NOP and therefore will not trigger in
- * normal code.
- */
-
-#define MAX_HWCOUNTERS			2
-#define MAX_PERIOD			((1UL << 32) - 1)
-
-#define PIC_UPPER_INDEX			0
-#define PIC_LOWER_INDEX			1
-
-struct cpu_hw_counters {
-	struct perf_counter	*counters[MAX_HWCOUNTERS];
-	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)];
-	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)];
-	int enabled;
-};
-DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, };
-
-struct perf_event_map {
-	u16	encoding;
-	u8	pic_mask;
-#define PIC_NONE	0x00
-#define PIC_UPPER	0x01
-#define PIC_LOWER	0x02
-};
-
-struct sparc_pmu {
-	const struct perf_event_map	*(*event_map)(int);
-	int				max_events;
-	int				upper_shift;
-	int				lower_shift;
-	int				event_mask;
-	int				hv_bit;
-	int				irq_bit;
-	int				upper_nop;
-	int				lower_nop;
-};
-
-static const struct perf_event_map ultra3i_perfmon_event_map[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
-	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
-	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
-	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
-};
-
-static const struct perf_event_map *ultra3i_event_map(int event)
-{
-	return &ultra3i_perfmon_event_map[event];
-}
-
-static const struct sparc_pmu ultra3i_pmu = {
-	.event_map	= ultra3i_event_map,
-	.max_events	= ARRAY_SIZE(ultra3i_perfmon_event_map),
-	.upper_shift	= 11,
-	.lower_shift	= 4,
-	.event_mask	= 0x3f,
-	.upper_nop	= 0x1c,
-	.lower_nop	= 0x14,
-};
-
-static const struct perf_event_map niagara2_perfmon_event_map[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
-	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
-	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
-	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
-	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
-};
-
-static const struct perf_event_map *niagara2_event_map(int event)
-{
-	return &niagara2_perfmon_event_map[event];
-}
-
-static const struct sparc_pmu niagara2_pmu = {
-	.event_map	= niagara2_event_map,
-	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
-	.upper_shift	= 19,
-	.lower_shift	= 6,
-	.event_mask	= 0xfff,
-	.hv_bit		= 0x8,
-	.irq_bit	= 0x03,
-	.upper_nop	= 0x220,
-	.lower_nop	= 0x220,
-};
-
-static const struct sparc_pmu *sparc_pmu __read_mostly;
-
-static u64 event_encoding(u64 event, int idx)
-{
-	if (idx == PIC_UPPER_INDEX)
-		event <<= sparc_pmu->upper_shift;
-	else
-		event <<= sparc_pmu->lower_shift;
-	return event;
-}
-
-static u64 mask_for_index(int idx)
-{
-	return event_encoding(sparc_pmu->event_mask, idx);
-}
-
-static u64 nop_for_index(int idx)
-{
-	return event_encoding(idx == PIC_UPPER_INDEX ?
-			      sparc_pmu->upper_nop :
-			      sparc_pmu->lower_nop, idx);
-}
-
-static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc,
-					    int idx)
-{
-	u64 val, mask = mask_for_index(idx);
-
-	val = pcr_ops->read();
-	pcr_ops->write((val & ~mask) | hwc->config);
-}
-
-static inline void sparc_pmu_disable_counter(struct hw_perf_counter *hwc,
-					     int idx)
-{
-	u64 mask = mask_for_index(idx);
-	u64 nop = nop_for_index(idx);
-	u64 val = pcr_ops->read();
-
-	pcr_ops->write((val & ~mask) | nop);
-}
-
-void hw_perf_enable(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	u64 val;
-	int i;
-
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
-	val = pcr_ops->read();
-
-	for (i = 0; i < MAX_HWCOUNTERS; i++) {
-		struct perf_counter *cp = cpuc->counters[i];
-		struct hw_perf_counter *hwc;
-
-		if (!cp)
-			continue;
-		hwc = &cp->hw;
-		val |= hwc->config_base;
-	}
-
-	pcr_ops->write(val);
-}
-
-void hw_perf_disable(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	u64 val;
-
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-
-	val = pcr_ops->read();
-	val &= ~(PCR_UTRACE | PCR_STRACE |
-		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
-	pcr_ops->write(val);
-}
-
-static u32 read_pmc(int idx)
-{
-	u64 val;
-
-	read_pic(val);
-	if (idx == PIC_UPPER_INDEX)
-		val >>= 32;
-
-	return val & 0xffffffff;
-}
-
-static void write_pmc(int idx, u64 val)
-{
-	u64 shift, mask, pic;
-
-	shift = 0;
-	if (idx == PIC_UPPER_INDEX)
-		shift = 32;
-
-	mask = ((u64) 0xffffffff) << shift;
-	val <<= shift;
-
-	read_pic(pic);
-	pic &= ~mask;
-	pic |= val;
-	write_pic(pic);
-}
-
-static int sparc_perf_counter_set_period(struct perf_counter *counter,
-					 struct hw_perf_counter *hwc, int idx)
-{
-	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int ret = 0;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		atomic64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		atomic64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-	if (left > MAX_PERIOD)
-		left = MAX_PERIOD;
-
-	atomic64_set(&hwc->prev_count, (u64)-left);
-
-	write_pmc(idx, (u64)(-left) & 0xffffffff);
-
-	perf_counter_update_userpage(counter);
-
-	return ret;
-}
-
-static int sparc_pmu_enable(struct perf_counter *counter)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
-	int idx = hwc->idx;
-
-	if (test_and_set_bit(idx, cpuc->used_mask))
-		return -EAGAIN;
-
-	sparc_pmu_disable_counter(hwc, idx);
-
-	cpuc->counters[idx] = counter;
-	set_bit(idx, cpuc->active_mask);
-
-	sparc_perf_counter_set_period(counter, hwc, idx);
-	sparc_pmu_enable_counter(hwc, idx);
-	perf_counter_update_userpage(counter);
-	return 0;
-}
-
-static u64 sparc_perf_counter_update(struct perf_counter *counter,
-				     struct hw_perf_counter *hwc, int idx)
-{
-	int shift = 64 - 32;
-	u64 prev_raw_count, new_raw_count;
-	s64 delta;
-
-again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
-	new_raw_count = read_pmc(idx);
-
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			     new_raw_count) != prev_raw_count)
-		goto again;
-
-	delta = (new_raw_count << shift) - (prev_raw_count << shift);
-	delta >>= shift;
-
-	atomic64_add(delta, &counter->count);
-	atomic64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static void sparc_pmu_disable(struct perf_counter *counter)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
-	int idx = hwc->idx;
-
-	clear_bit(idx, cpuc->active_mask);
-	sparc_pmu_disable_counter(hwc, idx);
-
-	barrier();
-
-	sparc_perf_counter_update(counter, hwc, idx);
-	cpuc->counters[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
-
-	perf_counter_update_userpage(counter);
-}
-
-static void sparc_pmu_read(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	sparc_perf_counter_update(counter, hwc, hwc->idx);
-}
-
-static void sparc_pmu_unthrottle(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	sparc_pmu_enable_counter(hwc, hwc->idx);
-}
-
-static atomic_t active_counters = ATOMIC_INIT(0);
-static DEFINE_MUTEX(pmc_grab_mutex);
-
-void perf_counter_grab_pmc(void)
-{
-	if (atomic_inc_not_zero(&active_counters))
-		return;
-
-	mutex_lock(&pmc_grab_mutex);
-	if (atomic_read(&active_counters) == 0) {
-		if (atomic_read(&nmi_active) > 0) {
-			on_each_cpu(stop_nmi_watchdog, NULL, 1);
-			BUG_ON(atomic_read(&nmi_active) != 0);
-		}
-		atomic_inc(&active_counters);
-	}
-	mutex_unlock(&pmc_grab_mutex);
-}
-
-void perf_counter_release_pmc(void)
-{
-	if (atomic_dec_and_mutex_lock(&active_counters, &pmc_grab_mutex)) {
-		if (atomic_read(&nmi_active) == 0)
-			on_each_cpu(start_nmi_watchdog, NULL, 1);
-		mutex_unlock(&pmc_grab_mutex);
-	}
-}
-
-static void hw_perf_counter_destroy(struct perf_counter *counter)
-{
-	perf_counter_release_pmc();
-}
-
-static int __hw_perf_counter_init(struct perf_counter *counter)
-{
-	struct perf_counter_attr *attr = &counter->attr;
-	struct hw_perf_counter *hwc = &counter->hw;
-	const struct perf_event_map *pmap;
-	u64 enc;
-
-	if (atomic_read(&nmi_active) < 0)
-		return -ENODEV;
-
-	if (attr->type != PERF_TYPE_HARDWARE)
-		return -EOPNOTSUPP;
-
-	if (attr->config >= sparc_pmu->max_events)
-		return -EINVAL;
-
-	perf_counter_grab_pmc();
-	counter->destroy = hw_perf_counter_destroy;
-
-	/* We save the enable bits in the config_base.  So to
-	 * turn off sampling just write 'config', and to enable
-	 * things write 'config | config_base'.
-	 */
-	hwc->config_base = sparc_pmu->irq_bit;
-	if (!attr->exclude_user)
-		hwc->config_base |= PCR_UTRACE;
-	if (!attr->exclude_kernel)
-		hwc->config_base |= PCR_STRACE;
-	if (!attr->exclude_hv)
-		hwc->config_base |= sparc_pmu->hv_bit;
-
-	if (!hwc->sample_period) {
-		hwc->sample_period = MAX_PERIOD;
-		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
-	}
-
-	pmap = sparc_pmu->event_map(attr->config);
-
-	enc = pmap->encoding;
-	if (pmap->pic_mask & PIC_UPPER) {
-		hwc->idx = PIC_UPPER_INDEX;
-		enc <<= sparc_pmu->upper_shift;
-	} else {
-		hwc->idx = PIC_LOWER_INDEX;
-		enc <<= sparc_pmu->lower_shift;
-	}
-
-	hwc->config |= enc;
-	return 0;
-}
-
-static const struct pmu pmu = {
-	.enable		= sparc_pmu_enable,
-	.disable	= sparc_pmu_disable,
-	.read		= sparc_pmu_read,
-	.unthrottle	= sparc_pmu_unthrottle,
-};
-
-const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
-{
-	int err = __hw_perf_counter_init(counter);
-
-	if (err)
-		return ERR_PTR(err);
-	return &pmu;
-}
-
-void perf_counter_print_debug(void)
-{
-	unsigned long flags;
-	u64 pcr, pic;
-	int cpu;
-
-	if (!sparc_pmu)
-		return;
-
-	local_irq_save(flags);
-
-	cpu = smp_processor_id();
-
-	pcr = pcr_ops->read();
-	read_pic(pic);
-
-	pr_info("\n");
-	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
-		cpu, pcr, pic);
-
-	local_irq_restore(flags);
-}
-
-static int __kprobes perf_counter_nmi_handler(struct notifier_block *self,
-					      unsigned long cmd, void *__args)
-{
-	struct die_args *args = __args;
-	struct perf_sample_data data;
-	struct cpu_hw_counters *cpuc;
-	struct pt_regs *regs;
-	int idx;
-
-	if (!atomic_read(&active_counters))
-		return NOTIFY_DONE;
-
-	switch (cmd) {
-	case DIE_NMI:
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	regs = args->regs;
-
-	data.addr = 0;
-
-	cpuc = &__get_cpu_var(cpu_hw_counters);
-	for (idx = 0; idx < MAX_HWCOUNTERS; idx++) {
-		struct perf_counter *counter = cpuc->counters[idx];
-		struct hw_perf_counter *hwc;
-		u64 val;
-
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-		hwc = &counter->hw;
-		val = sparc_perf_counter_update(counter, hwc, idx);
-		if (val & (1ULL << 31))
-			continue;
-
-		data.period = counter->hw.last_period;
-		if (!sparc_perf_counter_set_period(counter, hwc, idx))
-			continue;
-
-		if (perf_counter_overflow(counter, 1, &data, regs))
-			sparc_pmu_disable_counter(hwc, idx);
-	}
-
-	return NOTIFY_STOP;
-}
-
-static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
-	.notifier_call		= perf_counter_nmi_handler,
-};
-
-static bool __init supported_pmu(void)
-{
-	if (!strcmp(sparc_pmu_type, "ultra3i")) {
-		sparc_pmu = &ultra3i_pmu;
-		return true;
-	}
-	if (!strcmp(sparc_pmu_type, "niagara2")) {
-		sparc_pmu = &niagara2_pmu;
-		return true;
-	}
-	return false;
-}
-
-void __init init_hw_perf_counters(void)
-{
-	pr_info("Performance counters: ");
-
-	if (!supported_pmu()) {
-		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-		return;
-	}
-
-	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
-
-	/* All sparc64 PMUs currently have 2 counters.  But this simple
-	 * driver only supports one active counter at a time.
-	 */
-	perf_max_counters = 1;
-
-	register_die_notifier(&perf_counter_nmi_notifier);
-}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
new file mode 100644
index 0000000..2d6a1b1
--- /dev/null
+++ b/arch/sparc/kernel/perf_event.c
@@ -0,0 +1,556 @@
+/* Performance event support for sparc64.
+ *
+ * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
+ *
+ * This code is based almost entirely upon the x86 perf event
+ * code, which is:
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kprobes.h>
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+
+#include <asm/cpudata.h>
+#include <asm/atomic.h>
+#include <asm/nmi.h>
+#include <asm/pcr.h>
+
+/* Sparc64 chips have two performance counters, 32-bits each, with
+ * overflow interrupts generated on transition from 0xffffffff to 0.
+ * The counters are accessed in one go using a 64-bit register.
+ *
+ * Both counters are controlled using a single control register.  The
+ * only way to stop all sampling is to clear all of the context (user,
+ * supervisor, hypervisor) sampling enable bits.  But these bits apply
+ * to both counters, thus the two counters can't be enabled/disabled
+ * individually.
+ *
+ * The control register has two event fields, one for each of the two
+ * counters.  It's thus nearly impossible to have one counter going
+ * while keeping the other one stopped.  Therefore it is possible to
+ * get overflow interrupts for counters not currently "in use" and
+ * that condition must be checked in the overflow interrupt handler.
+ *
+ * So we use a hack, in that we program inactive counters with the
+ * "sw_count0" and "sw_count1" events.  These count how many times
+ * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
+ * unusual way to encode a NOP and therefore will not trigger in
+ * normal code.
+ */
+
+#define MAX_HWEVENTS			2
+#define MAX_PERIOD			((1UL << 32) - 1)
+
+#define PIC_UPPER_INDEX			0
+#define PIC_LOWER_INDEX			1
+
+struct cpu_hw_events {
+	struct perf_event	*events[MAX_HWEVENTS];
+	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	int enabled;
+};
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
+
+struct perf_event_map {
+	u16	encoding;
+	u8	pic_mask;
+#define PIC_NONE	0x00
+#define PIC_UPPER	0x01
+#define PIC_LOWER	0x02
+};
+
+struct sparc_pmu {
+	const struct perf_event_map	*(*event_map)(int);
+	int				max_events;
+	int				upper_shift;
+	int				lower_shift;
+	int				event_mask;
+	int				hv_bit;
+	int				irq_bit;
+	int				upper_nop;
+	int				lower_nop;
+};
+
+static const struct perf_event_map ultra3i_perfmon_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
+};
+
+static const struct perf_event_map *ultra3i_event_map(int event_id)
+{
+	return &ultra3i_perfmon_event_map[event_id];
+}
+
+static const struct sparc_pmu ultra3i_pmu = {
+	.event_map	= ultra3i_event_map,
+	.max_events	= ARRAY_SIZE(ultra3i_perfmon_event_map),
+	.upper_shift	= 11,
+	.lower_shift	= 4,
+	.event_mask	= 0x3f,
+	.upper_nop	= 0x1c,
+	.lower_nop	= 0x14,
+};
+
+static const struct perf_event_map niagara2_perfmon_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
+};
+
+static const struct perf_event_map *niagara2_event_map(int event_id)
+{
+	return &niagara2_perfmon_event_map[event_id];
+}
+
+static const struct sparc_pmu niagara2_pmu = {
+	.event_map	= niagara2_event_map,
+	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
+	.upper_shift	= 19,
+	.lower_shift	= 6,
+	.event_mask	= 0xfff,
+	.hv_bit		= 0x8,
+	.irq_bit	= 0x03,
+	.upper_nop	= 0x220,
+	.lower_nop	= 0x220,
+};
+
+static const struct sparc_pmu *sparc_pmu __read_mostly;
+
+static u64 event_encoding(u64 event_id, int idx)
+{
+	if (idx == PIC_UPPER_INDEX)
+		event_id <<= sparc_pmu->upper_shift;
+	else
+		event_id <<= sparc_pmu->lower_shift;
+	return event_id;
+}
+
+static u64 mask_for_index(int idx)
+{
+	return event_encoding(sparc_pmu->event_mask, idx);
+}
+
+static u64 nop_for_index(int idx)
+{
+	return event_encoding(idx == PIC_UPPER_INDEX ?
+			      sparc_pmu->upper_nop :
+			      sparc_pmu->lower_nop, idx);
+}
+
+static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc,
+					    int idx)
+{
+	u64 val, mask = mask_for_index(idx);
+
+	val = pcr_ops->read();
+	pcr_ops->write((val & ~mask) | hwc->config);
+}
+
+static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc,
+					     int idx)
+{
+	u64 mask = mask_for_index(idx);
+	u64 nop = nop_for_index(idx);
+	u64 val = pcr_ops->read();
+
+	pcr_ops->write((val & ~mask) | nop);
+}
+
+void hw_perf_enable(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val;
+	int i;
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	val = pcr_ops->read();
+
+	for (i = 0; i < MAX_HWEVENTS; i++) {
+		struct perf_event *cp = cpuc->events[i];
+		struct hw_perf_event *hwc;
+
+		if (!cp)
+			continue;
+		hwc = &cp->hw;
+		val |= hwc->config_base;
+	}
+
+	pcr_ops->write(val);
+}
+
+void hw_perf_disable(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val;
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+
+	val = pcr_ops->read();
+	val &= ~(PCR_UTRACE | PCR_STRACE |
+		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
+	pcr_ops->write(val);
+}
+
+static u32 read_pmc(int idx)
+{
+	u64 val;
+
+	read_pic(val);
+	if (idx == PIC_UPPER_INDEX)
+		val >>= 32;
+
+	return val & 0xffffffff;
+}
+
+static void write_pmc(int idx, u64 val)
+{
+	u64 shift, mask, pic;
+
+	shift = 0;
+	if (idx == PIC_UPPER_INDEX)
+		shift = 32;
+
+	mask = ((u64) 0xffffffff) << shift;
+	val <<= shift;
+
+	read_pic(pic);
+	pic &= ~mask;
+	pic |= val;
+	write_pic(pic);
+}
+
+static int sparc_perf_event_set_period(struct perf_event *event,
+					 struct hw_perf_event *hwc, int idx)
+{
+	s64 left = atomic64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+	if (left > MAX_PERIOD)
+		left = MAX_PERIOD;
+
+	atomic64_set(&hwc->prev_count, (u64)-left);
+
+	write_pmc(idx, (u64)(-left) & 0xffffffff);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static int sparc_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (test_and_set_bit(idx, cpuc->used_mask))
+		return -EAGAIN;
+
+	sparc_pmu_disable_event(hwc, idx);
+
+	cpuc->events[idx] = event;
+	set_bit(idx, cpuc->active_mask);
+
+	sparc_perf_event_set_period(event, hwc, idx);
+	sparc_pmu_enable_event(hwc, idx);
+	perf_event_update_userpage(event);
+	return 0;
+}
+
+static u64 sparc_perf_event_update(struct perf_event *event,
+				     struct hw_perf_event *hwc, int idx)
+{
+	int shift = 64 - 32;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	new_raw_count = read_pmc(idx);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	atomic64_add(delta, &event->count);
+	atomic64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void sparc_pmu_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	clear_bit(idx, cpuc->active_mask);
+	sparc_pmu_disable_event(hwc, idx);
+
+	barrier();
+
+	sparc_perf_event_update(event, hwc, idx);
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static void sparc_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	sparc_perf_event_update(event, hwc, hwc->idx);
+}
+
+static void sparc_pmu_unthrottle(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	sparc_pmu_enable_event(hwc, hwc->idx);
+}
+
+static atomic_t active_events = ATOMIC_INIT(0);
+static DEFINE_MUTEX(pmc_grab_mutex);
+
+void perf_event_grab_pmc(void)
+{
+	if (atomic_inc_not_zero(&active_events))
+		return;
+
+	mutex_lock(&pmc_grab_mutex);
+	if (atomic_read(&active_events) == 0) {
+		if (atomic_read(&nmi_active) > 0) {
+			on_each_cpu(stop_nmi_watchdog, NULL, 1);
+			BUG_ON(atomic_read(&nmi_active) != 0);
+		}
+		atomic_inc(&active_events);
+	}
+	mutex_unlock(&pmc_grab_mutex);
+}
+
+void perf_event_release_pmc(void)
+{
+	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
+		if (atomic_read(&nmi_active) == 0)
+			on_each_cpu(start_nmi_watchdog, NULL, 1);
+		mutex_unlock(&pmc_grab_mutex);
+	}
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	perf_event_release_pmc();
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	const struct perf_event_map *pmap;
+	u64 enc;
+
+	if (atomic_read(&nmi_active) < 0)
+		return -ENODEV;
+
+	if (attr->type != PERF_TYPE_HARDWARE)
+		return -EOPNOTSUPP;
+
+	if (attr->config >= sparc_pmu->max_events)
+		return -EINVAL;
+
+	perf_event_grab_pmc();
+	event->destroy = hw_perf_event_destroy;
+
+	/* We save the enable bits in the config_base.  So to
+	 * turn off sampling just write 'config', and to enable
+	 * things write 'config | config_base'.
+	 */
+	hwc->config_base = sparc_pmu->irq_bit;
+	if (!attr->exclude_user)
+		hwc->config_base |= PCR_UTRACE;
+	if (!attr->exclude_kernel)
+		hwc->config_base |= PCR_STRACE;
+	if (!attr->exclude_hv)
+		hwc->config_base |= sparc_pmu->hv_bit;
+
+	if (!hwc->sample_period) {
+		hwc->sample_period = MAX_PERIOD;
+		hwc->last_period = hwc->sample_period;
+		atomic64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	pmap = sparc_pmu->event_map(attr->config);
+
+	enc = pmap->encoding;
+	if (pmap->pic_mask & PIC_UPPER) {
+		hwc->idx = PIC_UPPER_INDEX;
+		enc <<= sparc_pmu->upper_shift;
+	} else {
+		hwc->idx = PIC_LOWER_INDEX;
+		enc <<= sparc_pmu->lower_shift;
+	}
+
+	hwc->config |= enc;
+	return 0;
+}
+
+static const struct pmu pmu = {
+	.enable		= sparc_pmu_enable,
+	.disable	= sparc_pmu_disable,
+	.read		= sparc_pmu_read,
+	.unthrottle	= sparc_pmu_unthrottle,
+};
+
+const struct pmu *hw_perf_event_init(struct perf_event *event)
+{
+	int err = __hw_perf_event_init(event);
+
+	if (err)
+		return ERR_PTR(err);
+	return &pmu;
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+	u64 pcr, pic;
+	int cpu;
+
+	if (!sparc_pmu)
+		return;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	pcr = pcr_ops->read();
+	read_pic(pic);
+
+	pr_info("\n");
+	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
+		cpu, pcr, pic);
+
+	local_irq_restore(flags);
+}
+
+static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
+					      unsigned long cmd, void *__args)
+{
+	struct die_args *args = __args;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	if (!atomic_read(&active_events))
+		return NOTIFY_DONE;
+
+	switch (cmd) {
+	case DIE_NMI:
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	regs = args->regs;
+
+	data.addr = 0;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx < MAX_HWEVENTS; idx++) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+		u64 val;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		hwc = &event->hw;
+		val = sparc_perf_event_update(event, hwc, idx);
+		if (val & (1ULL << 31))
+			continue;
+
+		data.period = event->hw.last_period;
+		if (!sparc_perf_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 1, &data, regs))
+			sparc_pmu_disable_event(hwc, idx);
+	}
+
+	return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_event_nmi_notifier = {
+	.notifier_call		= perf_event_nmi_handler,
+};
+
+static bool __init supported_pmu(void)
+{
+	if (!strcmp(sparc_pmu_type, "ultra3i")) {
+		sparc_pmu = &ultra3i_pmu;
+		return true;
+	}
+	if (!strcmp(sparc_pmu_type, "niagara2")) {
+		sparc_pmu = &niagara2_pmu;
+		return true;
+	}
+	return false;
+}
+
+void __init init_hw_perf_events(void)
+{
+	pr_info("Performance events: ");
+
+	if (!supported_pmu()) {
+		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
+		return;
+	}
+
+	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
+
+	/* All sparc64 PMUs currently have 2 events.  But this simple
+	 * driver only supports one active event at a time.
+	 */
+	perf_max_events = 1;
+
+	register_die_notifier(&perf_event_nmi_notifier);
+}
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 0418157..0f1658d 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -82,5 +82,5 @@ sys_call_table:
 /*310*/	.long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 /*315*/	.long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
 /*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
-/*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open
+/*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
 
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 91b06b7..009825f 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -83,7 +83,7 @@ sys_call_table32:
 /*310*/	.word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
 	.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
-	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_counter_open
+	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open
 
 #endif /* CONFIG_COMPAT */
 
@@ -158,4 +158,4 @@ sys_call_table:
 /*310*/	.word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 	.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
-	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open
+	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 51c5901..e4ff5d1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
-	select HAVE_PERF_COUNTERS if (!M386 && !M486)
+	select HAVE_PERF_EVENTS if (!M386 && !M486)
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ba331bf..74619c4 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -831,5 +831,5 @@ ia32_sys_call_table:
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
-	.quad sys_perf_counter_open
+	.quad sys_perf_event_open
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 5e3f204..f5693c8 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif
 
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
deleted file mode 100644
index e7b7c93..0000000
--- a/arch/x86/include/asm/perf_counter.h
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef _ASM_X86_PERF_COUNTER_H
-#define _ASM_X86_PERF_COUNTER_H
-
-/*
- * Performance counter hw details:
- */
-
-#define X86_PMC_MAX_GENERIC					8
-#define X86_PMC_MAX_FIXED					3
-
-#define X86_PMC_IDX_GENERIC				        0
-#define X86_PMC_IDX_FIXED				       32
-#define X86_PMC_IDX_MAX					       64
-
-#define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
-
-#define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
-
-#define ARCH_PERFMON_EVENTSEL0_ENABLE			  (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
-
-/*
- * Includes eventsel and unit mask as well:
- */
-#define ARCH_PERFMON_EVENT_MASK				    0xffff
-
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
-		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
-
-#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
-
-/*
- * Intel "Architectural Performance Monitoring" CPUID
- * detection/enumeration details:
- */
-union cpuid10_eax {
-	struct {
-		unsigned int version_id:8;
-		unsigned int num_counters:8;
-		unsigned int bit_width:8;
-		unsigned int mask_length:8;
-	} split;
-	unsigned int full;
-};
-
-union cpuid10_edx {
-	struct {
-		unsigned int num_counters_fixed:4;
-		unsigned int reserved:28;
-	} split;
-	unsigned int full;
-};
-
-
-/*
- * Fixed-purpose performance counters:
- */
-
-/*
- * All 3 fixed-mode PMCs are configured via this single MSR:
- */
-#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL			0x38d
-
-/*
- * The counts are available in three separate MSRs:
- */
-
-/* Instr_Retired.Any: */
-#define MSR_ARCH_PERFMON_FIXED_CTR0			0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS			(X86_PMC_IDX_FIXED + 0)
-
-/* CPU_CLK_Unhalted.Core: */
-#define MSR_ARCH_PERFMON_FIXED_CTR1			0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES			(X86_PMC_IDX_FIXED + 1)
-
-/* CPU_CLK_Unhalted.Ref: */
-#define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
-#define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
-
-/*
- * We model BTS tracing as another fixed-mode PMC.
- *
- * We choose a value in the middle of the fixed counter range, since lower
- * values are used by actual fixed counters and higher values are used
- * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
- */
-#define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)
-
-
-#ifdef CONFIG_PERF_COUNTERS
-extern void init_hw_perf_counters(void);
-extern void perf_counters_lapic_init(void);
-
-#define PERF_COUNTER_INDEX_OFFSET			0
-
-#else
-static inline void init_hw_perf_counters(void)		{ }
-static inline void perf_counters_lapic_init(void)	{ }
-#endif
-
-#endif /* _ASM_X86_PERF_COUNTER_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
new file mode 100644
index 0000000..ad7ce3f
--- /dev/null
+++ b/arch/x86/include/asm/perf_event.h
@@ -0,0 +1,108 @@
+#ifndef _ASM_X86_PERF_EVENT_H
+#define _ASM_X86_PERF_EVENT_H
+
+/*
+ * Performance event hw details:
+ */
+
+#define X86_PMC_MAX_GENERIC					8
+#define X86_PMC_MAX_FIXED					3
+
+#define X86_PMC_IDX_GENERIC				        0
+#define X86_PMC_IDX_FIXED				       32
+#define X86_PMC_IDX_MAX					       64
+
+#define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE			  (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
+
+/*
+ * Includes eventsel and unit mask as well:
+ */
+#define ARCH_PERFMON_EVENT_MASK				    0xffff
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
+		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
+
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_events:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
+
+union cpuid10_edx {
+	struct {
+		unsigned int num_events_fixed:4;
+		unsigned int reserved:28;
+	} split;
+	unsigned int full;
+};
+
+
+/*
+ * Fixed-purpose performance events:
+ */
+
+/*
+ * All 3 fixed-mode PMCs are configured via this single MSR:
+ */
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL			0x38d
+
+/*
+ * The counts are available in three separate MSRs:
+ */
+
+/* Instr_Retired.Any: */
+#define MSR_ARCH_PERFMON_FIXED_CTR0			0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS			(X86_PMC_IDX_FIXED + 0)
+
+/* CPU_CLK_Unhalted.Core: */
+#define MSR_ARCH_PERFMON_FIXED_CTR1			0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES			(X86_PMC_IDX_FIXED + 1)
+
+/* CPU_CLK_Unhalted.Ref: */
+#define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
+#define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
+
+/*
+ * We model BTS tracing as another fixed-mode PMC.
+ *
+ * We choose a value in the middle of the fixed event range, since lower
+ * values are used by actual fixed events and higher values are used
+ * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
+ */
+#define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)
+
+
+#ifdef CONFIG_PERF_EVENTS
+extern void init_hw_perf_events(void);
+extern void perf_events_lapic_init(void);
+
+#define PERF_EVENT_INDEX_OFFSET			0
+
+#else
+static inline void init_hw_perf_events(void)		{ }
+static inline void perf_events_lapic_init(void)	{ }
+#endif
+
+#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 8deaada..6fb3c20 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -341,7 +341,7 @@
 #define __NR_preadv		333
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
-#define __NR_perf_counter_open	336
+#define __NR_perf_event_open	336
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index b9f3c60..8d3ad0a 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv)
 __SYSCALL(__NR_pwritev, sys_pwritev)
 #define __NR_rt_tgsigqueueinfo			297
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
-#define __NR_perf_counter_open			298
-__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
+#define __NR_perf_event_open			298
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a34601f..754174d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,7 +14,7 @@
  *	Mikael Pettersson	:	PM converted to driver model.
  */
 
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi_pmtmr.h>
@@ -35,7 +35,7 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 #include <asm/x86_init.h>
 #include <asm/pgalloc.h>
 #include <asm/atomic.h>
@@ -1189,7 +1189,7 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 	}
 #endif
-	perf_counters_lapic_init();
+	perf_events_lapic_init();
 
 	preempt_disable();
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 8dd3063..68537e9 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
-obj-$(CONFIG_PERF_COUNTERS)		+= perf_counter.o
+obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2fea97e..cc25c2b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -869,7 +869,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_counters();
+	init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
deleted file mode 100644
index b1f1156..0000000
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ /dev/null
@@ -1,2298 +0,0 @@
-/*
- * Performance counter x86 architecture code
- *
- *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2009 Jaswinder Singh Rajput
- *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
- *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_counter.h>
-#include <linux/capability.h>
-#include <linux/notifier.h>
-#include <linux/hardirq.h>
-#include <linux/kprobes.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/sched.h>
-#include <linux/uaccess.h>
-#include <linux/highmem.h>
-#include <linux/cpu.h>
-
-#include <asm/apic.h>
-#include <asm/stacktrace.h>
-#include <asm/nmi.h>
-
-static u64 perf_counter_mask __read_mostly;
-
-/* The maximal number of PEBS counters: */
-#define MAX_PEBS_COUNTERS	4
-
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
-
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
-
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
-
-
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
-
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_counter_reset[MAX_PEBS_COUNTERS];
-};
-
-struct cpu_hw_counters {
-	struct perf_counter	*counters[X86_PMC_IDX_MAX];
-	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		interrupts;
-	int			enabled;
-	struct debug_store	*ds;
-};
-
-/*
- * struct x86_pmu - generic x86 pmu
- */
-struct x86_pmu {
-	const char	*name;
-	int		version;
-	int		(*handle_irq)(struct pt_regs *);
-	void		(*disable_all)(void);
-	void		(*enable_all)(void);
-	void		(*enable)(struct hw_perf_counter *, int);
-	void		(*disable)(struct hw_perf_counter *, int);
-	unsigned	eventsel;
-	unsigned	perfctr;
-	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
-	int		max_events;
-	int		num_counters;
-	int		num_counters_fixed;
-	int		counter_bits;
-	u64		counter_mask;
-	int		apic;
-	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-};
-
-static struct x86_pmu x86_pmu __read_mostly;
-
-static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
-	.enabled = 1,
-};
-
-/*
- * Not sure about some of these
- */
-static const u64 p6_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
-};
-
-static u64 p6_pmu_event_map(int hw_event)
-{
-	return p6_perfmon_event_map[hw_event];
-}
-
-/*
- * Counter setting that is specified not to count anything.
- * We use this to effectively disable a counter.
- *
- * L2_RQSTS with 0 MESI unit mask.
- */
-#define P6_NOP_COUNTER			0x0000002EULL
-
-static u64 p6_pmu_raw_event(u64 hw_event)
-{
-#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define P6_EVNTSEL_INV_MASK		0x00800000ULL
-#define P6_EVNTSEL_COUNTER_MASK		0xFF000000ULL
-
-#define P6_EVNTSEL_MASK			\
-	(P6_EVNTSEL_EVENT_MASK |	\
-	 P6_EVNTSEL_UNIT_MASK  |	\
-	 P6_EVNTSEL_EDGE_MASK  |	\
-	 P6_EVNTSEL_INV_MASK   |	\
-	 P6_EVNTSEL_COUNTER_MASK)
-
-	return hw_event & P6_EVNTSEL_MASK;
-}
-
-
-/*
- * Intel PerfMon v3. Used on Core2 and later.
- */
-static const u64 intel_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
-};
-
-static u64 intel_pmu_event_map(int hw_event)
-{
-	return intel_perfmon_event_map[hw_event];
-}
-
-/*
- * Generalized hw caching related hw_event table, filled
- * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'hw_event makes no sense on
- * this CPU', any other value means the raw hw_event
- * ID.
- */
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-static u64 __read_mostly hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX];
-
-static const u64 nehalem_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
-		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
-		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static const u64 core2_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
-		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static const u64 atom_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
-		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static u64 intel_pmu_raw_event(u64 hw_event)
-{
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK		\
-	(CORE_EVNTSEL_EVENT_MASK |	\
-	 CORE_EVNTSEL_UNIT_MASK  |	\
-	 CORE_EVNTSEL_EDGE_MASK  |	\
-	 CORE_EVNTSEL_INV_MASK  |	\
-	 CORE_EVNTSEL_COUNTER_MASK)
-
-	return hw_event & CORE_EVNTSEL_MASK;
-}
-
-static const u64 amd_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
-		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
-		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
-		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
-		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
-		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-/*
- * AMD Performance Monitor K7 and later.
- */
-static const u64 amd_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-};
-
-static u64 amd_pmu_event_map(int hw_event)
-{
-	return amd_perfmon_event_map[hw_event];
-}
-
-static u64 amd_pmu_raw_event(u64 hw_event)
-{
-#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
-#define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK			\
-	(K7_EVNTSEL_EVENT_MASK |	\
-	 K7_EVNTSEL_UNIT_MASK  |	\
-	 K7_EVNTSEL_EDGE_MASK  |	\
-	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_COUNTER_MASK)
-
-	return hw_event & K7_EVNTSEL_MASK;
-}
-
-/*
- * Propagate counter elapsed time into the generic counter.
- * Can only be executed on the CPU where the counter is active.
- * Returns the delta events processed.
- */
-static u64
-x86_perf_counter_update(struct perf_counter *counter,
-			struct hw_perf_counter *hwc, int idx)
-{
-	int shift = 64 - x86_pmu.counter_bits;
-	u64 prev_raw_count, new_raw_count;
-	s64 delta;
-
-	if (idx == X86_PMC_IDX_FIXED_BTS)
-		return 0;
-
-	/*
-	 * Careful: an NMI might modify the previous counter value.
-	 *
-	 * Our tactic to handle this is to first atomically read and
-	 * exchange a new raw count - then add that new-prev delta
-	 * count to the generic counter atomically:
-	 */
-again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
-	rdmsrl(hwc->counter_base + idx, new_raw_count);
-
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
-					new_raw_count) != prev_raw_count)
-		goto again;
-
-	/*
-	 * Now we have the new raw value and have updated the prev
-	 * timestamp already. We can now calculate the elapsed delta
-	 * (counter-)time and add that to the generic counter.
-	 *
-	 * Careful, not all hw sign-extends above the physical width
-	 * of the count.
-	 */
-	delta = (new_raw_count << shift) - (prev_raw_count << shift);
-	delta >>= shift;
-
-	atomic64_add(delta, &counter->count);
-	atomic64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static atomic_t active_counters;
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-static bool reserve_pmc_hardware(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-	int i;
-
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		disable_lapic_nmi_watchdog();
-
-	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
-			goto perfctr_fail;
-	}
-
-	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
-			goto eventsel_fail;
-	}
-#endif
-
-	return true;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-eventsel_fail:
-	for (i--; i >= 0; i--)
-		release_evntsel_nmi(x86_pmu.eventsel + i);
-
-	i = x86_pmu.num_counters;
-
-perfctr_fail:
-	for (i--; i >= 0; i--)
-		release_perfctr_nmi(x86_pmu.perfctr + i);
-
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
-
-	return false;
-#endif
-}
-
-static void release_pmc_hardware(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-	int i;
-
-	for (i = 0; i < x86_pmu.num_counters; i++) {
-		release_perfctr_nmi(x86_pmu.perfctr + i);
-		release_evntsel_nmi(x86_pmu.eventsel + i);
-	}
-
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
-#endif
-}
-
-static inline bool bts_available(void)
-{
-	return x86_pmu.enable_bts != NULL;
-}
-
-static inline void init_debug_store_on_cpu(int cpu)
-{
-	struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
-
-	if (!ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-		     (u32)((u64)(unsigned long)ds),
-		     (u32)((u64)(unsigned long)ds >> 32));
-}
-
-static inline void fini_debug_store_on_cpu(int cpu)
-{
-	if (!per_cpu(cpu_hw_counters, cpu).ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static void release_bts_hardware(void)
-{
-	int cpu;
-
-	if (!bts_available())
-		return;
-
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		fini_debug_store_on_cpu(cpu);
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_counters, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
-	}
-
-	put_online_cpus();
-}
-
-static int reserve_bts_hardware(void)
-{
-	int cpu, err = 0;
-
-	if (!bts_available())
-		return 0;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-
-		err = -ENOMEM;
-		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-		if (unlikely(!buffer))
-			break;
-
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds)) {
-			kfree(buffer);
-			break;
-		}
-
-		ds->bts_buffer_base = (u64)(unsigned long)buffer;
-		ds->bts_index = ds->bts_buffer_base;
-		ds->bts_absolute_maximum =
-			ds->bts_buffer_base + BTS_BUFFER_SIZE;
-		ds->bts_interrupt_threshold =
-			ds->bts_absolute_maximum - BTS_OVFL_TH;
-
-		per_cpu(cpu_hw_counters, cpu).ds = ds;
-		err = 0;
-	}
-
-	if (err)
-		release_bts_hardware();
-	else {
-		for_each_online_cpu(cpu)
-			init_debug_store_on_cpu(cpu);
-	}
-
-	put_online_cpus();
-
-	return err;
-}
-
-static void hw_perf_counter_destroy(struct perf_counter *counter)
-{
-	if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
-		release_pmc_hardware();
-		release_bts_hardware();
-		mutex_unlock(&pmc_reserve_mutex);
-	}
-}
-
-static inline int x86_pmu_initialized(void)
-{
-	return x86_pmu.handle_irq != NULL;
-}
-
-static inline int
-set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
-{
-	unsigned int cache_type, cache_op, cache_result;
-	u64 config, val;
-
-	config = attr->config;
-
-	cache_type = (config >>  0) & 0xff;
-	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	cache_op = (config >>  8) & 0xff;
-	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	cache_result = (config >> 16) & 0xff;
-	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	val = hw_cache_event_ids[cache_type][cache_op][cache_result];
-
-	if (val == 0)
-		return -ENOENT;
-
-	if (val == -1)
-		return -EINVAL;
-
-	hwc->config |= val;
-
-	return 0;
-}
-
-static void intel_pmu_enable_bts(u64 config)
-{
-	unsigned long debugctlmsr;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr |= X86_DEBUGCTL_TR;
-	debugctlmsr |= X86_DEBUGCTL_BTS;
-	debugctlmsr |= X86_DEBUGCTL_BTINT;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
-
-	update_debugctlmsr(debugctlmsr);
-}
-
-static void intel_pmu_disable_bts(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	unsigned long debugctlmsr;
-
-	if (!cpuc->ds)
-		return;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr &=
-		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
-		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
-
-	update_debugctlmsr(debugctlmsr);
-}
-
-/*
- * Setup the hardware configuration for a given attr_type
- */
-static int __hw_perf_counter_init(struct perf_counter *counter)
-{
-	struct perf_counter_attr *attr = &counter->attr;
-	struct hw_perf_counter *hwc = &counter->hw;
-	u64 config;
-	int err;
-
-	if (!x86_pmu_initialized())
-		return -ENODEV;
-
-	err = 0;
-	if (!atomic_inc_not_zero(&active_counters)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_counters) == 0) {
-			if (!reserve_pmc_hardware())
-				err = -EBUSY;
-			else
-				err = reserve_bts_hardware();
-		}
-		if (!err)
-			atomic_inc(&active_counters);
-		mutex_unlock(&pmc_reserve_mutex);
-	}
-	if (err)
-		return err;
-
-	counter->destroy = hw_perf_counter_destroy;
-
-	/*
-	 * Generate PMC IRQs:
-	 * (keep 'enabled' bit clear for now)
-	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
-
-	/*
-	 * Count user and OS events unless requested not to.
-	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-
-	if (!hwc->sample_period) {
-		hwc->sample_period = x86_pmu.max_period;
-		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
-	} else {
-		/*
-		 * If we have a PMU initialized but no APIC
-		 * interrupts, we cannot sample hardware
-		 * counters (user-space has to fall back and
-		 * sample via a hrtimer based software counter):
-		 */
-		if (!x86_pmu.apic)
-			return -EOPNOTSUPP;
-	}
-
-	/*
-	 * Raw hw_event type provide the config in the hw_event structure
-	 */
-	if (attr->type == PERF_TYPE_RAW) {
-		hwc->config |= x86_pmu.raw_event(attr->config);
-		return 0;
-	}
-
-	if (attr->type == PERF_TYPE_HW_CACHE)
-		return set_ext_hw_attr(hwc, attr);
-
-	if (attr->config >= x86_pmu.max_events)
-		return -EINVAL;
-
-	/*
-	 * The generic map:
-	 */
-	config = x86_pmu.event_map(attr->config);
-
-	if (config == 0)
-		return -ENOENT;
-
-	if (config == -1LL)
-		return -EINVAL;
-
-	/*
-	 * Branch tracing:
-	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
-		/* BTS is not supported by this architecture. */
-		if (!bts_available())
-			return -EOPNOTSUPP;
-
-		/* BTS is currently only allowed for user-mode. */
-		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
-			return -EOPNOTSUPP;
-	}
-
-	hwc->config |= config;
-
-	return 0;
-}
-
-static void p6_pmu_disable_all(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	u64 val;
-
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
-	/* p6 only has one enable register */
-	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static void intel_pmu_disable_all(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
-		intel_pmu_disable_bts();
-}
-
-static void amd_pmu_disable_all(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	int idx;
-
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	/*
-	 * ensure we write the disable before we start disabling the
-	 * counters proper, so that amd_pmu_enable_counter() does the
-	 * right thing.
-	 */
-	barrier();
-
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		u64 val;
-
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
-			continue;
-		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
-	}
-}
-
-void hw_perf_disable(void)
-{
-	if (!x86_pmu_initialized())
-		return;
-	return x86_pmu.disable_all();
-}
-
-static void p6_pmu_enable_all(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	unsigned long val;
-
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
-	/* p6 only has one enable register */
-	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static void intel_pmu_enable_all(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
-
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
-		struct perf_counter *counter =
-			cpuc->counters[X86_PMC_IDX_FIXED_BTS];
-
-		if (WARN_ON_ONCE(!counter))
-			return;
-
-		intel_pmu_enable_bts(counter->hw.config);
-	}
-}
-
-static void amd_pmu_enable_all(void)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	int idx;
-
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		struct perf_counter *counter = cpuc->counters[idx];
-		u64 val;
-
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		val = counter->hw.config;
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
-	}
-}
-
-void hw_perf_enable(void)
-{
-	if (!x86_pmu_initialized())
-		return;
-	x86_pmu.enable_all();
-}
-
-static inline u64 intel_pmu_get_status(void)
-{
-	u64 status;
-
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
-	return status;
-}
-
-static inline void intel_pmu_ack_status(u64 ack)
-{
-	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
-}
-
-static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	(void)checking_wrmsrl(hwc->config_base + idx,
-			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
-}
-
-static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
-}
-
-static inline void
-intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
-{
-	int idx = __idx - X86_PMC_IDX_FIXED;
-	u64 ctrl_val, mask;
-
-	mask = 0xfULL << (idx * 4);
-
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static inline void
-p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	u64 val = P6_NOP_COUNTER;
-
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
-}
-
-static inline void
-intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		intel_pmu_disable_bts();
-		return;
-	}
-
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_disable_fixed(hwc, idx);
-		return;
-	}
-
-	x86_pmu_disable_counter(hwc, idx);
-}
-
-static inline void
-amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	x86_pmu_disable_counter(hwc, idx);
-}
-
-static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
-
-/*
- * Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the counter disabled in hw:
- */
-static int
-x86_perf_counter_set_period(struct perf_counter *counter,
-			     struct hw_perf_counter *hwc, int idx)
-{
-	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int err, ret = 0;
-
-	if (idx == X86_PMC_IDX_FIXED_BTS)
-		return 0;
-
-	/*
-	 * If we are way outside a reasoable range then just skip forward:
-	 */
-	if (unlikely(left <= -period)) {
-		left = period;
-		atomic64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		atomic64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-	/*
-	 * Quirk: certain CPUs dont like it if just 1 hw_event is left:
-	 */
-	if (unlikely(left < 2))
-		left = 2;
-
-	if (left > x86_pmu.max_period)
-		left = x86_pmu.max_period;
-
-	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
-
-	/*
-	 * The hw counter starts counting from this counter offset,
-	 * mark it to be able to extra future deltas:
-	 */
-	atomic64_set(&hwc->prev_count, (u64)-left);
-
-	err = checking_wrmsrl(hwc->counter_base + idx,
-			     (u64)(-left) & x86_pmu.counter_mask);
-
-	perf_counter_update_userpage(counter);
-
-	return ret;
-}
-
-static inline void
-intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
-{
-	int idx = __idx - X86_PMC_IDX_FIXED;
-	u64 ctrl_val, bits, mask;
-	int err;
-
-	/*
-	 * Enable IRQ generation (0x8),
-	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
-	 * if requested:
-	 */
-	bits = 0x8ULL;
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
-		bits |= 0x2;
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
-		bits |= 0x1;
-	bits <<= (idx * 4);
-	mask = 0xfULL << (idx * 4);
-
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	ctrl_val |= bits;
-	err = checking_wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	u64 val;
-
-	val = hwc->config;
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
-}
-
-
-static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		if (!__get_cpu_var(cpu_hw_counters).enabled)
-			return;
-
-		intel_pmu_enable_bts(hwc->config);
-		return;
-	}
-
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_enable_fixed(hwc, idx);
-		return;
-	}
-
-	x86_pmu_enable_counter(hwc, idx);
-}
-
-static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	if (cpuc->enabled)
-		x86_pmu_enable_counter(hwc, idx);
-}
-
-static int
-fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
-{
-	unsigned int hw_event;
-
-	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
-	if (unlikely((hw_event ==
-		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
-		     (hwc->sample_period == 1)))
-		return X86_PMC_IDX_FIXED_BTS;
-
-	if (!x86_pmu.num_counters_fixed)
-		return -1;
-
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
-		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
-		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
-		return X86_PMC_IDX_FIXED_BUS_CYCLES;
-
-	return -1;
-}
-
-/*
- * Find a PMC slot for the freshly enabled / scheduled in counter:
- */
-static int x86_pmu_enable(struct perf_counter *counter)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
-	int idx;
-
-	idx = fixed_mode_idx(counter, hwc);
-	if (idx == X86_PMC_IDX_FIXED_BTS) {
-		/* BTS is already occupied. */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			return -EAGAIN;
-
-		hwc->config_base	= 0;
-		hwc->counter_base	= 0;
-		hwc->idx		= idx;
-	} else if (idx >= 0) {
-		/*
-		 * Try to get the fixed counter, if that is already taken
-		 * then try to get a generic counter:
-		 */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			goto try_generic;
-
-		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		/*
-		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
-		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
-		 */
-		hwc->counter_base =
-			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
-		hwc->idx = idx;
-	} else {
-		idx = hwc->idx;
-		/* Try to get the previous generic counter again */
-		if (test_and_set_bit(idx, cpuc->used_mask)) {
-try_generic:
-			idx = find_first_zero_bit(cpuc->used_mask,
-						  x86_pmu.num_counters);
-			if (idx == x86_pmu.num_counters)
-				return -EAGAIN;
-
-			set_bit(idx, cpuc->used_mask);
-			hwc->idx = idx;
-		}
-		hwc->config_base  = x86_pmu.eventsel;
-		hwc->counter_base = x86_pmu.perfctr;
-	}
-
-	perf_counters_lapic_init();
-
-	x86_pmu.disable(hwc, idx);
-
-	cpuc->counters[idx] = counter;
-	set_bit(idx, cpuc->active_mask);
-
-	x86_perf_counter_set_period(counter, hwc, idx);
-	x86_pmu.enable(hwc, idx);
-
-	perf_counter_update_userpage(counter);
-
-	return 0;
-}
-
-static void x86_pmu_unthrottle(struct perf_counter *counter)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
-
-	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
-				cpuc->counters[hwc->idx] != counter))
-		return;
-
-	x86_pmu.enable(hwc, hwc->idx);
-}
-
-void perf_counter_print_debug(void)
-{
-	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
-	struct cpu_hw_counters *cpuc;
-	unsigned long flags;
-	int cpu, idx;
-
-	if (!x86_pmu.num_counters)
-		return;
-
-	local_irq_save(flags);
-
-	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
-
-	if (x86_pmu.version >= 2) {
-		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
-		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
-		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-
-		pr_info("\n");
-		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
-		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
-		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
-		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
-	}
-	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
-
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
-		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
-
-		prev_left = per_cpu(pmc_prev_left[idx], cpu);
-
-		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
-			cpu, idx, pmc_ctrl);
-		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
-			cpu, idx, pmc_count);
-		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
-			cpu, idx, prev_left);
-	}
-	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
-		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
-
-		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
-			cpu, idx, pmc_count);
-	}
-	local_irq_restore(flags);
-}
-
-static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc)
-{
-	struct debug_store *ds = cpuc->ds;
-	struct bts_record {
-		u64	from;
-		u64	to;
-		u64	flags;
-	};
-	struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
-	struct bts_record *at, *top;
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-	struct perf_sample_data data;
-	struct pt_regs regs;
-
-	if (!counter)
-		return;
-
-	if (!ds)
-		return;
-
-	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
-	top = (struct bts_record *)(unsigned long)ds->bts_index;
-
-	if (top <= at)
-		return;
-
-	ds->bts_index = ds->bts_buffer_base;
-
-
-	data.period	= counter->hw.last_period;
-	data.addr	= 0;
-	regs.ip		= 0;
-
-	/*
-	 * Prepare a generic sample, i.e. fill in the invariant fields.
-	 * We will overwrite the from and to address before we output
-	 * the sample.
-	 */
-	perf_prepare_sample(&header, &data, counter, &regs);
-
-	if (perf_output_begin(&handle, counter,
-			      header.size * (top - at), 1, 1))
-		return;
-
-	for (; at < top; at++) {
-		data.ip		= at->from;
-		data.addr	= at->to;
-
-		perf_output_sample(&handle, &header, &data, counter);
-	}
-
-	perf_output_end(&handle);
-
-	/* There's new data available. */
-	counter->hw.interrupts++;
-	counter->pending_kill = POLL_IN;
-}
-
-static void x86_pmu_disable(struct perf_counter *counter)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
-	int idx = hwc->idx;
-
-	/*
-	 * Must be done before we disable, otherwise the nmi handler
-	 * could reenable again:
-	 */
-	clear_bit(idx, cpuc->active_mask);
-	x86_pmu.disable(hwc, idx);
-
-	/*
-	 * Make sure the cleared pointer becomes visible before we
-	 * (potentially) free the counter:
-	 */
-	barrier();
-
-	/*
-	 * Drain the remaining delta count out of a counter
-	 * that we are disabling:
-	 */
-	x86_perf_counter_update(counter, hwc, idx);
-
-	/* Drain the remaining BTS records. */
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
-		intel_pmu_drain_bts_buffer(cpuc);
-
-	cpuc->counters[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
-
-	perf_counter_update_userpage(counter);
-}
-
-/*
- * Save and restart an expired counter. Called by NMI contexts,
- * so it has to be careful about preempting normal counter ops:
- */
-static int intel_pmu_save_and_restart(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	int idx = hwc->idx;
-	int ret;
-
-	x86_perf_counter_update(counter, hwc, idx);
-	ret = x86_perf_counter_set_period(counter, hwc, idx);
-
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-		intel_pmu_enable_counter(hwc, idx);
-
-	return ret;
-}
-
-static void intel_pmu_reset(void)
-{
-	struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds;
-	unsigned long flags;
-	int idx;
-
-	if (!x86_pmu.num_counters)
-		return;
-
-	local_irq_save(flags);
-
-	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
-
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
-		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
-	}
-	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
-		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
-	}
-	if (ds)
-		ds->bts_index = ds->bts_buffer_base;
-
-	local_irq_restore(flags);
-}
-
-static int p6_pmu_handle_irq(struct pt_regs *regs)
-{
-	struct perf_sample_data data;
-	struct cpu_hw_counters *cpuc;
-	struct perf_counter *counter;
-	struct hw_perf_counter *hwc;
-	int idx, handled = 0;
-	u64 val;
-
-	data.addr = 0;
-
-	cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		counter = cpuc->counters[idx];
-		hwc = &counter->hw;
-
-		val = x86_perf_counter_update(counter, hwc, idx);
-		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
-			continue;
-
-		/*
-		 * counter overflow
-		 */
-		handled		= 1;
-		data.period	= counter->hw.last_period;
-
-		if (!x86_perf_counter_set_period(counter, hwc, idx))
-			continue;
-
-		if (perf_counter_overflow(counter, 1, &data, regs))
-			p6_pmu_disable_counter(hwc, idx);
-	}
-
-	if (handled)
-		inc_irq_stat(apic_perf_irqs);
-
-	return handled;
-}
-
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
-{
-	struct perf_sample_data data;
-	struct cpu_hw_counters *cpuc;
-	int bit, loops;
-	u64 ack, status;
-
-	data.addr = 0;
-
-	cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	perf_disable();
-	intel_pmu_drain_bts_buffer(cpuc);
-	status = intel_pmu_get_status();
-	if (!status) {
-		perf_enable();
-		return 0;
-	}
-
-	loops = 0;
-again:
-	if (++loops > 100) {
-		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
-		perf_counter_print_debug();
-		intel_pmu_reset();
-		perf_enable();
-		return 1;
-	}
-
-	inc_irq_stat(apic_perf_irqs);
-	ack = status;
-	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-		struct perf_counter *counter = cpuc->counters[bit];
-
-		clear_bit(bit, (unsigned long *) &status);
-		if (!test_bit(bit, cpuc->active_mask))
-			continue;
-
-		if (!intel_pmu_save_and_restart(counter))
-			continue;
-
-		data.period = counter->hw.last_period;
-
-		if (perf_counter_overflow(counter, 1, &data, regs))
-			intel_pmu_disable_counter(&counter->hw, bit);
-	}
-
-	intel_pmu_ack_status(ack);
-
-	/*
-	 * Repeat if there is more work to be done:
-	 */
-	status = intel_pmu_get_status();
-	if (status)
-		goto again;
-
-	perf_enable();
-
-	return 1;
-}
-
-static int amd_pmu_handle_irq(struct pt_regs *regs)
-{
-	struct perf_sample_data data;
-	struct cpu_hw_counters *cpuc;
-	struct perf_counter *counter;
-	struct hw_perf_counter *hwc;
-	int idx, handled = 0;
-	u64 val;
-
-	data.addr = 0;
-
-	cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		counter = cpuc->counters[idx];
-		hwc = &counter->hw;
-
-		val = x86_perf_counter_update(counter, hwc, idx);
-		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
-			continue;
-
-		/*
-		 * counter overflow
-		 */
-		handled		= 1;
-		data.period	= counter->hw.last_period;
-
-		if (!x86_perf_counter_set_period(counter, hwc, idx))
-			continue;
-
-		if (perf_counter_overflow(counter, 1, &data, regs))
-			amd_pmu_disable_counter(hwc, idx);
-	}
-
-	if (handled)
-		inc_irq_stat(apic_perf_irqs);
-
-	return handled;
-}
-
-void smp_perf_pending_interrupt(struct pt_regs *regs)
-{
-	irq_enter();
-	ack_APIC_irq();
-	inc_irq_stat(apic_pending_irqs);
-	perf_counter_do_pending();
-	irq_exit();
-}
-
-void set_perf_counter_pending(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
-#endif
-}
-
-void perf_counters_lapic_init(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-	if (!x86_pmu.apic || !x86_pmu_initialized())
-		return;
-
-	/*
-	 * Always use NMI for PMU
-	 */
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
-}
-
-static int __kprobes
-perf_counter_nmi_handler(struct notifier_block *self,
-			 unsigned long cmd, void *__args)
-{
-	struct die_args *args = __args;
-	struct pt_regs *regs;
-
-	if (!atomic_read(&active_counters))
-		return NOTIFY_DONE;
-
-	switch (cmd) {
-	case DIE_NMI:
-	case DIE_NMI_IPI:
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	regs = args->regs;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
-	/*
-	 * Can't rely on the handled return value to say it was our NMI, two
-	 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
-	 *
-	 * If the first NMI handles both, the latter will be empty and daze
-	 * the CPU.
-	 */
-	x86_pmu.handle_irq(regs);
-
-	return NOTIFY_STOP;
-}
-
-static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
-	.notifier_call		= perf_counter_nmi_handler,
-	.next			= NULL,
-	.priority		= 1
-};
-
-static struct x86_pmu p6_pmu = {
-	.name			= "p6",
-	.handle_irq		= p6_pmu_handle_irq,
-	.disable_all		= p6_pmu_disable_all,
-	.enable_all		= p6_pmu_enable_all,
-	.enable			= p6_pmu_enable_counter,
-	.disable		= p6_pmu_disable_counter,
-	.eventsel		= MSR_P6_EVNTSEL0,
-	.perfctr		= MSR_P6_PERFCTR0,
-	.event_map		= p6_pmu_event_map,
-	.raw_event		= p6_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
-	.apic			= 1,
-	.max_period		= (1ULL << 31) - 1,
-	.version		= 0,
-	.num_counters		= 2,
-	/*
-	 * Counters have 40 bits implemented. However they are designed such
-	 * that bits [32-39] are sign extensions of bit 31. As such the
-	 * effective width of a counter for P6-like PMU is 32 bits only.
-	 *
-	 * See IA-32 Intel Architecture Software developer manual Vol 3B
-	 */
-	.counter_bits		= 32,
-	.counter_mask		= (1ULL << 32) - 1,
-};
-
-static struct x86_pmu intel_pmu = {
-	.name			= "Intel",
-	.handle_irq		= intel_pmu_handle_irq,
-	.disable_all		= intel_pmu_disable_all,
-	.enable_all		= intel_pmu_enable_all,
-	.enable			= intel_pmu_enable_counter,
-	.disable		= intel_pmu_disable_counter,
-	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
-	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
-	.apic			= 1,
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic counter period:
-	 */
-	.max_period		= (1ULL << 31) - 1,
-	.enable_bts		= intel_pmu_enable_bts,
-	.disable_bts		= intel_pmu_disable_bts,
-};
-
-static struct x86_pmu amd_pmu = {
-	.name			= "AMD",
-	.handle_irq		= amd_pmu_handle_irq,
-	.disable_all		= amd_pmu_disable_all,
-	.enable_all		= amd_pmu_enable_all,
-	.enable			= amd_pmu_enable_counter,
-	.disable		= amd_pmu_disable_counter,
-	.eventsel		= MSR_K7_EVNTSEL0,
-	.perfctr		= MSR_K7_PERFCTR0,
-	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= 4,
-	.counter_bits		= 48,
-	.counter_mask		= (1ULL << 48) - 1,
-	.apic			= 1,
-	/* use highest bit to detect overflow */
-	.max_period		= (1ULL << 47) - 1,
-};
-
-static int p6_pmu_init(void)
-{
-	switch (boot_cpu_data.x86_model) {
-	case 1:
-	case 3:  /* Pentium Pro */
-	case 5:
-	case 6:  /* Pentium II */
-	case 7:
-	case 8:
-	case 11: /* Pentium III */
-		break;
-	case 9:
-	case 13:
-		/* Pentium M */
-		break;
-	default:
-		pr_cont("unsupported p6 CPU model %d ",
-			boot_cpu_data.x86_model);
-		return -ENODEV;
-	}
-
-	x86_pmu = p6_pmu;
-
-	if (!cpu_has_apic) {
-		pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
-		pr_info("no hardware sampling interrupt available.\n");
-		x86_pmu.apic = 0;
-	}
-
-	return 0;
-}
-
-static int intel_pmu_init(void)
-{
-	union cpuid10_edx edx;
-	union cpuid10_eax eax;
-	unsigned int unused;
-	unsigned int ebx;
-	int version;
-
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-		/* check for P6 processor family */
-	   if (boot_cpu_data.x86 == 6) {
-		return p6_pmu_init();
-	   } else {
-		return -ENODEV;
-	   }
-	}
-
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Branch Misses Retired hw_event or not.
-	 */
-	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
-	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
-		return -ENODEV;
-
-	version = eax.split.version_id;
-	if (version < 2)
-		return -ENODEV;
-
-	x86_pmu				= intel_pmu;
-	x86_pmu.version			= version;
-	x86_pmu.num_counters		= eax.split.num_counters;
-	x86_pmu.counter_bits		= eax.split.bit_width;
-	x86_pmu.counter_mask		= (1ULL << eax.split.bit_width) - 1;
-
-	/*
-	 * Quirk: v2 perfmon does not report fixed-purpose counters, so
-	 * assume at least 3 counters:
-	 */
-	x86_pmu.num_counters_fixed	= max((int)edx.split.num_counters_fixed, 3);
-
-	/*
-	 * Install the hw-cache-events table:
-	 */
-	switch (boot_cpu_data.x86_model) {
-	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
-	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
-	case 29: /* six-core 45 nm xeon "Dunnington" */
-		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		pr_cont("Core2 events, ");
-		break;
-	default:
-	case 26:
-		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		pr_cont("Nehalem/Corei7 events, ");
-		break;
-	case 28:
-		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		pr_cont("Atom events, ");
-		break;
-	}
-	return 0;
-}
-
-static int amd_pmu_init(void)
-{
-	/* Performance-monitoring supported from K7 and later: */
-	if (boot_cpu_data.x86 < 6)
-		return -ENODEV;
-
-	x86_pmu = amd_pmu;
-
-	/* Events are common for all AMDs */
-	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
-	       sizeof(hw_cache_event_ids));
-
-	return 0;
-}
-
-void __init init_hw_perf_counters(void)
-{
-	int err;
-
-	pr_info("Performance Counters: ");
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_INTEL:
-		err = intel_pmu_init();
-		break;
-	case X86_VENDOR_AMD:
-		err = amd_pmu_init();
-		break;
-	default:
-		return;
-	}
-	if (err != 0) {
-		pr_cont("no PMU driver, software counters only.\n");
-		return;
-	}
-
-	pr_cont("%s PMU driver.\n", x86_pmu.name);
-
-	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
-		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
-		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
-	}
-	perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
-	perf_max_counters = x86_pmu.num_counters;
-
-	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
-		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
-		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
-	}
-
-	perf_counter_mask |=
-		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
-	x86_pmu.intel_ctrl = perf_counter_mask;
-
-	perf_counters_lapic_init();
-	register_die_notifier(&perf_counter_nmi_notifier);
-
-	pr_info("... version:                 %d\n",     x86_pmu.version);
-	pr_info("... bit width:               %d\n",     x86_pmu.counter_bits);
-	pr_info("... generic counters:        %d\n",     x86_pmu.num_counters);
-	pr_info("... value mask:              %016Lx\n", x86_pmu.counter_mask);
-	pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose counters:  %d\n",     x86_pmu.num_counters_fixed);
-	pr_info("... counter mask:            %016Lx\n", perf_counter_mask);
-}
-
-static inline void x86_pmu_read(struct perf_counter *counter)
-{
-	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
-}
-
-static const struct pmu pmu = {
-	.enable		= x86_pmu_enable,
-	.disable	= x86_pmu_disable,
-	.read		= x86_pmu_read,
-	.unthrottle	= x86_pmu_unthrottle,
-};
-
-const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
-{
-	int err;
-
-	err = __hw_perf_counter_init(counter);
-	if (err) {
-		if (counter->destroy)
-			counter->destroy(counter);
-		return ERR_PTR(err);
-	}
-
-	return &pmu;
-}
-
-/*
- * callchain support
- */
-
-static inline
-void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-	if (entry->nr < PERF_MAX_STACK_DEPTH)
-		entry->ip[entry->nr++] = ip;
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_nmi_frame);
-
-
-static void
-backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
-static int backtrace_stack(void *data, char *name)
-{
-	per_cpu(in_nmi_frame, smp_processor_id()) =
-			x86_is_stack_id(NMI_STACK, name);
-
-	return 0;
-}
-
-static void backtrace_address(void *data, unsigned long addr, int reliable)
-{
-	struct perf_callchain_entry *entry = data;
-
-	if (per_cpu(in_nmi_frame, smp_processor_id()))
-		return;
-
-	if (reliable)
-		callchain_store(entry, addr);
-}
-
-static const struct stacktrace_ops backtrace_ops = {
-	.warning		= backtrace_warning,
-	.warning_symbol		= backtrace_warning_symbol,
-	.stack			= backtrace_stack,
-	.address		= backtrace_address,
-};
-
-#include "../dumpstack.h"
-
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
-	callchain_store(entry, regs->ip);
-
-	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
-}
-
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page, type);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
-		put_page(page);
-
-		len  += size;
-		to   += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
-
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
-{
-	unsigned long bytes;
-
-	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
-
-	return bytes == sizeof(*frame);
-}
-
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-	struct stack_frame frame;
-	const void __user *fp;
-
-	if (!user_mode(regs))
-		regs = task_pt_regs(current);
-
-	fp = (void __user *)regs->bp;
-
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, regs->ip);
-
-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
-		frame.next_frame	     = NULL;
-		frame.return_address = 0;
-
-		if (!copy_stack_frame(fp, &frame))
-			break;
-
-		if ((unsigned long)fp < regs->sp)
-			break;
-
-		callchain_store(entry, frame.return_address);
-		fp = frame.next_frame;
-	}
-}
-
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (!current || current->pid == 0)
-		return;
-
-	if (is_user && current->state != TASK_RUNNING)
-		return;
-
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-
-	if (current->mm)
-		perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry;
-
-	if (in_nmi())
-		entry = &__get_cpu_var(pmc_nmi_entry);
-	else
-		entry = &__get_cpu_var(pmc_irq_entry);
-
-	entry->nr = 0;
-
-	perf_do_callchain(regs, entry);
-
-	return entry;
-}
-
-void hw_perf_counter_setup_online(int cpu)
-{
-	init_debug_store_on_cpu(cpu);
-}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
new file mode 100644
index 0000000..0d03629
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -0,0 +1,2298 @@
+/*
+ * Performance events x86 architecture code
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/highmem.h>
+#include <linux/cpu.h>
+
+#include <asm/apic.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+
+static u64 perf_event_mask __read_mostly;
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS	4
+
+/* The size of a BTS record in bytes: */
+#define BTS_RECORD_SIZE		24
+
+/* The size of a per-cpu BTS buffer in bytes: */
+#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
+
+/* The BTS overflow threshold in bytes from the end of the buffer: */
+#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
+
+
+/*
+ * Bits in the debugctlmsr controlling branch tracing.
+ */
+#define X86_DEBUGCTL_TR			(1 << 6)
+#define X86_DEBUGCTL_BTS		(1 << 7)
+#define X86_DEBUGCTL_BTINT		(1 << 8)
+#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
+#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+	u64	bts_buffer_base;
+	u64	bts_index;
+	u64	bts_absolute_maximum;
+	u64	bts_interrupt_threshold;
+	u64	pebs_buffer_base;
+	u64	pebs_index;
+	u64	pebs_absolute_maximum;
+	u64	pebs_interrupt_threshold;
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
+};
+
+struct cpu_hw_events {
+	struct perf_event	*events[X86_PMC_IDX_MAX];
+	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		interrupts;
+	int			enabled;
+	struct debug_store	*ds;
+};
+
+/*
+ * struct x86_pmu - generic x86 pmu
+ */
+struct x86_pmu {
+	const char	*name;
+	int		version;
+	int		(*handle_irq)(struct pt_regs *);
+	void		(*disable_all)(void);
+	void		(*enable_all)(void);
+	void		(*enable)(struct hw_perf_event *, int);
+	void		(*disable)(struct hw_perf_event *, int);
+	unsigned	eventsel;
+	unsigned	perfctr;
+	u64		(*event_map)(int);
+	u64		(*raw_event)(u64);
+	int		max_events;
+	int		num_events;
+	int		num_events_fixed;
+	int		event_bits;
+	u64		event_mask;
+	int		apic;
+	u64		max_period;
+	u64		intel_ctrl;
+	void		(*enable_bts)(u64 config);
+	void		(*disable_bts)(void);
+};
+
+static struct x86_pmu x86_pmu __read_mostly;
+
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+	.enabled = 1,
+};
+
+/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
+};
+
+static u64 p6_pmu_event_map(int hw_event)
+{
+	return p6_perfmon_event_map[hw_event];
+}
+
+/*
+ * Event setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_EVENT			0x0000002EULL
+
+static u64 p6_pmu_raw_event(u64 hw_event)
+{
+#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define P6_EVNTSEL_INV_MASK		0x00800000ULL
+#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
+
+#define P6_EVNTSEL_MASK			\
+	(P6_EVNTSEL_EVENT_MASK |	\
+	 P6_EVNTSEL_UNIT_MASK  |	\
+	 P6_EVNTSEL_EDGE_MASK  |	\
+	 P6_EVNTSEL_INV_MASK   |	\
+	 P6_EVNTSEL_REG_MASK)
+
+	return hw_event & P6_EVNTSEL_MASK;
+}
+
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static const u64 intel_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+};
+
+static u64 intel_pmu_event_map(int hw_event)
+{
+	return intel_perfmon_event_map[hw_event];
+}
+
+/*
+ * Generalized hw caching related hw_event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+static u64 __read_mostly hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+static const u64 nehalem_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static const u64 core2_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static const u64 atom_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static u64 intel_pmu_raw_event(u64 hw_event)
+{
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
+#define CORE_EVNTSEL_REG_MASK	0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK		\
+	(CORE_EVNTSEL_EVENT_MASK |	\
+	 CORE_EVNTSEL_UNIT_MASK  |	\
+	 CORE_EVNTSEL_EDGE_MASK  |	\
+	 CORE_EVNTSEL_INV_MASK  |	\
+	 CORE_EVNTSEL_REG_MASK)
+
+	return hw_event & CORE_EVNTSEL_MASK;
+}
+
+static const u64 amd_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
+		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
+		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+};
+
+static u64 amd_pmu_event_map(int hw_event)
+{
+	return amd_perfmon_event_map[hw_event];
+}
+
+static u64 amd_pmu_raw_event(u64 hw_event)
+{
+#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
+#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
+#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
+#define K7_EVNTSEL_INV_MASK	0x000800000ULL
+#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
+
+#define K7_EVNTSEL_MASK			\
+	(K7_EVNTSEL_EVENT_MASK |	\
+	 K7_EVNTSEL_UNIT_MASK  |	\
+	 K7_EVNTSEL_EDGE_MASK  |	\
+	 K7_EVNTSEL_INV_MASK   |	\
+	 K7_EVNTSEL_REG_MASK)
+
+	return hw_event & K7_EVNTSEL_MASK;
+}
+
+/*
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
+ * Returns the delta events processed.
+ */
+static u64
+x86_perf_event_update(struct perf_event *event,
+			struct hw_perf_event *hwc, int idx)
+{
+	int shift = 64 - x86_pmu.event_bits;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+	if (idx == X86_PMC_IDX_FIXED_BTS)
+		return 0;
+
+	/*
+	 * Careful: an NMI might modify the previous event value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic event atomically:
+	 */
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	rdmsrl(hwc->event_base + idx, new_raw_count);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (event-)time and add that to the generic event.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	atomic64_add(delta, &event->count);
+	atomic64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static atomic_t active_events;
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+static bool reserve_pmc_hardware(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	int i;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		disable_lapic_nmi_watchdog();
+
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+			goto perfctr_fail;
+	}
+
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+			goto eventsel_fail;
+	}
+#endif
+
+	return true;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+eventsel_fail:
+	for (i--; i >= 0; i--)
+		release_evntsel_nmi(x86_pmu.eventsel + i);
+
+	i = x86_pmu.num_events;
+
+perfctr_fail:
+	for (i--; i >= 0; i--)
+		release_perfctr_nmi(x86_pmu.perfctr + i);
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		enable_lapic_nmi_watchdog();
+
+	return false;
+#endif
+}
+
+static void release_pmc_hardware(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	int i;
+
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		release_perfctr_nmi(x86_pmu.perfctr + i);
+		release_evntsel_nmi(x86_pmu.eventsel + i);
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		enable_lapic_nmi_watchdog();
+#endif
+}
+
+static inline bool bts_available(void)
+{
+	return x86_pmu.enable_bts != NULL;
+}
+
+static inline void init_debug_store_on_cpu(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds)
+		return;
+
+	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+		     (u32)((u64)(unsigned long)ds),
+		     (u32)((u64)(unsigned long)ds >> 32));
+}
+
+static inline void fini_debug_store_on_cpu(int cpu)
+{
+	if (!per_cpu(cpu_hw_events, cpu).ds)
+		return;
+
+	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+}
+
+static void release_bts_hardware(void)
+{
+	int cpu;
+
+	if (!bts_available())
+		return;
+
+	get_online_cpus();
+
+	for_each_online_cpu(cpu)
+		fini_debug_store_on_cpu(cpu);
+
+	for_each_possible_cpu(cpu) {
+		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+		if (!ds)
+			continue;
+
+		per_cpu(cpu_hw_events, cpu).ds = NULL;
+
+		kfree((void *)(unsigned long)ds->bts_buffer_base);
+		kfree(ds);
+	}
+
+	put_online_cpus();
+}
+
+static int reserve_bts_hardware(void)
+{
+	int cpu, err = 0;
+
+	if (!bts_available())
+		return 0;
+
+	get_online_cpus();
+
+	for_each_possible_cpu(cpu) {
+		struct debug_store *ds;
+		void *buffer;
+
+		err = -ENOMEM;
+		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
+		if (unlikely(!buffer))
+			break;
+
+		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+		if (unlikely(!ds)) {
+			kfree(buffer);
+			break;
+		}
+
+		ds->bts_buffer_base = (u64)(unsigned long)buffer;
+		ds->bts_index = ds->bts_buffer_base;
+		ds->bts_absolute_maximum =
+			ds->bts_buffer_base + BTS_BUFFER_SIZE;
+		ds->bts_interrupt_threshold =
+			ds->bts_absolute_maximum - BTS_OVFL_TH;
+
+		per_cpu(cpu_hw_events, cpu).ds = ds;
+		err = 0;
+	}
+
+	if (err)
+		release_bts_hardware();
+	else {
+		for_each_online_cpu(cpu)
+			init_debug_store_on_cpu(cpu);
+	}
+
+	put_online_cpus();
+
+	return err;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
+		release_bts_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static inline int x86_pmu_initialized(void)
+{
+	return x86_pmu.handle_irq != NULL;
+}
+
+static inline int
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	u64 config, val;
+
+	config = attr->config;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	val = hw_cache_event_ids[cache_type][cache_op][cache_result];
+
+	if (val == 0)
+		return -ENOENT;
+
+	if (val == -1)
+		return -EINVAL;
+
+	hwc->config |= val;
+
+	return 0;
+}
+
+static void intel_pmu_enable_bts(u64 config)
+{
+	unsigned long debugctlmsr;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr |= X86_DEBUGCTL_TR;
+	debugctlmsr |= X86_DEBUGCTL_BTS;
+	debugctlmsr |= X86_DEBUGCTL_BTINT;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_bts(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long debugctlmsr;
+
+	if (!cpuc->ds)
+		return;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr &=
+		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
+		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+/*
+ * Setup the hardware configuration for a given attr_type
+ */
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	u64 config;
+	int err;
+
+	if (!x86_pmu_initialized())
+		return -ENODEV;
+
+	err = 0;
+	if (!atomic_inc_not_zero(&active_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&active_events) == 0) {
+			if (!reserve_pmc_hardware())
+				err = -EBUSY;
+			else
+				err = reserve_bts_hardware();
+		}
+		if (!err)
+			atomic_inc(&active_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	if (err)
+		return err;
+
+	event->destroy = hw_perf_event_destroy;
+
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to.
+	 */
+	if (!attr->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!attr->exclude_kernel)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	if (!hwc->sample_period) {
+		hwc->sample_period = x86_pmu.max_period;
+		hwc->last_period = hwc->sample_period;
+		atomic64_set(&hwc->period_left, hwc->sample_period);
+	} else {
+		/*
+		 * If we have a PMU initialized but no APIC
+		 * interrupts, we cannot sample hardware
+		 * events (user-space has to fall back and
+		 * sample via a hrtimer based software event):
+		 */
+		if (!x86_pmu.apic)
+			return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Raw hw_event type provide the config in the hw_event structure
+	 */
+	if (attr->type == PERF_TYPE_RAW) {
+		hwc->config |= x86_pmu.raw_event(attr->config);
+		return 0;
+	}
+
+	if (attr->type == PERF_TYPE_HW_CACHE)
+		return set_ext_hw_attr(hwc, attr);
+
+	if (attr->config >= x86_pmu.max_events)
+		return -EINVAL;
+
+	/*
+	 * The generic map:
+	 */
+	config = x86_pmu.event_map(attr->config);
+
+	if (config == 0)
+		return -ENOENT;
+
+	if (config == -1LL)
+		return -EINVAL;
+
+	/*
+	 * Branch tracing:
+	 */
+	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
+	    (hwc->sample_period == 1)) {
+		/* BTS is not supported by this architecture. */
+		if (!bts_available())
+			return -EOPNOTSUPP;
+
+		/* BTS is currently only allowed for user-mode. */
+		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+			return -EOPNOTSUPP;
+	}
+
+	hwc->config |= config;
+
+	return 0;
+}
+
+static void p6_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val;
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+	barrier();
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void intel_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+	barrier();
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+		intel_pmu_disable_bts();
+}
+
+static void amd_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx;
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+	/*
+	 * ensure we write the disable before we start disabling the
+	 * events proper, so that amd_pmu_enable_event() does the
+	 * right thing.
+	 */
+	barrier();
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		u64 val;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+			continue;
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+	}
+}
+
+void hw_perf_disable(void)
+{
+	if (!x86_pmu_initialized())
+		return;
+	return x86_pmu.disable_all();
+}
+
+static void p6_pmu_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long val;
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void intel_pmu_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+		struct perf_event *event =
+			cpuc->events[X86_PMC_IDX_FIXED_BTS];
+
+		if (WARN_ON_ONCE(!event))
+			return;
+
+		intel_pmu_enable_bts(event->hw.config);
+	}
+}
+
+static void amd_pmu_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx;
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
+		u64 val;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		val = event->hw.config;
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+	}
+}
+
+void hw_perf_enable(void)
+{
+	if (!x86_pmu_initialized())
+		return;
+	x86_pmu.enable_all();
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+	u64 status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+	return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	(void)checking_wrmsrl(hwc->config_base + idx,
+			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
+}
+
+static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
+}
+
+static inline void
+intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, mask;
+
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val = P6_NOP_EVENT;
+
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+static inline void
+intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		intel_pmu_disable_bts();
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_disable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_disable_event(hwc, idx);
+}
+
+static inline void
+amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	x86_pmu_disable_event(hwc, idx);
+}
+
+static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+static int
+x86_perf_event_set_period(struct perf_event *event,
+			     struct hw_perf_event *hwc, int idx)
+{
+	s64 left = atomic64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int err, ret = 0;
+
+	if (idx == X86_PMC_IDX_FIXED_BTS)
+		return 0;
+
+	/*
+	 * If we are way outside a reasoable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+	/*
+	 * Quirk: certain CPUs dont like it if just 1 hw_event is left:
+	 */
+	if (unlikely(left < 2))
+		left = 2;
+
+	if (left > x86_pmu.max_period)
+		left = x86_pmu.max_period;
+
+	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
+
+	/*
+	 * The hw event starts counting from this event offset,
+	 * mark it to be able to extra future deltas:
+	 */
+	atomic64_set(&hwc->prev_count, (u64)-left);
+
+	err = checking_wrmsrl(hwc->event_base + idx,
+			     (u64)(-left) & x86_pmu.event_mask);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static inline void
+intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, bits, mask;
+	int err;
+
+	/*
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
+	 */
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		bits |= 0x1;
+	bits <<= (idx * 4);
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	ctrl_val |= bits;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+
+static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		if (!__get_cpu_var(cpu_hw_events).enabled)
+			return;
+
+		intel_pmu_enable_bts(hwc->config);
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_enable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_enable_event(hwc, idx);
+}
+
+static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->enabled)
+		x86_pmu_enable_event(hwc, idx);
+}
+
+static int
+fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
+{
+	unsigned int hw_event;
+
+	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+
+	if (unlikely((hw_event ==
+		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+		     (hwc->sample_period == 1)))
+		return X86_PMC_IDX_FIXED_BTS;
+
+	if (!x86_pmu.num_events_fixed)
+		return -1;
+
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
+		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
+		return X86_PMC_IDX_FIXED_CPU_CYCLES;
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
+		return X86_PMC_IDX_FIXED_BUS_CYCLES;
+
+	return -1;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in event:
+ */
+static int x86_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	idx = fixed_mode_idx(event, hwc);
+	if (idx == X86_PMC_IDX_FIXED_BTS) {
+		/* BTS is already occupied. */
+		if (test_and_set_bit(idx, cpuc->used_mask))
+			return -EAGAIN;
+
+		hwc->config_base	= 0;
+		hwc->event_base	= 0;
+		hwc->idx		= idx;
+	} else if (idx >= 0) {
+		/*
+		 * Try to get the fixed event, if that is already taken
+		 * then try to get a generic event:
+		 */
+		if (test_and_set_bit(idx, cpuc->used_mask))
+			goto try_generic;
+
+		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		/*
+		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
+		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+		 */
+		hwc->event_base =
+			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+		hwc->idx = idx;
+	} else {
+		idx = hwc->idx;
+		/* Try to get the previous generic event again */
+		if (test_and_set_bit(idx, cpuc->used_mask)) {
+try_generic:
+			idx = find_first_zero_bit(cpuc->used_mask,
+						  x86_pmu.num_events);
+			if (idx == x86_pmu.num_events)
+				return -EAGAIN;
+
+			set_bit(idx, cpuc->used_mask);
+			hwc->idx = idx;
+		}
+		hwc->config_base  = x86_pmu.eventsel;
+		hwc->event_base = x86_pmu.perfctr;
+	}
+
+	perf_events_lapic_init();
+
+	x86_pmu.disable(hwc, idx);
+
+	cpuc->events[idx] = event;
+	set_bit(idx, cpuc->active_mask);
+
+	x86_perf_event_set_period(event, hwc, idx);
+	x86_pmu.enable(hwc, idx);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void x86_pmu_unthrottle(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
+				cpuc->events[hwc->idx] != event))
+		return;
+
+	x86_pmu.enable(hwc, hwc->idx);
+}
+
+void perf_event_print_debug(void)
+{
+	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	struct cpu_hw_events *cpuc;
+	unsigned long flags;
+	int cpu, idx;
+
+	if (!x86_pmu.num_events)
+		return;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	if (x86_pmu.version >= 2) {
+		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+
+		pr_info("\n");
+		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
+		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
+		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
+		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+	}
+	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
+		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
+
+		prev_left = per_cpu(pmc_prev_left[idx], cpu);
+
+		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
+			cpu, idx, pmc_ctrl);
+		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
+			cpu, idx, pmc_count);
+		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
+			cpu, idx, prev_left);
+	}
+	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+
+		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
+			cpu, idx, pmc_count);
+	}
+	local_irq_restore(flags);
+}
+
+static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	struct bts_record {
+		u64	from;
+		u64	to;
+		u64	flags;
+	};
+	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
+
+	if (!event)
+		return;
+
+	if (!ds)
+		return;
+
+	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+	top = (struct bts_record *)(unsigned long)ds->bts_index;
+
+	if (top <= at)
+		return;
+
+	ds->bts_index = ds->bts_buffer_base;
+
+
+	data.period	= event->hw.last_period;
+	data.addr	= 0;
+	regs.ip		= 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, event, &regs);
+
+	if (perf_output_begin(&handle, event,
+			      header.size * (top - at), 1, 1))
+		return;
+
+	for (; at < top; at++) {
+		data.ip		= at->from;
+		data.addr	= at->to;
+
+		perf_output_sample(&handle, &header, &data, event);
+	}
+
+	perf_output_end(&handle);
+
+	/* There's new data available. */
+	event->hw.interrupts++;
+	event->pending_kill = POLL_IN;
+}
+
+static void x86_pmu_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	/*
+	 * Must be done before we disable, otherwise the nmi handler
+	 * could reenable again:
+	 */
+	clear_bit(idx, cpuc->active_mask);
+	x86_pmu.disable(hwc, idx);
+
+	/*
+	 * Make sure the cleared pointer becomes visible before we
+	 * (potentially) free the event:
+	 */
+	barrier();
+
+	/*
+	 * Drain the remaining delta count out of a event
+	 * that we are disabling:
+	 */
+	x86_perf_event_update(event, hwc, idx);
+
+	/* Drain the remaining BTS records. */
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
+		intel_pmu_drain_bts_buffer(cpuc);
+
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
+ */
+static int intel_pmu_save_and_restart(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	int ret;
+
+	x86_perf_event_update(event, hwc, idx);
+	ret = x86_perf_event_set_period(event, hwc, idx);
+
+	if (event->state == PERF_EVENT_STATE_ACTIVE)
+		intel_pmu_enable_event(hwc, idx);
+
+	return ret;
+}
+
+static void intel_pmu_reset(void)
+{
+	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_events)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+	if (ds)
+		ds->bts_index = ds->bts_buffer_base;
+
+	local_irq_restore(flags);
+}
+
+static int p6_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	int idx, handled = 0;
+	u64 val;
+
+	data.addr = 0;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		event = cpuc->events[idx];
+		hwc = &event->hw;
+
+		val = x86_perf_event_update(event, hwc, idx);
+		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+			continue;
+
+		/*
+		 * event overflow
+		 */
+		handled		= 1;
+		data.period	= event->hw.last_period;
+
+		if (!x86_perf_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 1, &data, regs))
+			p6_pmu_disable_event(hwc, idx);
+	}
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	int bit, loops;
+	u64 ack, status;
+
+	data.addr = 0;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	perf_disable();
+	intel_pmu_drain_bts_buffer(cpuc);
+	status = intel_pmu_get_status();
+	if (!status) {
+		perf_enable();
+		return 0;
+	}
+
+	loops = 0;
+again:
+	if (++loops > 100) {
+		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
+		perf_event_print_debug();
+		intel_pmu_reset();
+		perf_enable();
+		return 1;
+	}
+
+	inc_irq_stat(apic_perf_irqs);
+	ack = status;
+	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+		struct perf_event *event = cpuc->events[bit];
+
+		clear_bit(bit, (unsigned long *) &status);
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+
+		if (!intel_pmu_save_and_restart(event))
+			continue;
+
+		data.period = event->hw.last_period;
+
+		if (perf_event_overflow(event, 1, &data, regs))
+			intel_pmu_disable_event(&event->hw, bit);
+	}
+
+	intel_pmu_ack_status(ack);
+
+	/*
+	 * Repeat if there is more work to be done:
+	 */
+	status = intel_pmu_get_status();
+	if (status)
+		goto again;
+
+	perf_enable();
+
+	return 1;
+}
+
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	int idx, handled = 0;
+	u64 val;
+
+	data.addr = 0;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		event = cpuc->events[idx];
+		hwc = &event->hw;
+
+		val = x86_perf_event_update(event, hwc, idx);
+		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+			continue;
+
+		/*
+		 * event overflow
+		 */
+		handled		= 1;
+		data.period	= event->hw.last_period;
+
+		if (!x86_perf_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 1, &data, regs))
+			amd_pmu_disable_event(hwc, idx);
+	}
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
+
+void smp_perf_pending_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	ack_APIC_irq();
+	inc_irq_stat(apic_pending_irqs);
+	perf_event_do_pending();
+	irq_exit();
+}
+
+void set_perf_event_pending(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+#endif
+}
+
+void perf_events_lapic_init(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (!x86_pmu.apic || !x86_pmu_initialized())
+		return;
+
+	/*
+	 * Always use NMI for PMU
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
+}
+
+static int __kprobes
+perf_event_nmi_handler(struct notifier_block *self,
+			 unsigned long cmd, void *__args)
+{
+	struct die_args *args = __args;
+	struct pt_regs *regs;
+
+	if (!atomic_read(&active_events))
+		return NOTIFY_DONE;
+
+	switch (cmd) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	regs = args->regs;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
+	/*
+	 * Can't rely on the handled return value to say it was our NMI, two
+	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
+	 *
+	 * If the first NMI handles both, the latter will be empty and daze
+	 * the CPU.
+	 */
+	x86_pmu.handle_irq(regs);
+
+	return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_event_nmi_notifier = {
+	.notifier_call		= perf_event_nmi_handler,
+	.next			= NULL,
+	.priority		= 1
+};
+
+static struct x86_pmu p6_pmu = {
+	.name			= "p6",
+	.handle_irq		= p6_pmu_handle_irq,
+	.disable_all		= p6_pmu_disable_all,
+	.enable_all		= p6_pmu_enable_all,
+	.enable			= p6_pmu_enable_event,
+	.disable		= p6_pmu_disable_event,
+	.eventsel		= MSR_P6_EVNTSEL0,
+	.perfctr		= MSR_P6_PERFCTR0,
+	.event_map		= p6_pmu_event_map,
+	.raw_event		= p6_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_events		= 2,
+	/*
+	 * Events have 40 bits implemented. However they are designed such
+	 * that bits [32-39] are sign extensions of bit 31. As such the
+	 * effective width of a event for P6-like PMU is 32 bits only.
+	 *
+	 * See IA-32 Intel Architecture Software developer manual Vol 3B
+	 */
+	.event_bits		= 32,
+	.event_mask		= (1ULL << 32) - 1,
+};
+
+static struct x86_pmu intel_pmu = {
+	.name			= "Intel",
+	.handle_irq		= intel_pmu_handle_irq,
+	.disable_all		= intel_pmu_disable_all,
+	.enable_all		= intel_pmu_enable_all,
+	.enable			= intel_pmu_enable_event,
+	.disable		= intel_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.enable_bts		= intel_pmu_enable_bts,
+	.disable_bts		= intel_pmu_disable_bts,
+};
+
+static struct x86_pmu amd_pmu = {
+	.name			= "AMD",
+	.handle_irq		= amd_pmu_handle_irq,
+	.disable_all		= amd_pmu_disable_all,
+	.enable_all		= amd_pmu_enable_all,
+	.enable			= amd_pmu_enable_event,
+	.disable		= amd_pmu_disable_event,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
+	.event_map		= amd_pmu_event_map,
+	.raw_event		= amd_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_events		= 4,
+	.event_bits		= 48,
+	.event_mask		= (1ULL << 48) - 1,
+	.apic			= 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
+};
+
+static int p6_pmu_init(void)
+{
+	switch (boot_cpu_data.x86_model) {
+	case 1:
+	case 3:  /* Pentium Pro */
+	case 5:
+	case 6:  /* Pentium II */
+	case 7:
+	case 8:
+	case 11: /* Pentium III */
+		break;
+	case 9:
+	case 13:
+		/* Pentium M */
+		break;
+	default:
+		pr_cont("unsupported p6 CPU model %d ",
+			boot_cpu_data.x86_model);
+		return -ENODEV;
+	}
+
+	x86_pmu = p6_pmu;
+
+	if (!cpu_has_apic) {
+		pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+		pr_info("no hardware sampling interrupt available.\n");
+		x86_pmu.apic = 0;
+	}
+
+	return 0;
+}
+
+static int intel_pmu_init(void)
+{
+	union cpuid10_edx edx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int ebx;
+	int version;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+		/* check for P6 processor family */
+	   if (boot_cpu_data.x86 == 6) {
+		return p6_pmu_init();
+	   } else {
+		return -ENODEV;
+	   }
+	}
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Branch Misses Retired hw_event or not.
+	 */
+	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
+	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+		return -ENODEV;
+
+	version = eax.split.version_id;
+	if (version < 2)
+		return -ENODEV;
+
+	x86_pmu				= intel_pmu;
+	x86_pmu.version			= version;
+	x86_pmu.num_events		= eax.split.num_events;
+	x86_pmu.event_bits		= eax.split.bit_width;
+	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
+
+	/*
+	 * Quirk: v2 perfmon does not report fixed-purpose events, so
+	 * assume at least 3 events:
+	 */
+	x86_pmu.num_events_fixed	= max((int)edx.split.num_events_fixed, 3);
+
+	/*
+	 * Install the hw-cache-events table:
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 29: /* six-core 45 nm xeon "Dunnington" */
+		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("Core2 events, ");
+		break;
+	default:
+	case 26:
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("Nehalem/Corei7 events, ");
+		break;
+	case 28:
+		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("Atom events, ");
+		break;
+	}
+	return 0;
+}
+
+static int amd_pmu_init(void)
+{
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
+	x86_pmu = amd_pmu;
+
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
+
+	return 0;
+}
+
+void __init init_hw_perf_events(void)
+{
+	int err;
+
+	pr_info("Performance Events: ");
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		err = intel_pmu_init();
+		break;
+	case X86_VENDOR_AMD:
+		err = amd_pmu_init();
+		break;
+	default:
+		return;
+	}
+	if (err != 0) {
+		pr_cont("no PMU driver, software events only.\n");
+		return;
+	}
+
+	pr_cont("%s PMU driver.\n", x86_pmu.name);
+
+	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+	}
+	perf_event_mask = (1 << x86_pmu.num_events) - 1;
+	perf_max_events = x86_pmu.num_events;
+
+	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+	}
+
+	perf_event_mask |=
+		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
+	x86_pmu.intel_ctrl = perf_event_mask;
+
+	perf_events_lapic_init();
+	register_die_notifier(&perf_event_nmi_notifier);
+
+	pr_info("... version:                 %d\n",     x86_pmu.version);
+	pr_info("... bit width:               %d\n",     x86_pmu.event_bits);
+	pr_info("... generic events:        %d\n",     x86_pmu.num_events);
+	pr_info("... value mask:              %016Lx\n", x86_pmu.event_mask);
+	pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
+	pr_info("... fixed-purpose events:  %d\n",     x86_pmu.num_events_fixed);
+	pr_info("... event mask:            %016Lx\n", perf_event_mask);
+}
+
+static inline void x86_pmu_read(struct perf_event *event)
+{
+	x86_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static const struct pmu pmu = {
+	.enable		= x86_pmu_enable,
+	.disable	= x86_pmu_disable,
+	.read		= x86_pmu_read,
+	.unthrottle	= x86_pmu_unthrottle,
+};
+
+const struct pmu *hw_perf_event_init(struct perf_event *event)
+{
+	int err;
+
+	err = __hw_perf_event_init(event);
+	if (err) {
+		if (event->destroy)
+			event->destroy(event);
+		return ERR_PTR(err);
+	}
+
+	return &pmu;
+}
+
+/*
+ * callchain support
+ */
+
+static inline
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
+{
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
+		entry->ip[entry->nr++] = ip;
+}
+
+static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
+static DEFINE_PER_CPU(int, in_nmi_frame);
+
+
+static void
+backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+	/* Ignore warnings */
+}
+
+static void backtrace_warning(void *data, char *msg)
+{
+	/* Ignore warnings */
+}
+
+static int backtrace_stack(void *data, char *name)
+{
+	per_cpu(in_nmi_frame, smp_processor_id()) =
+			x86_is_stack_id(NMI_STACK, name);
+
+	return 0;
+}
+
+static void backtrace_address(void *data, unsigned long addr, int reliable)
+{
+	struct perf_callchain_entry *entry = data;
+
+	if (per_cpu(in_nmi_frame, smp_processor_id()))
+		return;
+
+	if (reliable)
+		callchain_store(entry, addr);
+}
+
+static const struct stacktrace_ops backtrace_ops = {
+	.warning		= backtrace_warning,
+	.warning_symbol		= backtrace_warning_symbol,
+	.stack			= backtrace_stack,
+	.address		= backtrace_address,
+};
+
+#include "../dumpstack.h"
+
+static void
+perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->ip);
+
+	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
+
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
+
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
+
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
+
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
+
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+	unsigned long bytes;
+
+	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
+
+	return bytes == sizeof(*frame);
+}
+
+static void
+perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	struct stack_frame frame;
+	const void __user *fp;
+
+	if (!user_mode(regs))
+		regs = task_pt_regs(current);
+
+	fp = (void __user *)regs->bp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
+	callchain_store(entry, regs->ip);
+
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		frame.next_frame	     = NULL;
+		frame.return_address = 0;
+
+		if (!copy_stack_frame(fp, &frame))
+			break;
+
+		if ((unsigned long)fp < regs->sp)
+			break;
+
+		callchain_store(entry, frame.return_address);
+		fp = frame.next_frame;
+	}
+}
+
+static void
+perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	int is_user;
+
+	if (!regs)
+		return;
+
+	is_user = user_mode(regs);
+
+	if (!current || current->pid == 0)
+		return;
+
+	if (is_user && current->state != TASK_RUNNING)
+		return;
+
+	if (!is_user)
+		perf_callchain_kernel(regs, entry);
+
+	if (current->mm)
+		perf_callchain_user(regs, entry);
+}
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	struct perf_callchain_entry *entry;
+
+	if (in_nmi())
+		entry = &__get_cpu_var(pmc_nmi_entry);
+	else
+		entry = &__get_cpu_var(pmc_irq_entry);
+
+	entry->nr = 0;
+
+	perf_do_callchain(regs, entry);
+
+	return entry;
+}
+
+void hw_perf_event_setup_online(int cpu)
+{
+	init_debug_store_on_cpu(cpu);
+}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 392bea4..fab786f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -20,7 +20,7 @@
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 
 struct nmi_watchdog_ctlblk {
 	unsigned int cccr_msr;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d59fe32..681c3fd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1021,7 +1021,7 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 apicinterrupt LOCAL_PENDING_VECTOR \
 	perf_pending_interrupt smp_perf_pending_interrupt
 #endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 3008831..40f3077 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -208,7 +208,7 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 
 	/* Performance monitoring interrupts: */
-# ifdef CONFIG_PERF_COUNTERS
+# ifdef CONFIG_PERF_EVENTS
 	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
 # endif
 
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d51321d..0157cd2 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -335,4 +335,4 @@ ENTRY(sys_call_table)
 	.long sys_preadv
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 775a020..82728f2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,7 +10,7 @@
 #include <linux/bootmem.h>		/* max_low_pfn			*/
 #include <linux/kprobes.h>		/* __kprobes, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
-#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
+#include <linux/perf_event.h>		/* perf_sw_event		*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
@@ -1017,7 +1017,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1114,11 +1114,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 4899215..8eb0587 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void)
 	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
 		current_cpu_data.x86_model == 15) {
 		eax.split.version_id = 2;
-		eax.split.num_counters = 2;
+		eax.split.num_events = 2;
 		eax.split.bit_width = 40;
 	}
 
-	num_counters = eax.split.num_counters;
+	num_counters = eax.split.num_events;
 
 	op_arch_perfmon_spec.num_counters = num_counters;
 	op_arch_perfmon_spec.num_controls = num_counters;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index b837761..7b8e75d 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -13,7 +13,7 @@
 #define OP_X86_MODEL_H
 
 #include <asm/types.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 
 struct op_msr {
 	unsigned long	addr;
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 50eecfe..44203ff 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -26,7 +26,7 @@
 #include <linux/proc_fs.h>
 #include <linux/nmi.h>
 #include <linux/quotaops.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
@@ -252,7 +252,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty)
 	struct pt_regs *regs = get_irq_regs();
 	if (regs)
 		show_regs(regs);
-	perf_counter_print_debug();
+	perf_event_print_debug();
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
diff --git a/fs/exec.c b/fs/exec.c
index 172ceb6..434dba7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -33,7 +33,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 #include <linux/key.h>
@@ -923,7 +923,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 	task_lock(tsk);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
-	perf_counter_comm(tsk);
+	perf_event_comm(tsk);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -997,7 +997,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	 * security domain:
 	 */
 	if (!get_dumpable(current->mm))
-		perf_counter_exit_task(current);
+		perf_event_exit_task(current);
 
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 1125e5a..d76b66a 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -620,8 +620,8 @@ __SYSCALL(__NR_move_pages, sys_move_pages)
 
 #define __NR_rt_tgsigqueueinfo 240
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
-#define __NR_perf_counter_open 241
-__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
+#define __NR_perf_event_open 241
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 
 #undef __NR_syscalls
 #define __NR_syscalls 242
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9e7f2e8..21a6f5d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -106,13 +106,13 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
-#ifdef CONFIG_PERF_COUNTERS
-# define INIT_PERF_COUNTERS(tsk)					\
-	.perf_counter_mutex = 						\
-		 __MUTEX_INITIALIZER(tsk.perf_counter_mutex),		\
-	.perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list),
+#ifdef CONFIG_PERF_EVENTS
+# define INIT_PERF_EVENTS(tsk)					\
+	.perf_event_mutex = 						\
+		 __MUTEX_INITIALIZER(tsk.perf_event_mutex),		\
+	.perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list),
 #else
-# define INIT_PERF_COUNTERS(tsk)
+# define INIT_PERF_EVENTS(tsk)
 #endif
 
 /*
@@ -178,7 +178,7 @@ extern struct cred init_cred;
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
-	INIT_PERF_COUNTERS(tsk)						\
+	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
deleted file mode 100644
index f648627..0000000
--- a/include/linux/perf_counter.h
+++ /dev/null
@@ -1,858 +0,0 @@
-/*
- *  Performance counters:
- *
- *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
- *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
- *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
- *
- *  Data type definitions, declarations, prototypes.
- *
- *    Started by: Thomas Gleixner and Ingo Molnar
- *
- *  For licencing details see kernel-base/COPYING
- */
-#ifndef _LINUX_PERF_COUNTER_H
-#define _LINUX_PERF_COUNTER_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <asm/byteorder.h>
-
-/*
- * User-space ABI bits:
- */
-
-/*
- * attr.type
- */
-enum perf_type_id {
-	PERF_TYPE_HARDWARE			= 0,
-	PERF_TYPE_SOFTWARE			= 1,
-	PERF_TYPE_TRACEPOINT			= 2,
-	PERF_TYPE_HW_CACHE			= 3,
-	PERF_TYPE_RAW				= 4,
-
-	PERF_TYPE_MAX,				/* non-ABI */
-};
-
-/*
- * Generalized performance counter event types, used by the
- * attr.event_id parameter of the sys_perf_counter_open()
- * syscall:
- */
-enum perf_hw_id {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_HW_CPU_CYCLES		= 0,
-	PERF_COUNT_HW_INSTRUCTIONS		= 1,
-	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
-	PERF_COUNT_HW_CACHE_MISSES		= 3,
-	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_HW_BRANCH_MISSES		= 5,
-	PERF_COUNT_HW_BUS_CYCLES		= 6,
-
-	PERF_COUNT_HW_MAX,			/* non-ABI */
-};
-
-/*
- * Generalized hardware cache counters:
- *
- *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
- *       { read, write, prefetch } x
- *       { accesses, misses }
- */
-enum perf_hw_cache_id {
-	PERF_COUNT_HW_CACHE_L1D			= 0,
-	PERF_COUNT_HW_CACHE_L1I			= 1,
-	PERF_COUNT_HW_CACHE_LL			= 2,
-	PERF_COUNT_HW_CACHE_DTLB		= 3,
-	PERF_COUNT_HW_CACHE_ITLB		= 4,
-	PERF_COUNT_HW_CACHE_BPU			= 5,
-
-	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
-};
-
-enum perf_hw_cache_op_id {
-	PERF_COUNT_HW_CACHE_OP_READ		= 0,
-	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
-	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
-
-	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
-};
-
-enum perf_hw_cache_op_result_id {
-	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
-	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
-
-	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
-};
-
-/*
- * Special "software" counters provided by the kernel, even if the hardware
- * does not support performance counters. These counters measure various
- * physical and sw events of the kernel (and allow the profiling of them as
- * well):
- */
-enum perf_sw_ids {
-	PERF_COUNT_SW_CPU_CLOCK			= 0,
-	PERF_COUNT_SW_TASK_CLOCK		= 1,
-	PERF_COUNT_SW_PAGE_FAULTS		= 2,
-	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
-	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
-	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
-	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
-
-	PERF_COUNT_SW_MAX,			/* non-ABI */
-};
-
-/*
- * Bits that can be set in attr.sample_type to request information
- * in the overflow packets.
- */
-enum perf_counter_sample_format {
-	PERF_SAMPLE_IP				= 1U << 0,
-	PERF_SAMPLE_TID				= 1U << 1,
-	PERF_SAMPLE_TIME			= 1U << 2,
-	PERF_SAMPLE_ADDR			= 1U << 3,
-	PERF_SAMPLE_READ			= 1U << 4,
-	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
-	PERF_SAMPLE_ID				= 1U << 6,
-	PERF_SAMPLE_CPU				= 1U << 7,
-	PERF_SAMPLE_PERIOD			= 1U << 8,
-	PERF_SAMPLE_STREAM_ID			= 1U << 9,
-	PERF_SAMPLE_RAW				= 1U << 10,
-
-	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
-};
-
-/*
- * The format of the data returned by read() on a perf counter fd,
- * as specified by attr.read_format:
- *
- * struct read_format {
- * 	{ u64		value;
- * 	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- * 	  { u64		time_running; } && PERF_FORMAT_RUNNING
- * 	  { u64		id;           } && PERF_FORMAT_ID
- * 	} && !PERF_FORMAT_GROUP
- *
- * 	{ u64		nr;
- * 	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- * 	  { u64		time_running; } && PERF_FORMAT_RUNNING
- * 	  { u64		value;
- * 	    { u64	id;           } && PERF_FORMAT_ID
- * 	  }		cntr[nr];
- * 	} && PERF_FORMAT_GROUP
- * };
- */
-enum perf_counter_read_format {
-	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
-	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
-	PERF_FORMAT_ID				= 1U << 2,
-	PERF_FORMAT_GROUP			= 1U << 3,
-
-	PERF_FORMAT_MAX = 1U << 4, 		/* non-ABI */
-};
-
-#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
-
-/*
- * Hardware event to monitor via a performance monitoring counter:
- */
-struct perf_counter_attr {
-
-	/*
-	 * Major type: hardware/software/tracepoint/etc.
-	 */
-	__u32			type;
-
-	/*
-	 * Size of the attr structure, for fwd/bwd compat.
-	 */
-	__u32			size;
-
-	/*
-	 * Type specific configuration information.
-	 */
-	__u64			config;
-
-	union {
-		__u64		sample_period;
-		__u64		sample_freq;
-	};
-
-	__u64			sample_type;
-	__u64			read_format;
-
-	__u64			disabled       :  1, /* off by default        */
-				inherit	       :  1, /* children inherit it   */
-				pinned	       :  1, /* must always be on PMU */
-				exclusive      :  1, /* only group on PMU     */
-				exclude_user   :  1, /* don't count user      */
-				exclude_kernel :  1, /* ditto kernel          */
-				exclude_hv     :  1, /* ditto hypervisor      */
-				exclude_idle   :  1, /* don't count when idle */
-				mmap           :  1, /* include mmap data     */
-				comm	       :  1, /* include comm data     */
-				freq           :  1, /* use freq, not period  */
-				inherit_stat   :  1, /* per task counts       */
-				enable_on_exec :  1, /* next exec enables     */
-				task           :  1, /* trace fork/exit       */
-				watermark      :  1, /* wakeup_watermark      */
-
-				__reserved_1   : 49;
-
-	union {
-		__u32		wakeup_events;	  /* wakeup every n events */
-		__u32		wakeup_watermark; /* bytes before wakeup   */
-	};
-	__u32			__reserved_2;
-
-	__u64			__reserved_3;
-};
-
-/*
- * Ioctls that can be done on a perf counter fd:
- */
-#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
-#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
-#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
-#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
-#define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
-
-enum perf_counter_ioc_flags {
-	PERF_IOC_FLAG_GROUP		= 1U << 0,
-};
-
-/*
- * Structure of the page that can be mapped via mmap
- */
-struct perf_counter_mmap_page {
-	__u32	version;		/* version number of this structure */
-	__u32	compat_version;		/* lowest version this is compat with */
-
-	/*
-	 * Bits needed to read the hw counters in user-space.
-	 *
-	 *   u32 seq;
-	 *   s64 count;
-	 *
-	 *   do {
-	 *     seq = pc->lock;
-	 *
-	 *     barrier()
-	 *     if (pc->index) {
-	 *       count = pmc_read(pc->index - 1);
-	 *       count += pc->offset;
-	 *     } else
-	 *       goto regular_read;
-	 *
-	 *     barrier();
-	 *   } while (pc->lock != seq);
-	 *
-	 * NOTE: for obvious reason this only works on self-monitoring
-	 *       processes.
-	 */
-	__u32	lock;			/* seqlock for synchronization */
-	__u32	index;			/* hardware counter identifier */
-	__s64	offset;			/* add to hardware counter value */
-	__u64	time_enabled;		/* time counter active */
-	__u64	time_running;		/* time counter on cpu */
-
-		/*
-		 * Hole for extension of the self monitor capabilities
-		 */
-
-	__u64	__reserved[123];	/* align to 1k */
-
-	/*
-	 * Control data for the mmap() data buffer.
-	 *
-	 * User-space reading the @data_head value should issue an rmb(), on
-	 * SMP capable platforms, after reading this value -- see
-	 * perf_counter_wakeup().
-	 *
-	 * When the mapping is PROT_WRITE the @data_tail value should be
-	 * written by userspace to reflect the last read data. In this case
-	 * the kernel will not over-write unread data.
-	 */
-	__u64   data_head;		/* head in the data section */
-	__u64	data_tail;		/* user-space written tail */
-};
-
-#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
-#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
-#define PERF_EVENT_MISC_KERNEL			(1 << 0)
-#define PERF_EVENT_MISC_USER			(2 << 0)
-#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
-
-struct perf_event_header {
-	__u32	type;
-	__u16	misc;
-	__u16	size;
-};
-
-enum perf_event_type {
-
-	/*
-	 * The MMAP events record the PROT_EXEC mappings so that we can
-	 * correlate userspace IPs to code. They have the following structure:
-	 *
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	u32				pid, tid;
-	 *	u64				addr;
-	 *	u64				len;
-	 *	u64				pgoff;
-	 *	char				filename[];
-	 * };
-	 */
-	PERF_EVENT_MMAP			= 1,
-
-	/*
-	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u64				id;
-	 * 	u64				lost;
-	 * };
-	 */
-	PERF_EVENT_LOST			= 2,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	u32				pid, tid;
-	 *	char				comm[];
-	 * };
-	 */
-	PERF_EVENT_COMM			= 3,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, ppid;
-	 *	u32				tid, ptid;
-	 *	u64				time;
-	 * };
-	 */
-	PERF_EVENT_EXIT			= 4,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u64				time;
-	 *	u64				id;
-	 *	u64				stream_id;
-	 * };
-	 */
-	PERF_EVENT_THROTTLE		= 5,
-	PERF_EVENT_UNTHROTTLE		= 6,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, ppid;
-	 *	u32				tid, ptid;
-	 *	{ u64				time;     } && PERF_SAMPLE_TIME
-	 * };
-	 */
-	PERF_EVENT_FORK			= 7,
-
-	/*
-	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u32				pid, tid;
-	 *
-	 * 	struct read_format		values;
-	 * };
-	 */
-	PERF_EVENT_READ			= 8,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
-	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
-	 *	{ u64			time;     } && PERF_SAMPLE_TIME
-	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
-	 *	{ u64			id;	  } && PERF_SAMPLE_ID
-	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
-	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
-	 * 	{ u64			period;   } && PERF_SAMPLE_PERIOD
-	 *
-	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
-	 *
-	 *	{ u64			nr,
-	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
-	 *
-	 * 	#
-	 * 	# The RAW record below is opaque data wrt the ABI
-	 * 	#
-	 * 	# That is, the ABI doesn't make any promises wrt to
-	 * 	# the stability of its content, it may vary depending
-	 * 	# on event, hardware, kernel version and phase of
-	 * 	# the moon.
-	 * 	#
-	 * 	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
-	 * 	#
-	 *
-	 *	{ u32			size;
-	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
-	 * };
-	 */
-	PERF_EVENT_SAMPLE		= 9,
-
-	PERF_EVENT_MAX,			/* non-ABI */
-};
-
-enum perf_callchain_context {
-	PERF_CONTEXT_HV			= (__u64)-32,
-	PERF_CONTEXT_KERNEL		= (__u64)-128,
-	PERF_CONTEXT_USER		= (__u64)-512,
-
-	PERF_CONTEXT_GUEST		= (__u64)-2048,
-	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
-	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
-
-	PERF_CONTEXT_MAX		= (__u64)-4095,
-};
-
-#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
-#define PERF_FLAG_FD_OUTPUT	(1U << 1)
-
-#ifdef __KERNEL__
-/*
- * Kernel-internal data types and definitions:
- */
-
-#ifdef CONFIG_PERF_COUNTERS
-# include <asm/perf_counter.h>
-#endif
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
-#include <linux/spinlock.h>
-#include <linux/hrtimer.h>
-#include <linux/fs.h>
-#include <linux/pid_namespace.h>
-#include <asm/atomic.h>
-
-#define PERF_MAX_STACK_DEPTH		255
-
-struct perf_callchain_entry {
-	__u64				nr;
-	__u64				ip[PERF_MAX_STACK_DEPTH];
-};
-
-struct perf_raw_record {
-	u32				size;
-	void				*data;
-};
-
-struct task_struct;
-
-/**
- * struct hw_perf_counter - performance counter hardware details:
- */
-struct hw_perf_counter {
-#ifdef CONFIG_PERF_COUNTERS
-	union {
-		struct { /* hardware */
-			u64		config;
-			unsigned long	config_base;
-			unsigned long	counter_base;
-			int		idx;
-		};
-		union { /* software */
-			atomic64_t	count;
-			struct hrtimer	hrtimer;
-		};
-	};
-	atomic64_t			prev_count;
-	u64				sample_period;
-	u64				last_period;
-	atomic64_t			period_left;
-	u64				interrupts;
-
-	u64				freq_count;
-	u64				freq_interrupts;
-	u64				freq_stamp;
-#endif
-};
-
-struct perf_counter;
-
-/**
- * struct pmu - generic performance monitoring unit
- */
-struct pmu {
-	int (*enable)			(struct perf_counter *counter);
-	void (*disable)			(struct perf_counter *counter);
-	void (*read)			(struct perf_counter *counter);
-	void (*unthrottle)		(struct perf_counter *counter);
-};
-
-/**
- * enum perf_counter_active_state - the states of a counter
- */
-enum perf_counter_active_state {
-	PERF_COUNTER_STATE_ERROR	= -2,
-	PERF_COUNTER_STATE_OFF		= -1,
-	PERF_COUNTER_STATE_INACTIVE	=  0,
-	PERF_COUNTER_STATE_ACTIVE	=  1,
-};
-
-struct file;
-
-struct perf_mmap_data {
-	struct rcu_head			rcu_head;
-	int				nr_pages;	/* nr of data pages  */
-	int				writable;	/* are we writable   */
-	int				nr_locked;	/* nr pages mlocked  */
-
-	atomic_t			poll;		/* POLL_ for wakeups */
-	atomic_t			events;		/* event limit       */
-
-	atomic_long_t			head;		/* write position    */
-	atomic_long_t			done_head;	/* completed head    */
-
-	atomic_t			lock;		/* concurrent writes */
-	atomic_t			wakeup;		/* needs a wakeup    */
-	atomic_t			lost;		/* nr records lost   */
-
-	long				watermark;	/* wakeup watermark  */
-
-	struct perf_counter_mmap_page   *user_page;
-	void				*data_pages[0];
-};
-
-struct perf_pending_entry {
-	struct perf_pending_entry *next;
-	void (*func)(struct perf_pending_entry *);
-};
-
-/**
- * struct perf_counter - performance counter kernel representation:
- */
-struct perf_counter {
-#ifdef CONFIG_PERF_COUNTERS
-	struct list_head		group_entry;
-	struct list_head		event_entry;
-	struct list_head		sibling_list;
-	int				nr_siblings;
-	struct perf_counter		*group_leader;
-	struct perf_counter		*output;
-	const struct pmu		*pmu;
-
-	enum perf_counter_active_state	state;
-	atomic64_t			count;
-
-	/*
-	 * These are the total time in nanoseconds that the counter
-	 * has been enabled (i.e. eligible to run, and the task has
-	 * been scheduled in, if this is a per-task counter)
-	 * and running (scheduled onto the CPU), respectively.
-	 *
-	 * They are computed from tstamp_enabled, tstamp_running and
-	 * tstamp_stopped when the counter is in INACTIVE or ACTIVE state.
-	 */
-	u64				total_time_enabled;
-	u64				total_time_running;
-
-	/*
-	 * These are timestamps used for computing total_time_enabled
-	 * and total_time_running when the counter is in INACTIVE or
-	 * ACTIVE state, measured in nanoseconds from an arbitrary point
-	 * in time.
-	 * tstamp_enabled: the notional time when the counter was enabled
-	 * tstamp_running: the notional time when the counter was scheduled on
-	 * tstamp_stopped: in INACTIVE state, the notional time when the
-	 *	counter was scheduled off.
-	 */
-	u64				tstamp_enabled;
-	u64				tstamp_running;
-	u64				tstamp_stopped;
-
-	struct perf_counter_attr	attr;
-	struct hw_perf_counter		hw;
-
-	struct perf_counter_context	*ctx;
-	struct file			*filp;
-
-	/*
-	 * These accumulate total time (in nanoseconds) that children
-	 * counters have been enabled and running, respectively.
-	 */
-	atomic64_t			child_total_time_enabled;
-	atomic64_t			child_total_time_running;
-
-	/*
-	 * Protect attach/detach and child_list:
-	 */
-	struct mutex			child_mutex;
-	struct list_head		child_list;
-	struct perf_counter		*parent;
-
-	int				oncpu;
-	int				cpu;
-
-	struct list_head		owner_entry;
-	struct task_struct		*owner;
-
-	/* mmap bits */
-	struct mutex			mmap_mutex;
-	atomic_t			mmap_count;
-	struct perf_mmap_data		*data;
-
-	/* poll related */
-	wait_queue_head_t		waitq;
-	struct fasync_struct		*fasync;
-
-	/* delayed work for NMIs and such */
-	int				pending_wakeup;
-	int				pending_kill;
-	int				pending_disable;
-	struct perf_pending_entry	pending;
-
-	atomic_t			event_limit;
-
-	void (*destroy)(struct perf_counter *);
-	struct rcu_head			rcu_head;
-
-	struct pid_namespace		*ns;
-	u64				id;
-#endif
-};
-
-/**
- * struct perf_counter_context - counter context structure
- *
- * Used as a container for task counters and CPU counters as well:
- */
-struct perf_counter_context {
-	/*
-	 * Protect the states of the counters in the list,
-	 * nr_active, and the list:
-	 */
-	spinlock_t			lock;
-	/*
-	 * Protect the list of counters.  Locking either mutex or lock
-	 * is sufficient to ensure the list doesn't change; to change
-	 * the list you need to lock both the mutex and the spinlock.
-	 */
-	struct mutex			mutex;
-
-	struct list_head		group_list;
-	struct list_head		event_list;
-	int				nr_counters;
-	int				nr_active;
-	int				is_active;
-	int				nr_stat;
-	atomic_t			refcount;
-	struct task_struct		*task;
-
-	/*
-	 * Context clock, runs when context enabled.
-	 */
-	u64				time;
-	u64				timestamp;
-
-	/*
-	 * These fields let us detect when two contexts have both
-	 * been cloned (inherited) from a common ancestor.
-	 */
-	struct perf_counter_context	*parent_ctx;
-	u64				parent_gen;
-	u64				generation;
-	int				pin_count;
-	struct rcu_head			rcu_head;
-};
-
-/**
- * struct perf_counter_cpu_context - per cpu counter context structure
- */
-struct perf_cpu_context {
-	struct perf_counter_context	ctx;
-	struct perf_counter_context	*task_ctx;
-	int				active_oncpu;
-	int				max_pertask;
-	int				exclusive;
-
-	/*
-	 * Recursion avoidance:
-	 *
-	 * task, softirq, irq, nmi context
-	 */
-	int				recursion[4];
-};
-
-struct perf_output_handle {
-	struct perf_counter	*counter;
-	struct perf_mmap_data	*data;
-	unsigned long		head;
-	unsigned long		offset;
-	int			nmi;
-	int			sample;
-	int			locked;
-	unsigned long		flags;
-};
-
-#ifdef CONFIG_PERF_COUNTERS
-
-/*
- * Set by architecture code:
- */
-extern int perf_max_counters;
-
-extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
-
-extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
-extern void perf_counter_task_sched_out(struct task_struct *task,
-					struct task_struct *next, int cpu);
-extern void perf_counter_task_tick(struct task_struct *task, int cpu);
-extern int perf_counter_init_task(struct task_struct *child);
-extern void perf_counter_exit_task(struct task_struct *child);
-extern void perf_counter_free_task(struct task_struct *task);
-extern void set_perf_counter_pending(void);
-extern void perf_counter_do_pending(void);
-extern void perf_counter_print_debug(void);
-extern void __perf_disable(void);
-extern bool __perf_enable(void);
-extern void perf_disable(void);
-extern void perf_enable(void);
-extern int perf_counter_task_disable(void);
-extern int perf_counter_task_enable(void);
-extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_counter_context *ctx, int cpu);
-extern void perf_counter_update_userpage(struct perf_counter *counter);
-
-struct perf_sample_data {
-	u64				type;
-
-	u64				ip;
-	struct {
-		u32	pid;
-		u32	tid;
-	}				tid_entry;
-	u64				time;
-	u64				addr;
-	u64				id;
-	u64				stream_id;
-	struct {
-		u32	cpu;
-		u32	reserved;
-	}				cpu_entry;
-	u64				period;
-	struct perf_callchain_entry	*callchain;
-	struct perf_raw_record		*raw;
-};
-
-extern void perf_output_sample(struct perf_output_handle *handle,
-			       struct perf_event_header *header,
-			       struct perf_sample_data *data,
-			       struct perf_counter *counter);
-extern void perf_prepare_sample(struct perf_event_header *header,
-				struct perf_sample_data *data,
-				struct perf_counter *counter,
-				struct pt_regs *regs);
-
-extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
-				 struct perf_sample_data *data,
-				 struct pt_regs *regs);
-
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return (counter->attr.type != PERF_TYPE_RAW) &&
-		(counter->attr.type != PERF_TYPE_HARDWARE) &&
-		(counter->attr.type != PERF_TYPE_HW_CACHE);
-}
-
-extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
-
-extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
-
-static inline void
-perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
-{
-	if (atomic_read(&perf_swcounter_enabled[event]))
-		__perf_swcounter_event(event, nr, nmi, regs, addr);
-}
-
-extern void __perf_counter_mmap(struct vm_area_struct *vma);
-
-static inline void perf_counter_mmap(struct vm_area_struct *vma)
-{
-	if (vma->vm_flags & VM_EXEC)
-		__perf_counter_mmap(vma);
-}
-
-extern void perf_counter_comm(struct task_struct *tsk);
-extern void perf_counter_fork(struct task_struct *tsk);
-
-extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
-
-extern int sysctl_perf_counter_paranoid;
-extern int sysctl_perf_counter_mlock;
-extern int sysctl_perf_counter_sample_rate;
-
-extern void perf_counter_init(void);
-extern void perf_tpcounter_event(int event_id, u64 addr, u64 count,
-				 void *record, int entry_size);
-
-#ifndef perf_misc_flags
-#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_EVENT_MISC_USER : \
-				 PERF_EVENT_MISC_KERNEL)
-#define perf_instruction_pointer(regs)	instruction_pointer(regs)
-#endif
-
-extern int perf_output_begin(struct perf_output_handle *handle,
-			     struct perf_counter *counter, unsigned int size,
-			     int nmi, int sample);
-extern void perf_output_end(struct perf_output_handle *handle);
-extern void perf_output_copy(struct perf_output_handle *handle,
-			     const void *buf, unsigned int len);
-#else
-static inline void
-perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
-static inline void
-perf_counter_task_sched_out(struct task_struct *task,
-			    struct task_struct *next, int cpu)		{ }
-static inline void
-perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
-static inline int perf_counter_init_task(struct task_struct *child)	{ return 0; }
-static inline void perf_counter_exit_task(struct task_struct *child)	{ }
-static inline void perf_counter_free_task(struct task_struct *task)	{ }
-static inline void perf_counter_do_pending(void)			{ }
-static inline void perf_counter_print_debug(void)			{ }
-static inline void perf_disable(void)					{ }
-static inline void perf_enable(void)					{ }
-static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
-static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
-
-static inline void
-perf_swcounter_event(u32 event, u64 nr, int nmi,
-		     struct pt_regs *regs, u64 addr)			{ }
-
-static inline void perf_counter_mmap(struct vm_area_struct *vma)	{ }
-static inline void perf_counter_comm(struct task_struct *tsk)		{ }
-static inline void perf_counter_fork(struct task_struct *tsk)		{ }
-static inline void perf_counter_init(void)				{ }
-
-#endif
-
-#define perf_output_put(handle, x) \
-	perf_output_copy((handle), &(x), sizeof(x))
-
-#endif /* __KERNEL__ */
-#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
new file mode 100644
index 0000000..ae9d9ed
--- /dev/null
+++ b/include/linux/perf_event.h
@@ -0,0 +1,858 @@
+/*
+ *  Performance events:
+ *
+ *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
+ *
+ *  Data type definitions, declarations, prototypes.
+ *
+ *    Started by: Thomas Gleixner and Ingo Molnar
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_EVENT_H
+#define _LINUX_PERF_EVENT_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+	PERF_TYPE_HARDWARE			= 0,
+	PERF_TYPE_SOFTWARE			= 1,
+	PERF_TYPE_TRACEPOINT			= 2,
+	PERF_TYPE_HW_CACHE			= 3,
+	PERF_TYPE_RAW				= 4,
+
+	PERF_TYPE_MAX,				/* non-ABI */
+};
+
+/*
+ * Generalized performance event event_id types, used by the
+ * attr.event_id parameter of the sys_perf_event_open()
+ * syscall:
+ */
+enum perf_hw_id {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+
+	PERF_COUNT_HW_MAX,			/* non-ABI */
+};
+
+/*
+ * Generalized hardware cache events:
+ *
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ *       { read, write, prefetch } x
+ *       { accesses, misses }
+ */
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D			= 0,
+	PERF_COUNT_HW_CACHE_L1I			= 1,
+	PERF_COUNT_HW_CACHE_LL			= 2,
+	PERF_COUNT_HW_CACHE_DTLB		= 3,
+	PERF_COUNT_HW_CACHE_ITLB		= 4,
+	PERF_COUNT_HW_CACHE_BPU			= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ		= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
+
+	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
+	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
+
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
+};
+
+/*
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+	PERF_COUNT_SW_CPU_CLOCK			= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+
+	PERF_COUNT_SW_MAX,			/* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_sample_format {
+	PERF_SAMPLE_IP				= 1U << 0,
+	PERF_SAMPLE_TID				= 1U << 1,
+	PERF_SAMPLE_TIME			= 1U << 2,
+	PERF_SAMPLE_ADDR			= 1U << 3,
+	PERF_SAMPLE_READ			= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
+	PERF_SAMPLE_ID				= 1U << 6,
+	PERF_SAMPLE_CPU				= 1U << 7,
+	PERF_SAMPLE_PERIOD			= 1U << 8,
+	PERF_SAMPLE_STREAM_ID			= 1U << 9,
+	PERF_SAMPLE_RAW				= 1U << 10,
+
+	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
+};
+
+/*
+ * The format of the data returned by read() on a perf event fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * 	{ u64		value;
+ * 	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ * 	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ * 	  { u64		id;           } && PERF_FORMAT_ID
+ * 	} && !PERF_FORMAT_GROUP
+ *
+ * 	{ u64		nr;
+ * 	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ * 	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ * 	  { u64		value;
+ * 	    { u64	id;           } && PERF_FORMAT_ID
+ * 	  }		cntr[nr];
+ * 	} && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_event_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
+	PERF_FORMAT_ID				= 1U << 2,
+	PERF_FORMAT_GROUP			= 1U << 3,
+
+	PERF_FORMAT_MAX = 1U << 4, 		/* non-ABI */
+};
+
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
+/*
+ * Hardware event_id to monitor via a performance monitoring event:
+ */
+struct perf_event_attr {
+
+	/*
+	 * Major type: hardware/software/tracepoint/etc.
+	 */
+	__u32			type;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32			size;
+
+	/*
+	 * Type specific configuration information.
+	 */
+	__u64			config;
+
+	union {
+		__u64		sample_period;
+		__u64		sample_freq;
+	};
+
+	__u64			sample_type;
+	__u64			read_format;
+
+	__u64			disabled       :  1, /* off by default        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
+				mmap           :  1, /* include mmap data     */
+				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
+				inherit_stat   :  1, /* per task counts       */
+				enable_on_exec :  1, /* next exec enables     */
+				task           :  1, /* trace fork/exit       */
+				watermark      :  1, /* wakeup_watermark      */
+
+				__reserved_1   : 49;
+
+	union {
+		__u32		wakeup_events;	  /* wakeup every n events */
+		__u32		wakeup_watermark; /* bytes before wakeup   */
+	};
+	__u32			__reserved_2;
+
+	__u64			__reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf event fd:
+ */
+#define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE	_IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH	_IO ('$', 2)
+#define PERF_EVENT_IOC_RESET		_IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
+
+enum perf_event_ioc_flags {
+	PERF_IOC_FLAG_GROUP		= 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+	__u32	version;		/* version number of this structure */
+	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw events in user-space.
+	 *
+	 *   u32 seq;
+	 *   s64 count;
+	 *
+	 *   do {
+	 *     seq = pc->lock;
+	 *
+	 *     barrier()
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
+	 *
+	 *     barrier();
+	 *   } while (pc->lock != seq);
+	 *
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
+	 */
+	__u32	lock;			/* seqlock for synchronization */
+	__u32	index;			/* hardware event identifier */
+	__s64	offset;			/* add to hardware event value */
+	__u64	time_enabled;		/* time event active */
+	__u64	time_running;		/* time event on cpu */
+
+		/*
+		 * Hole for extension of the self monitor capabilities
+		 */
+
+	__u64	__reserved[123];	/* align to 1k */
+
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading the @data_head value should issue an rmb(), on
+	 * SMP capable platforms, after reading this value -- see
+	 * perf_event_wakeup().
+	 *
+	 * When the mapping is PROT_WRITE the @data_tail value should be
+	 * written by userspace to reflect the last read data. In this case
+	 * the kernel will not over-write unread data.
+	 */
+	__u64   data_head;		/* head in the data section */
+	__u64	data_tail;		/* user-space written tail */
+};
+
+#define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_RECORD_MISC_KERNEL			(1 << 0)
+#define PERF_RECORD_MISC_USER			(2 << 0)
+#define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
+
+struct perf_event_header {
+	__u32	type;
+	__u16	misc;
+	__u16	size;
+};
+
+enum perf_event_type {
+
+	/*
+	 * The MMAP events record the PROT_EXEC mappings so that we can
+	 * correlate userspace IPs to code. They have the following structure:
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	char				filename[];
+	 * };
+	 */
+	PERF_RECORD_MMAP			= 1,
+
+	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u64				id;
+	 * 	u64				lost;
+	 * };
+	 */
+	PERF_RECORD_LOST			= 2,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	char				comm[];
+	 * };
+	 */
+	PERF_RECORD_COMM			= 3,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	u64				time;
+	 * };
+	 */
+	PERF_RECORD_EXIT			= 4,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				id;
+	 *	u64				stream_id;
+	 * };
+	 */
+	PERF_RECORD_THROTTLE		= 5,
+	PERF_RECORD_UNTHROTTLE		= 6,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	{ u64				time;     } && PERF_SAMPLE_TIME
+	 * };
+	 */
+	PERF_RECORD_FORK			= 7,
+
+	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u32				pid, tid;
+	 *
+	 * 	struct read_format		values;
+	 * };
+	 */
+	PERF_RECORD_READ			= 8,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
+	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
+	 *	{ u64			time;     } && PERF_SAMPLE_TIME
+	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
+	 *	{ u64			id;	  } && PERF_SAMPLE_ID
+	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
+	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+	 * 	{ u64			period;   } && PERF_SAMPLE_PERIOD
+	 *
+	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
+	 *
+	 *	{ u64			nr,
+	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+	 *
+	 * 	#
+	 * 	# The RAW record below is opaque data wrt the ABI
+	 * 	#
+	 * 	# That is, the ABI doesn't make any promises wrt to
+	 * 	# the stability of its content, it may vary depending
+	 * 	# on event_id, hardware, kernel version and phase of
+	 * 	# the moon.
+	 * 	#
+	 * 	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
+	 * 	#
+	 *
+	 *	{ u32			size;
+	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
+	 * };
+	 */
+	PERF_RECORD_SAMPLE		= 9,
+
+	PERF_RECORD_MAX,			/* non-ABI */
+};
+
+enum perf_callchain_context {
+	PERF_CONTEXT_HV			= (__u64)-32,
+	PERF_CONTEXT_KERNEL		= (__u64)-128,
+	PERF_CONTEXT_USER		= (__u64)-512,
+
+	PERF_CONTEXT_GUEST		= (__u64)-2048,
+	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
+	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
+
+	PERF_CONTEXT_MAX		= (__u64)-4095,
+};
+
+#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
+#define PERF_FLAG_FD_OUTPUT	(1U << 1)
+
+#ifdef __KERNEL__
+/*
+ * Kernel-internal data types and definitions:
+ */
+
+#ifdef CONFIG_PERF_EVENTS
+# include <asm/perf_event.h>
+#endif
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/hrtimer.h>
+#include <linux/fs.h>
+#include <linux/pid_namespace.h>
+#include <asm/atomic.h>
+
+#define PERF_MAX_STACK_DEPTH		255
+
+struct perf_callchain_entry {
+	__u64				nr;
+	__u64				ip[PERF_MAX_STACK_DEPTH];
+};
+
+struct perf_raw_record {
+	u32				size;
+	void				*data;
+};
+
+struct task_struct;
+
+/**
+ * struct hw_perf_event - performance event hardware details:
+ */
+struct hw_perf_event {
+#ifdef CONFIG_PERF_EVENTS
+	union {
+		struct { /* hardware */
+			u64		config;
+			unsigned long	config_base;
+			unsigned long	event_base;
+			int		idx;
+		};
+		union { /* software */
+			atomic64_t	count;
+			struct hrtimer	hrtimer;
+		};
+	};
+	atomic64_t			prev_count;
+	u64				sample_period;
+	u64				last_period;
+	atomic64_t			period_left;
+	u64				interrupts;
+
+	u64				freq_count;
+	u64				freq_interrupts;
+	u64				freq_stamp;
+#endif
+};
+
+struct perf_event;
+
+/**
+ * struct pmu - generic performance monitoring unit
+ */
+struct pmu {
+	int (*enable)			(struct perf_event *event);
+	void (*disable)			(struct perf_event *event);
+	void (*read)			(struct perf_event *event);
+	void (*unthrottle)		(struct perf_event *event);
+};
+
+/**
+ * enum perf_event_active_state - the states of a event
+ */
+enum perf_event_active_state {
+	PERF_EVENT_STATE_ERROR	= -2,
+	PERF_EVENT_STATE_OFF		= -1,
+	PERF_EVENT_STATE_INACTIVE	=  0,
+	PERF_EVENT_STATE_ACTIVE	=  1,
+};
+
+struct file;
+
+struct perf_mmap_data {
+	struct rcu_head			rcu_head;
+	int				nr_pages;	/* nr of data pages  */
+	int				writable;	/* are we writable   */
+	int				nr_locked;	/* nr pages mlocked  */
+
+	atomic_t			poll;		/* POLL_ for wakeups */
+	atomic_t			events;		/* event_id limit       */
+
+	atomic_long_t			head;		/* write position    */
+	atomic_long_t			done_head;	/* completed head    */
+
+	atomic_t			lock;		/* concurrent writes */
+	atomic_t			wakeup;		/* needs a wakeup    */
+	atomic_t			lost;		/* nr records lost   */
+
+	long				watermark;	/* wakeup watermark  */
+
+	struct perf_event_mmap_page   *user_page;
+	void				*data_pages[0];
+};
+
+struct perf_pending_entry {
+	struct perf_pending_entry *next;
+	void (*func)(struct perf_pending_entry *);
+};
+
+/**
+ * struct perf_event - performance event kernel representation:
+ */
+struct perf_event {
+#ifdef CONFIG_PERF_EVENTS
+	struct list_head		group_entry;
+	struct list_head		event_entry;
+	struct list_head		sibling_list;
+	int				nr_siblings;
+	struct perf_event		*group_leader;
+	struct perf_event		*output;
+	const struct pmu		*pmu;
+
+	enum perf_event_active_state	state;
+	atomic64_t			count;
+
+	/*
+	 * These are the total time in nanoseconds that the event
+	 * has been enabled (i.e. eligible to run, and the task has
+	 * been scheduled in, if this is a per-task event)
+	 * and running (scheduled onto the CPU), respectively.
+	 *
+	 * They are computed from tstamp_enabled, tstamp_running and
+	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
+	 */
+	u64				total_time_enabled;
+	u64				total_time_running;
+
+	/*
+	 * These are timestamps used for computing total_time_enabled
+	 * and total_time_running when the event is in INACTIVE or
+	 * ACTIVE state, measured in nanoseconds from an arbitrary point
+	 * in time.
+	 * tstamp_enabled: the notional time when the event was enabled
+	 * tstamp_running: the notional time when the event was scheduled on
+	 * tstamp_stopped: in INACTIVE state, the notional time when the
+	 *	event was scheduled off.
+	 */
+	u64				tstamp_enabled;
+	u64				tstamp_running;
+	u64				tstamp_stopped;
+
+	struct perf_event_attr	attr;
+	struct hw_perf_event		hw;
+
+	struct perf_event_context	*ctx;
+	struct file			*filp;
+
+	/*
+	 * These accumulate total time (in nanoseconds) that children
+	 * events have been enabled and running, respectively.
+	 */
+	atomic64_t			child_total_time_enabled;
+	atomic64_t			child_total_time_running;
+
+	/*
+	 * Protect attach/detach and child_list:
+	 */
+	struct mutex			child_mutex;
+	struct list_head		child_list;
+	struct perf_event		*parent;
+
+	int				oncpu;
+	int				cpu;
+
+	struct list_head		owner_entry;
+	struct task_struct		*owner;
+
+	/* mmap bits */
+	struct mutex			mmap_mutex;
+	atomic_t			mmap_count;
+	struct perf_mmap_data		*data;
+
+	/* poll related */
+	wait_queue_head_t		waitq;
+	struct fasync_struct		*fasync;
+
+	/* delayed work for NMIs and such */
+	int				pending_wakeup;
+	int				pending_kill;
+	int				pending_disable;
+	struct perf_pending_entry	pending;
+
+	atomic_t			event_limit;
+
+	void (*destroy)(struct perf_event *);
+	struct rcu_head			rcu_head;
+
+	struct pid_namespace		*ns;
+	u64				id;
+#endif
+};
+
+/**
+ * struct perf_event_context - event context structure
+ *
+ * Used as a container for task events and CPU events as well:
+ */
+struct perf_event_context {
+	/*
+	 * Protect the states of the events in the list,
+	 * nr_active, and the list:
+	 */
+	spinlock_t			lock;
+	/*
+	 * Protect the list of events.  Locking either mutex or lock
+	 * is sufficient to ensure the list doesn't change; to change
+	 * the list you need to lock both the mutex and the spinlock.
+	 */
+	struct mutex			mutex;
+
+	struct list_head		group_list;
+	struct list_head		event_list;
+	int				nr_events;
+	int				nr_active;
+	int				is_active;
+	int				nr_stat;
+	atomic_t			refcount;
+	struct task_struct		*task;
+
+	/*
+	 * Context clock, runs when context enabled.
+	 */
+	u64				time;
+	u64				timestamp;
+
+	/*
+	 * These fields let us detect when two contexts have both
+	 * been cloned (inherited) from a common ancestor.
+	 */
+	struct perf_event_context	*parent_ctx;
+	u64				parent_gen;
+	u64				generation;
+	int				pin_count;
+	struct rcu_head			rcu_head;
+};
+
+/**
+ * struct perf_event_cpu_context - per cpu event context structure
+ */
+struct perf_cpu_context {
+	struct perf_event_context	ctx;
+	struct perf_event_context	*task_ctx;
+	int				active_oncpu;
+	int				max_pertask;
+	int				exclusive;
+
+	/*
+	 * Recursion avoidance:
+	 *
+	 * task, softirq, irq, nmi context
+	 */
+	int				recursion[4];
+};
+
+struct perf_output_handle {
+	struct perf_event	*event;
+	struct perf_mmap_data	*data;
+	unsigned long		head;
+	unsigned long		offset;
+	int			nmi;
+	int			sample;
+	int			locked;
+	unsigned long		flags;
+};
+
+#ifdef CONFIG_PERF_EVENTS
+
+/*
+ * Set by architecture code:
+ */
+extern int perf_max_events;
+
+extern const struct pmu *hw_perf_event_init(struct perf_event *event);
+
+extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_event_task_sched_out(struct task_struct *task,
+					struct task_struct *next, int cpu);
+extern void perf_event_task_tick(struct task_struct *task, int cpu);
+extern int perf_event_init_task(struct task_struct *child);
+extern void perf_event_exit_task(struct task_struct *child);
+extern void perf_event_free_task(struct task_struct *task);
+extern void set_perf_event_pending(void);
+extern void perf_event_do_pending(void);
+extern void perf_event_print_debug(void);
+extern void __perf_disable(void);
+extern bool __perf_enable(void);
+extern void perf_disable(void);
+extern void perf_enable(void);
+extern int perf_event_task_disable(void);
+extern int perf_event_task_enable(void);
+extern int hw_perf_group_sched_in(struct perf_event *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_event_context *ctx, int cpu);
+extern void perf_event_update_userpage(struct perf_event *event);
+
+struct perf_sample_data {
+	u64				type;
+
+	u64				ip;
+	struct {
+		u32	pid;
+		u32	tid;
+	}				tid_entry;
+	u64				time;
+	u64				addr;
+	u64				id;
+	u64				stream_id;
+	struct {
+		u32	cpu;
+		u32	reserved;
+	}				cpu_entry;
+	u64				period;
+	struct perf_callchain_entry	*callchain;
+	struct perf_raw_record		*raw;
+};
+
+extern void perf_output_sample(struct perf_output_handle *handle,
+			       struct perf_event_header *header,
+			       struct perf_sample_data *data,
+			       struct perf_event *event);
+extern void perf_prepare_sample(struct perf_event_header *header,
+				struct perf_sample_data *data,
+				struct perf_event *event,
+				struct pt_regs *regs);
+
+extern int perf_event_overflow(struct perf_event *event, int nmi,
+				 struct perf_sample_data *data,
+				 struct pt_regs *regs);
+
+/*
+ * Return 1 for a software event, 0 for a hardware event
+ */
+static inline int is_software_event(struct perf_event *event)
+{
+	return (event->attr.type != PERF_TYPE_RAW) &&
+		(event->attr.type != PERF_TYPE_HARDWARE) &&
+		(event->attr.type != PERF_TYPE_HW_CACHE);
+}
+
+extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+
+extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
+
+static inline void
+perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+{
+	if (atomic_read(&perf_swevent_enabled[event_id]))
+		__perf_sw_event(event_id, nr, nmi, regs, addr);
+}
+
+extern void __perf_event_mmap(struct vm_area_struct *vma);
+
+static inline void perf_event_mmap(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_EXEC)
+		__perf_event_mmap(vma);
+}
+
+extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_fork(struct task_struct *tsk);
+
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+
+extern int sysctl_perf_event_paranoid;
+extern int sysctl_perf_event_mlock;
+extern int sysctl_perf_event_sample_rate;
+
+extern void perf_event_init(void);
+extern void perf_tp_event(int event_id, u64 addr, u64 count,
+				 void *record, int entry_size);
+
+#ifndef perf_misc_flags
+#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
+				 PERF_RECORD_MISC_KERNEL)
+#define perf_instruction_pointer(regs)	instruction_pointer(regs)
+#endif
+
+extern int perf_output_begin(struct perf_output_handle *handle,
+			     struct perf_event *event, unsigned int size,
+			     int nmi, int sample);
+extern void perf_output_end(struct perf_output_handle *handle);
+extern void perf_output_copy(struct perf_output_handle *handle,
+			     const void *buf, unsigned int len);
+#else
+static inline void
+perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
+static inline void
+perf_event_task_sched_out(struct task_struct *task,
+			    struct task_struct *next, int cpu)		{ }
+static inline void
+perf_event_task_tick(struct task_struct *task, int cpu)		{ }
+static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
+static inline void perf_event_exit_task(struct task_struct *child)	{ }
+static inline void perf_event_free_task(struct task_struct *task)	{ }
+static inline void perf_event_do_pending(void)			{ }
+static inline void perf_event_print_debug(void)			{ }
+static inline void perf_disable(void)					{ }
+static inline void perf_enable(void)					{ }
+static inline int perf_event_task_disable(void)	{ return -EINVAL; }
+static inline int perf_event_task_enable(void)	{ return -EINVAL; }
+
+static inline void
+perf_sw_event(u32 event_id, u64 nr, int nmi,
+		     struct pt_regs *regs, u64 addr)			{ }
+
+static inline void perf_event_mmap(struct vm_area_struct *vma)	{ }
+static inline void perf_event_comm(struct task_struct *tsk)		{ }
+static inline void perf_event_fork(struct task_struct *tsk)		{ }
+static inline void perf_event_init(void)				{ }
+
+#endif
+
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_PERF_EVENT_H */
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index b00df4c..07bff66 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -85,7 +85,7 @@
 #define PR_SET_TIMERSLACK 29
 #define PR_GET_TIMERSLACK 30
 
-#define PR_TASK_PERF_COUNTERS_DISABLE		31
-#define PR_TASK_PERF_COUNTERS_ENABLE		32
+#define PR_TASK_PERF_EVENTS_DISABLE		31
+#define PR_TASK_PERF_EVENTS_ENABLE		32
 
 #endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8af3d24..8b265a8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -100,7 +100,7 @@ struct robust_list_head;
 struct bio;
 struct fs_struct;
 struct bts_context;
-struct perf_counter_context;
+struct perf_event_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -701,7 +701,7 @@ struct user_struct {
 #endif
 #endif
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 	atomic_long_t locked_vm;
 #endif
 };
@@ -1449,10 +1449,10 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
-#ifdef CONFIG_PERF_COUNTERS
-	struct perf_counter_context *perf_counter_ctxp;
-	struct mutex perf_counter_mutex;
-	struct list_head perf_counter_list;
+#ifdef CONFIG_PERF_EVENTS
+	struct perf_event_context *perf_event_ctxp;
+	struct mutex perf_event_mutex;
+	struct list_head perf_event_list;
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;	/* Protected by alloc_lock */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a8e3782..02f19f9 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -55,7 +55,7 @@ struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
 struct old_linux_dirent;
-struct perf_counter_attr;
+struct perf_event_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -885,7 +885,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int,
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
-asmlinkage long sys_perf_counter_open(
-		struct perf_counter_attr __user *attr_uptr,
+asmlinkage long sys_perf_event_open(
+		struct perf_event_attr __user *attr_uptr,
 		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 72a3b43..ec91e78 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -378,7 +378,7 @@ static inline int ftrace_get_offsets_##call(				\
 #ifdef CONFIG_EVENT_PROFILE
 
 /*
- * Generate the functions needed for tracepoint perf_counter support.
+ * Generate the functions needed for tracepoint perf_event support.
  *
  * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
  *
@@ -656,7 +656,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  * {
  *	struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
  *	struct ftrace_event_call *event_call = &event_<call>;
- *	extern void perf_tpcounter_event(int, u64, u64, void *, int);
+ *	extern void perf_tp_event(int, u64, u64, void *, int);
  *	struct ftrace_raw_##call *entry;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
@@ -691,7 +691,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  *
  *		<assign>  <- affect our values
  *
- *		perf_tpcounter_event(event_call->id, __addr, __count, entry,
+ *		perf_tp_event(event_call->id, __addr, __count, entry,
  *			     __entry_size);  <- submit them to perf counter
  *	} while (0);
  *
@@ -712,7 +712,7 @@ static void ftrace_profile_##call(proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
 	struct ftrace_event_call *event_call = &event_##call;		\
-	extern void perf_tpcounter_event(int, u64, u64, void *, int);	\
+	extern void perf_tp_event(int, u64, u64, void *, int);	\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
@@ -742,7 +742,7 @@ static void ftrace_profile_##call(proto)				\
 									\
 		{ assign; }						\
 									\
-		perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+		perf_tp_event(event_call->id, __addr, __count, entry,\
 			     __entry_size);				\
 	} while (0);							\
 									\
diff --git a/init/Kconfig b/init/Kconfig
index 8e8b76d..cfdf5c3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -915,17 +915,17 @@ config AIO
           by some high performance threaded applications. Disabling
           this option saves about 7k.
 
-config HAVE_PERF_COUNTERS
+config HAVE_PERF_EVENTS
 	bool
 	help
 	  See tools/perf/design.txt for details.
 
 menu "Performance Counters"
 
-config PERF_COUNTERS
+config PERF_EVENTS
 	bool "Kernel Performance Counters"
 	default y if PROFILING
-	depends on HAVE_PERF_COUNTERS
+	depends on HAVE_PERF_EVENTS
 	select ANON_INODES
 	help
 	  Enable kernel support for performance counter hardware.
@@ -947,7 +947,7 @@ config PERF_COUNTERS
 
 config EVENT_PROFILE
 	bool "Tracepoint profiling sources"
-	depends on PERF_COUNTERS && EVENT_TRACING
+	depends on PERF_EVENTS && EVENT_TRACING
 	default y
 	help
 	 Allow the use of tracepoints as software performance counters.
diff --git a/kernel/Makefile b/kernel/Makefile
index 3d9c7e2..e26a546 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -96,7 +96,7 @@ obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
-obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index ae5d866..e47ee8a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,7 +47,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <trace/events/sched.h>
 
 #include <asm/uaccess.h>
@@ -154,8 +154,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
-#ifdef CONFIG_PERF_COUNTERS
-	WARN_ON_ONCE(tsk->perf_counter_ctxp);
+#ifdef CONFIG_PERF_EVENTS
+	WARN_ON_ONCE(tsk->perf_event_ctxp);
 #endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
@@ -981,7 +981,7 @@ NORET_TYPE void do_exit(long code)
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
 	 */
-	perf_counter_exit_task(tsk);
+	perf_event_exit_task(tsk);
 
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
diff --git a/kernel/fork.c b/kernel/fork.c
index bfee931..2cebfb2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,7 +61,7 @@
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
 #include <linux/magic.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1078,7 +1078,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
-	retval = perf_counter_init_task(p);
+	retval = perf_event_init_task(p);
 	if (retval)
 		goto bad_fork_cleanup_policy;
 
@@ -1253,7 +1253,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
-	perf_counter_fork(p);
+	perf_event_fork(p);
 	return p;
 
 bad_fork_free_pid:
@@ -1280,7 +1280,7 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
-	perf_counter_free_task(p);
+	perf_event_free_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
deleted file mode 100644
index 62de0db..0000000
--- a/kernel/perf_counter.c
+++ /dev/null
@@ -1,5000 +0,0 @@
-/*
- * Performance counter core code
- *
- *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
- *  Copyright  �  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- *
- *  For licensing details see kernel-base/COPYING
- */
-
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <linux/file.h>
-#include <linux/poll.h>
-#include <linux/sysfs.h>
-#include <linux/dcache.h>
-#include <linux/percpu.h>
-#include <linux/ptrace.h>
-#include <linux/vmstat.h>
-#include <linux/hardirq.h>
-#include <linux/rculist.h>
-#include <linux/uaccess.h>
-#include <linux/syscalls.h>
-#include <linux/anon_inodes.h>
-#include <linux/kernel_stat.h>
-#include <linux/perf_counter.h>
-
-#include <asm/irq_regs.h>
-
-/*
- * Each CPU has a list of per CPU counters:
- */
-DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
-
-int perf_max_counters __read_mostly = 1;
-static int perf_reserved_percpu __read_mostly;
-static int perf_overcommit __read_mostly = 1;
-
-static atomic_t nr_counters __read_mostly;
-static atomic_t nr_mmap_counters __read_mostly;
-static atomic_t nr_comm_counters __read_mostly;
-static atomic_t nr_task_counters __read_mostly;
-
-/*
- * perf counter paranoia level:
- *  -1 - not paranoid at all
- *   0 - disallow raw tracepoint access for unpriv
- *   1 - disallow cpu counters for unpriv
- *   2 - disallow kernel profiling for unpriv
- */
-int sysctl_perf_counter_paranoid __read_mostly = 1;
-
-static inline bool perf_paranoid_tracepoint_raw(void)
-{
-	return sysctl_perf_counter_paranoid > -1;
-}
-
-static inline bool perf_paranoid_cpu(void)
-{
-	return sysctl_perf_counter_paranoid > 0;
-}
-
-static inline bool perf_paranoid_kernel(void)
-{
-	return sysctl_perf_counter_paranoid > 1;
-}
-
-int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
-
-/*
- * max perf counter sample rate
- */
-int sysctl_perf_counter_sample_rate __read_mostly = 100000;
-
-static atomic64_t perf_counter_id;
-
-/*
- * Lock for (sysadmin-configurable) counter reservations:
- */
-static DEFINE_SPINLOCK(perf_resource_lock);
-
-/*
- * Architecture provided APIs - weak aliases:
- */
-extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
-{
-	return NULL;
-}
-
-void __weak hw_perf_disable(void)		{ barrier(); }
-void __weak hw_perf_enable(void)		{ barrier(); }
-
-void __weak hw_perf_counter_setup(int cpu)	{ barrier(); }
-void __weak hw_perf_counter_setup_online(int cpu)	{ barrier(); }
-
-int __weak
-hw_perf_group_sched_in(struct perf_counter *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_counter_context *ctx, int cpu)
-{
-	return 0;
-}
-
-void __weak perf_counter_print_debug(void)	{ }
-
-static DEFINE_PER_CPU(int, perf_disable_count);
-
-void __perf_disable(void)
-{
-	__get_cpu_var(perf_disable_count)++;
-}
-
-bool __perf_enable(void)
-{
-	return !--__get_cpu_var(perf_disable_count);
-}
-
-void perf_disable(void)
-{
-	__perf_disable();
-	hw_perf_disable();
-}
-
-void perf_enable(void)
-{
-	if (__perf_enable())
-		hw_perf_enable();
-}
-
-static void get_ctx(struct perf_counter_context *ctx)
-{
-	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
-}
-
-static void free_ctx(struct rcu_head *head)
-{
-	struct perf_counter_context *ctx;
-
-	ctx = container_of(head, struct perf_counter_context, rcu_head);
-	kfree(ctx);
-}
-
-static void put_ctx(struct perf_counter_context *ctx)
-{
-	if (atomic_dec_and_test(&ctx->refcount)) {
-		if (ctx->parent_ctx)
-			put_ctx(ctx->parent_ctx);
-		if (ctx->task)
-			put_task_struct(ctx->task);
-		call_rcu(&ctx->rcu_head, free_ctx);
-	}
-}
-
-static void unclone_ctx(struct perf_counter_context *ctx)
-{
-	if (ctx->parent_ctx) {
-		put_ctx(ctx->parent_ctx);
-		ctx->parent_ctx = NULL;
-	}
-}
-
-/*
- * If we inherit counters we want to return the parent counter id
- * to userspace.
- */
-static u64 primary_counter_id(struct perf_counter *counter)
-{
-	u64 id = counter->id;
-
-	if (counter->parent)
-		id = counter->parent->id;
-
-	return id;
-}
-
-/*
- * Get the perf_counter_context for a task and lock it.
- * This has to cope with with the fact that until it is locked,
- * the context could get moved to another task.
- */
-static struct perf_counter_context *
-perf_lock_task_context(struct task_struct *task, unsigned long *flags)
-{
-	struct perf_counter_context *ctx;
-
-	rcu_read_lock();
- retry:
-	ctx = rcu_dereference(task->perf_counter_ctxp);
-	if (ctx) {
-		/*
-		 * If this context is a clone of another, it might
-		 * get swapped for another underneath us by
-		 * perf_counter_task_sched_out, though the
-		 * rcu_read_lock() protects us from any context
-		 * getting freed.  Lock the context and check if it
-		 * got swapped before we could get the lock, and retry
-		 * if so.  If we locked the right context, then it
-		 * can't get swapped on us any more.
-		 */
-		spin_lock_irqsave(&ctx->lock, *flags);
-		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
-			spin_unlock_irqrestore(&ctx->lock, *flags);
-			goto retry;
-		}
-
-		if (!atomic_inc_not_zero(&ctx->refcount)) {
-			spin_unlock_irqrestore(&ctx->lock, *flags);
-			ctx = NULL;
-		}
-	}
-	rcu_read_unlock();
-	return ctx;
-}
-
-/*
- * Get the context for a task and increment its pin_count so it
- * can't get swapped to another task.  This also increments its
- * reference count so that the context can't get freed.
- */
-static struct perf_counter_context *perf_pin_task_context(struct task_struct *task)
-{
-	struct perf_counter_context *ctx;
-	unsigned long flags;
-
-	ctx = perf_lock_task_context(task, &flags);
-	if (ctx) {
-		++ctx->pin_count;
-		spin_unlock_irqrestore(&ctx->lock, flags);
-	}
-	return ctx;
-}
-
-static void perf_unpin_context(struct perf_counter_context *ctx)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ctx->lock, flags);
-	--ctx->pin_count;
-	spin_unlock_irqrestore(&ctx->lock, flags);
-	put_ctx(ctx);
-}
-
-/*
- * Add a counter from the lists for its context.
- * Must be called with ctx->mutex and ctx->lock held.
- */
-static void
-list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
-{
-	struct perf_counter *group_leader = counter->group_leader;
-
-	/*
-	 * Depending on whether it is a standalone or sibling counter,
-	 * add it straight to the context's counter list, or to the group
-	 * leader's sibling list:
-	 */
-	if (group_leader == counter)
-		list_add_tail(&counter->group_entry, &ctx->group_list);
-	else {
-		list_add_tail(&counter->group_entry, &group_leader->sibling_list);
-		group_leader->nr_siblings++;
-	}
-
-	list_add_rcu(&counter->event_entry, &ctx->event_list);
-	ctx->nr_counters++;
-	if (counter->attr.inherit_stat)
-		ctx->nr_stat++;
-}
-
-/*
- * Remove a counter from the lists for its context.
- * Must be called with ctx->mutex and ctx->lock held.
- */
-static void
-list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
-{
-	struct perf_counter *sibling, *tmp;
-
-	if (list_empty(&counter->group_entry))
-		return;
-	ctx->nr_counters--;
-	if (counter->attr.inherit_stat)
-		ctx->nr_stat--;
-
-	list_del_init(&counter->group_entry);
-	list_del_rcu(&counter->event_entry);
-
-	if (counter->group_leader != counter)
-		counter->group_leader->nr_siblings--;
-
-	/*
-	 * If this was a group counter with sibling counters then
-	 * upgrade the siblings to singleton counters by adding them
-	 * to the context list directly:
-	 */
-	list_for_each_entry_safe(sibling, tmp, &counter->sibling_list, group_entry) {
-
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
-		sibling->group_leader = sibling;
-	}
-}
-
-static void
-counter_sched_out(struct perf_counter *counter,
-		  struct perf_cpu_context *cpuctx,
-		  struct perf_counter_context *ctx)
-{
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return;
-
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	if (counter->pending_disable) {
-		counter->pending_disable = 0;
-		counter->state = PERF_COUNTER_STATE_OFF;
-	}
-	counter->tstamp_stopped = ctx->time;
-	counter->pmu->disable(counter);
-	counter->oncpu = -1;
-
-	if (!is_software_counter(counter))
-		cpuctx->active_oncpu--;
-	ctx->nr_active--;
-	if (counter->attr.exclusive || !cpuctx->active_oncpu)
-		cpuctx->exclusive = 0;
-}
-
-static void
-group_sched_out(struct perf_counter *group_counter,
-		struct perf_cpu_context *cpuctx,
-		struct perf_counter_context *ctx)
-{
-	struct perf_counter *counter;
-
-	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return;
-
-	counter_sched_out(group_counter, cpuctx, ctx);
-
-	/*
-	 * Schedule out siblings (if any):
-	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, group_entry)
-		counter_sched_out(counter, cpuctx, ctx);
-
-	if (group_counter->attr.exclusive)
-		cpuctx->exclusive = 0;
-}
-
-/*
- * Cross CPU call to remove a performance counter
- *
- * We disable the counter on the hardware level first. After that we
- * remove it from the context list.
- */
-static void __perf_counter_remove_from_context(void *info)
-{
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter *counter = info;
-	struct perf_counter_context *ctx = counter->ctx;
-
-	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
-
-	spin_lock(&ctx->lock);
-	/*
-	 * Protect the list operation against NMI by disabling the
-	 * counters on a global level.
-	 */
-	perf_disable();
-
-	counter_sched_out(counter, cpuctx, ctx);
-
-	list_del_counter(counter, ctx);
-
-	if (!ctx->task) {
-		/*
-		 * Allow more per task counters with respect to the
-		 * reservation:
-		 */
-		cpuctx->max_pertask =
-			min(perf_max_counters - ctx->nr_counters,
-			    perf_max_counters - perf_reserved_percpu);
-	}
-
-	perf_enable();
-	spin_unlock(&ctx->lock);
-}
-
-
-/*
- * Remove the counter from a task's (or a CPU's) list of counters.
- *
- * Must be called with ctx->mutex held.
- *
- * CPU counters are removed with a smp call. For task counters we only
- * call when the task is on a CPU.
- *
- * If counter->ctx is a cloned context, callers must make sure that
- * every task struct that counter->ctx->task could possibly point to
- * remains valid.  This is OK when called from perf_release since
- * that only calls us on the top-level context, which can't be a clone.
- * When called from perf_counter_exit_task, it's OK because the
- * context has been detached from its task.
- */
-static void perf_counter_remove_from_context(struct perf_counter *counter)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	struct task_struct *task = ctx->task;
-
-	if (!task) {
-		/*
-		 * Per cpu counters are removed via an smp call and
-		 * the removal is always sucessful.
-		 */
-		smp_call_function_single(counter->cpu,
-					 __perf_counter_remove_from_context,
-					 counter, 1);
-		return;
-	}
-
-retry:
-	task_oncpu_function_call(task, __perf_counter_remove_from_context,
-				 counter);
-
-	spin_lock_irq(&ctx->lock);
-	/*
-	 * If the context is active we need to retry the smp call.
-	 */
-	if (ctx->nr_active && !list_empty(&counter->group_entry)) {
-		spin_unlock_irq(&ctx->lock);
-		goto retry;
-	}
-
-	/*
-	 * The lock prevents that this context is scheduled in so we
-	 * can remove the counter safely, if the call above did not
-	 * succeed.
-	 */
-	if (!list_empty(&counter->group_entry)) {
-		list_del_counter(counter, ctx);
-	}
-	spin_unlock_irq(&ctx->lock);
-}
-
-static inline u64 perf_clock(void)
-{
-	return cpu_clock(smp_processor_id());
-}
-
-/*
- * Update the record of the current time in a context.
- */
-static void update_context_time(struct perf_counter_context *ctx)
-{
-	u64 now = perf_clock();
-
-	ctx->time += now - ctx->timestamp;
-	ctx->timestamp = now;
-}
-
-/*
- * Update the total_time_enabled and total_time_running fields for a counter.
- */
-static void update_counter_times(struct perf_counter *counter)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	u64 run_end;
-
-	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
-	    counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
-		return;
-
-	counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
-
-	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
-		run_end = counter->tstamp_stopped;
-	else
-		run_end = ctx->time;
-
-	counter->total_time_running = run_end - counter->tstamp_running;
-}
-
-/*
- * Update total_time_enabled and total_time_running for all counters in a group.
- */
-static void update_group_times(struct perf_counter *leader)
-{
-	struct perf_counter *counter;
-
-	update_counter_times(leader);
-	list_for_each_entry(counter, &leader->sibling_list, group_entry)
-		update_counter_times(counter);
-}
-
-/*
- * Cross CPU call to disable a performance counter
- */
-static void __perf_counter_disable(void *info)
-{
-	struct perf_counter *counter = info;
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter_context *ctx = counter->ctx;
-
-	/*
-	 * If this is a per-task counter, need to check whether this
-	 * counter's task is the current task on this cpu.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
-
-	spin_lock(&ctx->lock);
-
-	/*
-	 * If the counter is on, turn it off.
-	 * If it is in error state, leave it in error state.
-	 */
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
-		update_context_time(ctx);
-		update_group_times(counter);
-		if (counter == counter->group_leader)
-			group_sched_out(counter, cpuctx, ctx);
-		else
-			counter_sched_out(counter, cpuctx, ctx);
-		counter->state = PERF_COUNTER_STATE_OFF;
-	}
-
-	spin_unlock(&ctx->lock);
-}
-
-/*
- * Disable a counter.
- *
- * If counter->ctx is a cloned context, callers must make sure that
- * every task struct that counter->ctx->task could possibly point to
- * remains valid.  This condition is satisifed when called through
- * perf_counter_for_each_child or perf_counter_for_each because they
- * hold the top-level counter's child_mutex, so any descendant that
- * goes to exit will block in sync_child_counter.
- * When called from perf_pending_counter it's OK because counter->ctx
- * is the current context on this CPU and preemption is disabled,
- * hence we can't get into perf_counter_task_sched_out for this context.
- */
-static void perf_counter_disable(struct perf_counter *counter)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	struct task_struct *task = ctx->task;
-
-	if (!task) {
-		/*
-		 * Disable the counter on the cpu that it's on
-		 */
-		smp_call_function_single(counter->cpu, __perf_counter_disable,
-					 counter, 1);
-		return;
-	}
-
- retry:
-	task_oncpu_function_call(task, __perf_counter_disable, counter);
-
-	spin_lock_irq(&ctx->lock);
-	/*
-	 * If the counter is still active, we need to retry the cross-call.
-	 */
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		spin_unlock_irq(&ctx->lock);
-		goto retry;
-	}
-
-	/*
-	 * Since we have the lock this context can't be scheduled
-	 * in, so we can change the state safely.
-	 */
-	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
-		update_group_times(counter);
-		counter->state = PERF_COUNTER_STATE_OFF;
-	}
-
-	spin_unlock_irq(&ctx->lock);
-}
-
-static int
-counter_sched_in(struct perf_counter *counter,
-		 struct perf_cpu_context *cpuctx,
-		 struct perf_counter_context *ctx,
-		 int cpu)
-{
-	if (counter->state <= PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	counter->state = PERF_COUNTER_STATE_ACTIVE;
-	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
-	/*
-	 * The new state must be visible before we turn it on in the hardware:
-	 */
-	smp_wmb();
-
-	if (counter->pmu->enable(counter)) {
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->oncpu = -1;
-		return -EAGAIN;
-	}
-
-	counter->tstamp_running += ctx->time - counter->tstamp_stopped;
-
-	if (!is_software_counter(counter))
-		cpuctx->active_oncpu++;
-	ctx->nr_active++;
-
-	if (counter->attr.exclusive)
-		cpuctx->exclusive = 1;
-
-	return 0;
-}
-
-static int
-group_sched_in(struct perf_counter *group_counter,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_counter_context *ctx,
-	       int cpu)
-{
-	struct perf_counter *counter, *partial_group;
-	int ret;
-
-	if (group_counter->state == PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
-	if (ret)
-		return ret < 0 ? ret : 0;
-
-	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
-		return -EAGAIN;
-
-	/*
-	 * Schedule in siblings as one group (if any):
-	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
-		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
-			partial_group = counter;
-			goto group_error;
-		}
-	}
-
-	return 0;
-
-group_error:
-	/*
-	 * Groups can be scheduled in as one unit only, so undo any
-	 * partial group before returning:
-	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
-		if (counter == partial_group)
-			break;
-		counter_sched_out(counter, cpuctx, ctx);
-	}
-	counter_sched_out(group_counter, cpuctx, ctx);
-
-	return -EAGAIN;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */
-static int is_software_only_group(struct perf_counter *leader)
-{
-	struct perf_counter *counter;
-
-	if (!is_software_counter(leader))
-		return 0;
-
-	list_for_each_entry(counter, &leader->sibling_list, group_entry)
-		if (!is_software_counter(counter))
-			return 0;
-
-	return 1;
-}
-
-/*
- * Work out whether we can put this counter group on the CPU now.
- */
-static int group_can_go_on(struct perf_counter *counter,
-			   struct perf_cpu_context *cpuctx,
-			   int can_add_hw)
-{
-	/*
-	 * Groups consisting entirely of software counters can always go on.
-	 */
-	if (is_software_only_group(counter))
-		return 1;
-	/*
-	 * If an exclusive group is already on, no other hardware
-	 * counters can go on.
-	 */
-	if (cpuctx->exclusive)
-		return 0;
-	/*
-	 * If this group is exclusive and there are already
-	 * counters on the CPU, it can't go on.
-	 */
-	if (counter->attr.exclusive && cpuctx->active_oncpu)
-		return 0;
-	/*
-	 * Otherwise, try to add it if all previous groups were able
-	 * to go on.
-	 */
-	return can_add_hw;
-}
-
-static void add_counter_to_ctx(struct perf_counter *counter,
-			       struct perf_counter_context *ctx)
-{
-	list_add_counter(counter, ctx);
-	counter->tstamp_enabled = ctx->time;
-	counter->tstamp_running = ctx->time;
-	counter->tstamp_stopped = ctx->time;
-}
-
-/*
- * Cross CPU call to install and enable a performance counter
- *
- * Must be called with ctx->mutex held
- */
-static void __perf_install_in_context(void *info)
-{
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter *counter = info;
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_counter *leader = counter->group_leader;
-	int cpu = smp_processor_id();
-	int err;
-
-	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 * Or possibly this is the right context but it isn't
-	 * on this cpu because it had no counters.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
-			return;
-		cpuctx->task_ctx = ctx;
-	}
-
-	spin_lock(&ctx->lock);
-	ctx->is_active = 1;
-	update_context_time(ctx);
-
-	/*
-	 * Protect the list operation against NMI by disabling the
-	 * counters on a global level. NOP for non NMI based counters.
-	 */
-	perf_disable();
-
-	add_counter_to_ctx(counter, ctx);
-
-	/*
-	 * Don't put the counter on if it is disabled or if
-	 * it is in a group and the group isn't on.
-	 */
-	if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
-	    (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
-		goto unlock;
-
-	/*
-	 * An exclusive counter can't go on if there are already active
-	 * hardware counters, and no hardware counter can go on if there
-	 * is already an exclusive counter on.
-	 */
-	if (!group_can_go_on(counter, cpuctx, 1))
-		err = -EEXIST;
-	else
-		err = counter_sched_in(counter, cpuctx, ctx, cpu);
-
-	if (err) {
-		/*
-		 * This counter couldn't go on.  If it is in a group
-		 * then we have to pull the whole group off.
-		 * If the counter group is pinned then put it in error state.
-		 */
-		if (leader != counter)
-			group_sched_out(leader, cpuctx, ctx);
-		if (leader->attr.pinned) {
-			update_group_times(leader);
-			leader->state = PERF_COUNTER_STATE_ERROR;
-		}
-	}
-
-	if (!err && !ctx->task && cpuctx->max_pertask)
-		cpuctx->max_pertask--;
-
- unlock:
-	perf_enable();
-
-	spin_unlock(&ctx->lock);
-}
-
-/*
- * Attach a performance counter to a context
- *
- * First we add the counter to the list with the hardware enable bit
- * in counter->hw_config cleared.
- *
- * If the counter is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
- *
- * Must be called with ctx->mutex held.
- */
-static void
-perf_install_in_context(struct perf_counter_context *ctx,
-			struct perf_counter *counter,
-			int cpu)
-{
-	struct task_struct *task = ctx->task;
-
-	if (!task) {
-		/*
-		 * Per cpu counters are installed via an smp call and
-		 * the install is always sucessful.
-		 */
-		smp_call_function_single(cpu, __perf_install_in_context,
-					 counter, 1);
-		return;
-	}
-
-retry:
-	task_oncpu_function_call(task, __perf_install_in_context,
-				 counter);
-
-	spin_lock_irq(&ctx->lock);
-	/*
-	 * we need to retry the smp call.
-	 */
-	if (ctx->is_active && list_empty(&counter->group_entry)) {
-		spin_unlock_irq(&ctx->lock);
-		goto retry;
-	}
-
-	/*
-	 * The lock prevents that this context is scheduled in so we
-	 * can add the counter safely, if it the call above did not
-	 * succeed.
-	 */
-	if (list_empty(&counter->group_entry))
-		add_counter_to_ctx(counter, ctx);
-	spin_unlock_irq(&ctx->lock);
-}
-
-/*
- * Put a counter into inactive state and update time fields.
- * Enabling the leader of a group effectively enables all
- * the group members that aren't explicitly disabled, so we
- * have to update their ->tstamp_enabled also.
- * Note: this works for group members as well as group leaders
- * since the non-leader members' sibling_lists will be empty.
- */
-static void __perf_counter_mark_enabled(struct perf_counter *counter,
-					struct perf_counter_context *ctx)
-{
-	struct perf_counter *sub;
-
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
-	list_for_each_entry(sub, &counter->sibling_list, group_entry)
-		if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
-			sub->tstamp_enabled =
-				ctx->time - sub->total_time_enabled;
-}
-
-/*
- * Cross CPU call to enable a performance counter
- */
-static void __perf_counter_enable(void *info)
-{
-	struct perf_counter *counter = info;
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_counter *leader = counter->group_leader;
-	int err;
-
-	/*
-	 * If this is a per-task counter, need to check whether this
-	 * counter's task is the current task on this cpu.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
-			return;
-		cpuctx->task_ctx = ctx;
-	}
-
-	spin_lock(&ctx->lock);
-	ctx->is_active = 1;
-	update_context_time(ctx);
-
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		goto unlock;
-	__perf_counter_mark_enabled(counter, ctx);
-
-	/*
-	 * If the counter is in a group and isn't the group leader,
-	 * then don't put it on unless the group is on.
-	 */
-	if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
-		goto unlock;
-
-	if (!group_can_go_on(counter, cpuctx, 1)) {
-		err = -EEXIST;
-	} else {
-		perf_disable();
-		if (counter == leader)
-			err = group_sched_in(counter, cpuctx, ctx,
-					     smp_processor_id());
-		else
-			err = counter_sched_in(counter, cpuctx, ctx,
-					       smp_processor_id());
-		perf_enable();
-	}
-
-	if (err) {
-		/*
-		 * If this counter can't go on and it's part of a
-		 * group, then the whole group has to come off.
-		 */
-		if (leader != counter)
-			group_sched_out(leader, cpuctx, ctx);
-		if (leader->attr.pinned) {
-			update_group_times(leader);
-			leader->state = PERF_COUNTER_STATE_ERROR;
-		}
-	}
-
- unlock:
-	spin_unlock(&ctx->lock);
-}
-
-/*
- * Enable a counter.
- *
- * If counter->ctx is a cloned context, callers must make sure that
- * every task struct that counter->ctx->task could possibly point to
- * remains valid.  This condition is satisfied when called through
- * perf_counter_for_each_child or perf_counter_for_each as described
- * for perf_counter_disable.
- */
-static void perf_counter_enable(struct perf_counter *counter)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	struct task_struct *task = ctx->task;
-
-	if (!task) {
-		/*
-		 * Enable the counter on the cpu that it's on
-		 */
-		smp_call_function_single(counter->cpu, __perf_counter_enable,
-					 counter, 1);
-		return;
-	}
-
-	spin_lock_irq(&ctx->lock);
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		goto out;
-
-	/*
-	 * If the counter is in error state, clear that first.
-	 * That way, if we see the counter in error state below, we
-	 * know that it has gone back into error state, as distinct
-	 * from the task having been scheduled away before the
-	 * cross-call arrived.
-	 */
-	if (counter->state == PERF_COUNTER_STATE_ERROR)
-		counter->state = PERF_COUNTER_STATE_OFF;
-
- retry:
-	spin_unlock_irq(&ctx->lock);
-	task_oncpu_function_call(task, __perf_counter_enable, counter);
-
-	spin_lock_irq(&ctx->lock);
-
-	/*
-	 * If the context is active and the counter is still off,
-	 * we need to retry the cross-call.
-	 */
-	if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
-		goto retry;
-
-	/*
-	 * Since we have the lock this context can't be scheduled
-	 * in, so we can change the state safely.
-	 */
-	if (counter->state == PERF_COUNTER_STATE_OFF)
-		__perf_counter_mark_enabled(counter, ctx);
-
- out:
-	spin_unlock_irq(&ctx->lock);
-}
-
-static int perf_counter_refresh(struct perf_counter *counter, int refresh)
-{
-	/*
-	 * not supported on inherited counters
-	 */
-	if (counter->attr.inherit)
-		return -EINVAL;
-
-	atomic_add(refresh, &counter->event_limit);
-	perf_counter_enable(counter);
-
-	return 0;
-}
-
-void __perf_counter_sched_out(struct perf_counter_context *ctx,
-			      struct perf_cpu_context *cpuctx)
-{
-	struct perf_counter *counter;
-
-	spin_lock(&ctx->lock);
-	ctx->is_active = 0;
-	if (likely(!ctx->nr_counters))
-		goto out;
-	update_context_time(ctx);
-
-	perf_disable();
-	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->group_list, group_entry) {
-			if (counter != counter->group_leader)
-				counter_sched_out(counter, cpuctx, ctx);
-			else
-				group_sched_out(counter, cpuctx, ctx);
-		}
-	}
-	perf_enable();
- out:
-	spin_unlock(&ctx->lock);
-}
-
-/*
- * Test whether two contexts are equivalent, i.e. whether they
- * have both been cloned from the same version of the same context
- * and they both have the same number of enabled counters.
- * If the number of enabled counters is the same, then the set
- * of enabled counters should be the same, because these are both
- * inherited contexts, therefore we can't access individual counters
- * in them directly with an fd; we can only enable/disable all
- * counters via prctl, or enable/disable all counters in a family
- * via ioctl, which will have the same effect on both contexts.
- */
-static int context_equiv(struct perf_counter_context *ctx1,
-			 struct perf_counter_context *ctx2)
-{
-	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
-		&& ctx1->parent_gen == ctx2->parent_gen
-		&& !ctx1->pin_count && !ctx2->pin_count;
-}
-
-static void __perf_counter_read(void *counter);
-
-static void __perf_counter_sync_stat(struct perf_counter *counter,
-				     struct perf_counter *next_counter)
-{
-	u64 value;
-
-	if (!counter->attr.inherit_stat)
-		return;
-
-	/*
-	 * Update the counter value, we cannot use perf_counter_read()
-	 * because we're in the middle of a context switch and have IRQs
-	 * disabled, which upsets smp_call_function_single(), however
-	 * we know the counter must be on the current CPU, therefore we
-	 * don't need to use it.
-	 */
-	switch (counter->state) {
-	case PERF_COUNTER_STATE_ACTIVE:
-		__perf_counter_read(counter);
-		break;
-
-	case PERF_COUNTER_STATE_INACTIVE:
-		update_counter_times(counter);
-		break;
-
-	default:
-		break;
-	}
-
-	/*
-	 * In order to keep per-task stats reliable we need to flip the counter
-	 * values when we flip the contexts.
-	 */
-	value = atomic64_read(&next_counter->count);
-	value = atomic64_xchg(&counter->count, value);
-	atomic64_set(&next_counter->count, value);
-
-	swap(counter->total_time_enabled, next_counter->total_time_enabled);
-	swap(counter->total_time_running, next_counter->total_time_running);
-
-	/*
-	 * Since we swizzled the values, update the user visible data too.
-	 */
-	perf_counter_update_userpage(counter);
-	perf_counter_update_userpage(next_counter);
-}
-
-#define list_next_entry(pos, member) \
-	list_entry(pos->member.next, typeof(*pos), member)
-
-static void perf_counter_sync_stat(struct perf_counter_context *ctx,
-				   struct perf_counter_context *next_ctx)
-{
-	struct perf_counter *counter, *next_counter;
-
-	if (!ctx->nr_stat)
-		return;
-
-	counter = list_first_entry(&ctx->event_list,
-				   struct perf_counter, event_entry);
-
-	next_counter = list_first_entry(&next_ctx->event_list,
-					struct perf_counter, event_entry);
-
-	while (&counter->event_entry != &ctx->event_list &&
-	       &next_counter->event_entry != &next_ctx->event_list) {
-
-		__perf_counter_sync_stat(counter, next_counter);
-
-		counter = list_next_entry(counter, event_entry);
-		next_counter = list_next_entry(next_counter, event_entry);
-	}
-}
-
-/*
- * Called from scheduler to remove the counters of the current task,
- * with interrupts disabled.
- *
- * We stop each counter and update the counter value in counter->count.
- *
- * This does not protect us against NMI, but disable()
- * sets the disabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * not restart the counter.
- */
-void perf_counter_task_sched_out(struct task_struct *task,
-				 struct task_struct *next, int cpu)
-{
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = task->perf_counter_ctxp;
-	struct perf_counter_context *next_ctx;
-	struct perf_counter_context *parent;
-	struct pt_regs *regs;
-	int do_switch = 1;
-
-	regs = task_pt_regs(task);
-	perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
-
-	if (likely(!ctx || !cpuctx->task_ctx))
-		return;
-
-	update_context_time(ctx);
-
-	rcu_read_lock();
-	parent = rcu_dereference(ctx->parent_ctx);
-	next_ctx = next->perf_counter_ctxp;
-	if (parent && next_ctx &&
-	    rcu_dereference(next_ctx->parent_ctx) == parent) {
-		/*
-		 * Looks like the two contexts are clones, so we might be
-		 * able to optimize the context switch.  We lock both
-		 * contexts and check that they are clones under the
-		 * lock (including re-checking that neither has been
-		 * uncloned in the meantime).  It doesn't matter which
-		 * order we take the locks because no other cpu could
-		 * be trying to lock both of these tasks.
-		 */
-		spin_lock(&ctx->lock);
-		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
-		if (context_equiv(ctx, next_ctx)) {
-			/*
-			 * XXX do we need a memory barrier of sorts
-			 * wrt to rcu_dereference() of perf_counter_ctxp
-			 */
-			task->perf_counter_ctxp = next_ctx;
-			next->perf_counter_ctxp = ctx;
-			ctx->task = next;
-			next_ctx->task = task;
-			do_switch = 0;
-
-			perf_counter_sync_stat(ctx, next_ctx);
-		}
-		spin_unlock(&next_ctx->lock);
-		spin_unlock(&ctx->lock);
-	}
-	rcu_read_unlock();
-
-	if (do_switch) {
-		__perf_counter_sched_out(ctx, cpuctx);
-		cpuctx->task_ctx = NULL;
-	}
-}
-
-/*
- * Called with IRQs disabled
- */
-static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
-{
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-
-	if (!cpuctx->task_ctx)
-		return;
-
-	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
-		return;
-
-	__perf_counter_sched_out(ctx, cpuctx);
-	cpuctx->task_ctx = NULL;
-}
-
-/*
- * Called with IRQs disabled
- */
-static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
-{
-	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
-}
-
-static void
-__perf_counter_sched_in(struct perf_counter_context *ctx,
-			struct perf_cpu_context *cpuctx, int cpu)
-{
-	struct perf_counter *counter;
-	int can_add_hw = 1;
-
-	spin_lock(&ctx->lock);
-	ctx->is_active = 1;
-	if (likely(!ctx->nr_counters))
-		goto out;
-
-	ctx->timestamp = perf_clock();
-
-	perf_disable();
-
-	/*
-	 * First go through the list and put on any pinned groups
-	 * in order to give them the best chance of going on.
-	 */
-	list_for_each_entry(counter, &ctx->group_list, group_entry) {
-		if (counter->state <= PERF_COUNTER_STATE_OFF ||
-		    !counter->attr.pinned)
-			continue;
-		if (counter->cpu != -1 && counter->cpu != cpu)
-			continue;
-
-		if (counter != counter->group_leader)
-			counter_sched_in(counter, cpuctx, ctx, cpu);
-		else {
-			if (group_can_go_on(counter, cpuctx, 1))
-				group_sched_in(counter, cpuctx, ctx, cpu);
-		}
-
-		/*
-		 * If this pinned group hasn't been scheduled,
-		 * put it in error state.
-		 */
-		if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
-			update_group_times(counter);
-			counter->state = PERF_COUNTER_STATE_ERROR;
-		}
-	}
-
-	list_for_each_entry(counter, &ctx->group_list, group_entry) {
-		/*
-		 * Ignore counters in OFF or ERROR state, and
-		 * ignore pinned counters since we did them already.
-		 */
-		if (counter->state <= PERF_COUNTER_STATE_OFF ||
-		    counter->attr.pinned)
-			continue;
-
-		/*
-		 * Listen to the 'cpu' scheduling filter constraint
-		 * of counters:
-		 */
-		if (counter->cpu != -1 && counter->cpu != cpu)
-			continue;
-
-		if (counter != counter->group_leader) {
-			if (counter_sched_in(counter, cpuctx, ctx, cpu))
-				can_add_hw = 0;
-		} else {
-			if (group_can_go_on(counter, cpuctx, can_add_hw)) {
-				if (group_sched_in(counter, cpuctx, ctx, cpu))
-					can_add_hw = 0;
-			}
-		}
-	}
-	perf_enable();
- out:
-	spin_unlock(&ctx->lock);
-}
-
-/*
- * Called from scheduler to add the counters of the current task
- * with interrupts disabled.
- *
- * We restore the counter value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * keep the counter running.
- */
-void perf_counter_task_sched_in(struct task_struct *task, int cpu)
-{
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = task->perf_counter_ctxp;
-
-	if (likely(!ctx))
-		return;
-	if (cpuctx->task_ctx == ctx)
-		return;
-	__perf_counter_sched_in(ctx, cpuctx, cpu);
-	cpuctx->task_ctx = ctx;
-}
-
-static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
-{
-	struct perf_counter_context *ctx = &cpuctx->ctx;
-
-	__perf_counter_sched_in(ctx, cpuctx, cpu);
-}
-
-#define MAX_INTERRUPTS (~0ULL)
-
-static void perf_log_throttle(struct perf_counter *counter, int enable);
-
-static void perf_adjust_period(struct perf_counter *counter, u64 events)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	u64 period, sample_period;
-	s64 delta;
-
-	events *= hwc->sample_period;
-	period = div64_u64(events, counter->attr.sample_freq);
-
-	delta = (s64)(period - hwc->sample_period);
-	delta = (delta + 7) / 8; /* low pass filter */
-
-	sample_period = hwc->sample_period + delta;
-
-	if (!sample_period)
-		sample_period = 1;
-
-	hwc->sample_period = sample_period;
-}
-
-static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
-{
-	struct perf_counter *counter;
-	struct hw_perf_counter *hwc;
-	u64 interrupts, freq;
-
-	spin_lock(&ctx->lock);
-	list_for_each_entry(counter, &ctx->group_list, group_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-			continue;
-
-		hwc = &counter->hw;
-
-		interrupts = hwc->interrupts;
-		hwc->interrupts = 0;
-
-		/*
-		 * unthrottle counters on the tick
-		 */
-		if (interrupts == MAX_INTERRUPTS) {
-			perf_log_throttle(counter, 1);
-			counter->pmu->unthrottle(counter);
-			interrupts = 2*sysctl_perf_counter_sample_rate/HZ;
-		}
-
-		if (!counter->attr.freq || !counter->attr.sample_freq)
-			continue;
-
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (counter->attr.sample_freq < HZ) {
-			freq = counter->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(counter, freq * interrupts);
-
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			counter->pmu->disable(counter);
-			atomic64_set(&hwc->period_left, 0);
-			counter->pmu->enable(counter);
-			perf_enable();
-		}
-	}
-	spin_unlock(&ctx->lock);
-}
-
-/*
- * Round-robin a context's counters:
- */
-static void rotate_ctx(struct perf_counter_context *ctx)
-{
-	struct perf_counter *counter;
-
-	if (!ctx->nr_counters)
-		return;
-
-	spin_lock(&ctx->lock);
-	/*
-	 * Rotate the first entry last (works just fine for group counters too):
-	 */
-	perf_disable();
-	list_for_each_entry(counter, &ctx->group_list, group_entry) {
-		list_move_tail(&counter->group_entry, &ctx->group_list);
-		break;
-	}
-	perf_enable();
-
-	spin_unlock(&ctx->lock);
-}
-
-void perf_counter_task_tick(struct task_struct *curr, int cpu)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx;
-
-	if (!atomic_read(&nr_counters))
-		return;
-
-	cpuctx = &per_cpu(perf_cpu_context, cpu);
-	ctx = curr->perf_counter_ctxp;
-
-	perf_ctx_adjust_freq(&cpuctx->ctx);
-	if (ctx)
-		perf_ctx_adjust_freq(ctx);
-
-	perf_counter_cpu_sched_out(cpuctx);
-	if (ctx)
-		__perf_counter_task_sched_out(ctx);
-
-	rotate_ctx(&cpuctx->ctx);
-	if (ctx)
-		rotate_ctx(ctx);
-
-	perf_counter_cpu_sched_in(cpuctx, cpu);
-	if (ctx)
-		perf_counter_task_sched_in(curr, cpu);
-}
-
-/*
- * Enable all of a task's counters that have been marked enable-on-exec.
- * This expects task == current.
- */
-static void perf_counter_enable_on_exec(struct task_struct *task)
-{
-	struct perf_counter_context *ctx;
-	struct perf_counter *counter;
-	unsigned long flags;
-	int enabled = 0;
-
-	local_irq_save(flags);
-	ctx = task->perf_counter_ctxp;
-	if (!ctx || !ctx->nr_counters)
-		goto out;
-
-	__perf_counter_task_sched_out(ctx);
-
-	spin_lock(&ctx->lock);
-
-	list_for_each_entry(counter, &ctx->group_list, group_entry) {
-		if (!counter->attr.enable_on_exec)
-			continue;
-		counter->attr.enable_on_exec = 0;
-		if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
-			continue;
-		__perf_counter_mark_enabled(counter, ctx);
-		enabled = 1;
-	}
-
-	/*
-	 * Unclone this context if we enabled any counter.
-	 */
-	if (enabled)
-		unclone_ctx(ctx);
-
-	spin_unlock(&ctx->lock);
-
-	perf_counter_task_sched_in(task, smp_processor_id());
- out:
-	local_irq_restore(flags);
-}
-
-/*
- * Cross CPU call to read the hardware counter
- */
-static void __perf_counter_read(void *info)
-{
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter *counter = info;
-	struct perf_counter_context *ctx = counter->ctx;
-	unsigned long flags;
-
-	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu.  If not it has been
-	 * scheduled out before the smp call arrived.  In that case
-	 * counter->count would have been updated to a recent sample
-	 * when the counter was scheduled out.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
-
-	local_irq_save(flags);
-	if (ctx->is_active)
-		update_context_time(ctx);
-	counter->pmu->read(counter);
-	update_counter_times(counter);
-	local_irq_restore(flags);
-}
-
-static u64 perf_counter_read(struct perf_counter *counter)
-{
-	/*
-	 * If counter is enabled and currently active on a CPU, update the
-	 * value in the counter structure:
-	 */
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		smp_call_function_single(counter->oncpu,
-					 __perf_counter_read, counter, 1);
-	} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
-		update_counter_times(counter);
-	}
-
-	return atomic64_read(&counter->count);
-}
-
-/*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			    struct task_struct *task)
-{
-	memset(ctx, 0, sizeof(*ctx));
-	spin_lock_init(&ctx->lock);
-	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
-	INIT_LIST_HEAD(&ctx->event_list);
-	atomic_set(&ctx->refcount, 1);
-	ctx->task = task;
-}
-
-static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
-{
-	struct perf_counter_context *ctx;
-	struct perf_cpu_context *cpuctx;
-	struct task_struct *task;
-	unsigned long flags;
-	int err;
-
-	/*
-	 * If cpu is not a wildcard then this is a percpu counter:
-	 */
-	if (cpu != -1) {
-		/* Must be root to operate on a CPU counter: */
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return ERR_PTR(-EACCES);
-
-		if (cpu < 0 || cpu > num_possible_cpus())
-			return ERR_PTR(-EINVAL);
-
-		/*
-		 * We could be clever and allow to attach a counter to an
-		 * offline CPU and activate it when the CPU comes up, but
-		 * that's for later.
-		 */
-		if (!cpu_isset(cpu, cpu_online_map))
-			return ERR_PTR(-ENODEV);
-
-		cpuctx = &per_cpu(perf_cpu_context, cpu);
-		ctx = &cpuctx->ctx;
-		get_ctx(ctx);
-
-		return ctx;
-	}
-
-	rcu_read_lock();
-	if (!pid)
-		task = current;
-	else
-		task = find_task_by_vpid(pid);
-	if (task)
-		get_task_struct(task);
-	rcu_read_unlock();
-
-	if (!task)
-		return ERR_PTR(-ESRCH);
-
-	/*
-	 * Can't attach counters to a dying task.
-	 */
-	err = -ESRCH;
-	if (task->flags & PF_EXITING)
-		goto errout;
-
-	/* Reuse ptrace permission checks for now. */
-	err = -EACCES;
-	if (!ptrace_may_access(task, PTRACE_MODE_READ))
-		goto errout;
-
- retry:
-	ctx = perf_lock_task_context(task, &flags);
-	if (ctx) {
-		unclone_ctx(ctx);
-		spin_unlock_irqrestore(&ctx->lock, flags);
-	}
-
-	if (!ctx) {
-		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
-		err = -ENOMEM;
-		if (!ctx)
-			goto errout;
-		__perf_counter_init_context(ctx, task);
-		get_ctx(ctx);
-		if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) {
-			/*
-			 * We raced with some other task; use
-			 * the context they set.
-			 */
-			kfree(ctx);
-			goto retry;
-		}
-		get_task_struct(task);
-	}
-
-	put_task_struct(task);
-	return ctx;
-
- errout:
-	put_task_struct(task);
-	return ERR_PTR(err);
-}
-
-static void free_counter_rcu(struct rcu_head *head)
-{
-	struct perf_counter *counter;
-
-	counter = container_of(head, struct perf_counter, rcu_head);
-	if (counter->ns)
-		put_pid_ns(counter->ns);
-	kfree(counter);
-}
-
-static void perf_pending_sync(struct perf_counter *counter);
-
-static void free_counter(struct perf_counter *counter)
-{
-	perf_pending_sync(counter);
-
-	if (!counter->parent) {
-		atomic_dec(&nr_counters);
-		if (counter->attr.mmap)
-			atomic_dec(&nr_mmap_counters);
-		if (counter->attr.comm)
-			atomic_dec(&nr_comm_counters);
-		if (counter->attr.task)
-			atomic_dec(&nr_task_counters);
-	}
-
-	if (counter->output) {
-		fput(counter->output->filp);
-		counter->output = NULL;
-	}
-
-	if (counter->destroy)
-		counter->destroy(counter);
-
-	put_ctx(counter->ctx);
-	call_rcu(&counter->rcu_head, free_counter_rcu);
-}
-
-/*
- * Called when the last reference to the file is gone.
- */
-static int perf_release(struct inode *inode, struct file *file)
-{
-	struct perf_counter *counter = file->private_data;
-	struct perf_counter_context *ctx = counter->ctx;
-
-	file->private_data = NULL;
-
-	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
-	perf_counter_remove_from_context(counter);
-	mutex_unlock(&ctx->mutex);
-
-	mutex_lock(&counter->owner->perf_counter_mutex);
-	list_del_init(&counter->owner_entry);
-	mutex_unlock(&counter->owner->perf_counter_mutex);
-	put_task_struct(counter->owner);
-
-	free_counter(counter);
-
-	return 0;
-}
-
-static int perf_counter_read_size(struct perf_counter *counter)
-{
-	int entry = sizeof(u64); /* value */
-	int size = 0;
-	int nr = 1;
-
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		size += sizeof(u64);
-
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		size += sizeof(u64);
-
-	if (counter->attr.read_format & PERF_FORMAT_ID)
-		entry += sizeof(u64);
-
-	if (counter->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += counter->group_leader->nr_siblings;
-		size += sizeof(u64);
-	}
-
-	size += entry * nr;
-
-	return size;
-}
-
-static u64 perf_counter_read_value(struct perf_counter *counter)
-{
-	struct perf_counter *child;
-	u64 total = 0;
-
-	total += perf_counter_read(counter);
-	list_for_each_entry(child, &counter->child_list, child_list)
-		total += perf_counter_read(child);
-
-	return total;
-}
-
-static int perf_counter_read_entry(struct perf_counter *counter,
-				   u64 read_format, char __user *buf)
-{
-	int n = 0, count = 0;
-	u64 values[2];
-
-	values[n++] = perf_counter_read_value(counter);
-	if (read_format & PERF_FORMAT_ID)
-		values[n++] = primary_counter_id(counter);
-
-	count = n * sizeof(u64);
-
-	if (copy_to_user(buf, values, count))
-		return -EFAULT;
-
-	return count;
-}
-
-static int perf_counter_read_group(struct perf_counter *counter,
-				   u64 read_format, char __user *buf)
-{
-	struct perf_counter *leader = counter->group_leader, *sub;
-	int n = 0, size = 0, err = -EFAULT;
-	u64 values[3];
-
-	values[n++] = 1 + leader->nr_siblings;
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = leader->total_time_enabled +
-			atomic64_read(&leader->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = leader->total_time_running +
-			atomic64_read(&leader->child_total_time_running);
-	}
-
-	size = n * sizeof(u64);
-
-	if (copy_to_user(buf, values, size))
-		return -EFAULT;
-
-	err = perf_counter_read_entry(leader, read_format, buf + size);
-	if (err < 0)
-		return err;
-
-	size += err;
-
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		err = perf_counter_read_entry(sub, read_format,
-				buf + size);
-		if (err < 0)
-			return err;
-
-		size += err;
-	}
-
-	return size;
-}
-
-static int perf_counter_read_one(struct perf_counter *counter,
-				 u64 read_format, char __user *buf)
-{
-	u64 values[4];
-	int n = 0;
-
-	values[n++] = perf_counter_read_value(counter);
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = counter->total_time_enabled +
-			atomic64_read(&counter->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = counter->total_time_running +
-			atomic64_read(&counter->child_total_time_running);
-	}
-	if (read_format & PERF_FORMAT_ID)
-		values[n++] = primary_counter_id(counter);
-
-	if (copy_to_user(buf, values, n * sizeof(u64)))
-		return -EFAULT;
-
-	return n * sizeof(u64);
-}
-
-/*
- * Read the performance counter - simple non blocking version for now
- */
-static ssize_t
-perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
-{
-	u64 read_format = counter->attr.read_format;
-	int ret;
-
-	/*
-	 * Return end-of-file for a read on a counter that is in
-	 * error state (i.e. because it was pinned but it couldn't be
-	 * scheduled on to the CPU at some point).
-	 */
-	if (counter->state == PERF_COUNTER_STATE_ERROR)
-		return 0;
-
-	if (count < perf_counter_read_size(counter))
-		return -ENOSPC;
-
-	WARN_ON_ONCE(counter->ctx->parent_ctx);
-	mutex_lock(&counter->child_mutex);
-	if (read_format & PERF_FORMAT_GROUP)
-		ret = perf_counter_read_group(counter, read_format, buf);
-	else
-		ret = perf_counter_read_one(counter, read_format, buf);
-	mutex_unlock(&counter->child_mutex);
-
-	return ret;
-}
-
-static ssize_t
-perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
-{
-	struct perf_counter *counter = file->private_data;
-
-	return perf_read_hw(counter, buf, count);
-}
-
-static unsigned int perf_poll(struct file *file, poll_table *wait)
-{
-	struct perf_counter *counter = file->private_data;
-	struct perf_mmap_data *data;
-	unsigned int events = POLL_HUP;
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (data)
-		events = atomic_xchg(&data->poll, 0);
-	rcu_read_unlock();
-
-	poll_wait(file, &counter->waitq, wait);
-
-	return events;
-}
-
-static void perf_counter_reset(struct perf_counter *counter)
-{
-	(void)perf_counter_read(counter);
-	atomic64_set(&counter->count, 0);
-	perf_counter_update_userpage(counter);
-}
-
-/*
- * Holding the top-level counter's child_mutex means that any
- * descendant process that has inherited this counter will block
- * in sync_child_counter if it goes to exit, thus satisfying the
- * task existence requirements of perf_counter_enable/disable.
- */
-static void perf_counter_for_each_child(struct perf_counter *counter,
-					void (*func)(struct perf_counter *))
-{
-	struct perf_counter *child;
-
-	WARN_ON_ONCE(counter->ctx->parent_ctx);
-	mutex_lock(&counter->child_mutex);
-	func(counter);
-	list_for_each_entry(child, &counter->child_list, child_list)
-		func(child);
-	mutex_unlock(&counter->child_mutex);
-}
-
-static void perf_counter_for_each(struct perf_counter *counter,
-				  void (*func)(struct perf_counter *))
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_counter *sibling;
-
-	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
-	counter = counter->group_leader;
-
-	perf_counter_for_each_child(counter, func);
-	func(counter);
-	list_for_each_entry(sibling, &counter->sibling_list, group_entry)
-		perf_counter_for_each_child(counter, func);
-	mutex_unlock(&ctx->mutex);
-}
-
-static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	unsigned long size;
-	int ret = 0;
-	u64 value;
-
-	if (!counter->attr.sample_period)
-		return -EINVAL;
-
-	size = copy_from_user(&value, arg, sizeof(value));
-	if (size != sizeof(value))
-		return -EFAULT;
-
-	if (!value)
-		return -EINVAL;
-
-	spin_lock_irq(&ctx->lock);
-	if (counter->attr.freq) {
-		if (value > sysctl_perf_counter_sample_rate) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-
-		counter->attr.sample_freq = value;
-	} else {
-		counter->attr.sample_period = value;
-		counter->hw.sample_period = value;
-	}
-unlock:
-	spin_unlock_irq(&ctx->lock);
-
-	return ret;
-}
-
-int perf_counter_set_output(struct perf_counter *counter, int output_fd);
-
-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct perf_counter *counter = file->private_data;
-	void (*func)(struct perf_counter *);
-	u32 flags = arg;
-
-	switch (cmd) {
-	case PERF_COUNTER_IOC_ENABLE:
-		func = perf_counter_enable;
-		break;
-	case PERF_COUNTER_IOC_DISABLE:
-		func = perf_counter_disable;
-		break;
-	case PERF_COUNTER_IOC_RESET:
-		func = perf_counter_reset;
-		break;
-
-	case PERF_COUNTER_IOC_REFRESH:
-		return perf_counter_refresh(counter, arg);
-
-	case PERF_COUNTER_IOC_PERIOD:
-		return perf_counter_period(counter, (u64 __user *)arg);
-
-	case PERF_COUNTER_IOC_SET_OUTPUT:
-		return perf_counter_set_output(counter, arg);
-
-	default:
-		return -ENOTTY;
-	}
-
-	if (flags & PERF_IOC_FLAG_GROUP)
-		perf_counter_for_each(counter, func);
-	else
-		perf_counter_for_each_child(counter, func);
-
-	return 0;
-}
-
-int perf_counter_task_enable(void)
-{
-	struct perf_counter *counter;
-
-	mutex_lock(&current->perf_counter_mutex);
-	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
-		perf_counter_for_each_child(counter, perf_counter_enable);
-	mutex_unlock(&current->perf_counter_mutex);
-
-	return 0;
-}
-
-int perf_counter_task_disable(void)
-{
-	struct perf_counter *counter;
-
-	mutex_lock(&current->perf_counter_mutex);
-	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
-		perf_counter_for_each_child(counter, perf_counter_disable);
-	mutex_unlock(&current->perf_counter_mutex);
-
-	return 0;
-}
-
-#ifndef PERF_COUNTER_INDEX_OFFSET
-# define PERF_COUNTER_INDEX_OFFSET 0
-#endif
-
-static int perf_counter_index(struct perf_counter *counter)
-{
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return 0;
-
-	return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET;
-}
-
-/*
- * Callers need to ensure there can be no nesting of this function, otherwise
- * the seqlock logic goes bad. We can not serialize this because the arch
- * code calls this from NMI context.
- */
-void perf_counter_update_userpage(struct perf_counter *counter)
-{
-	struct perf_counter_mmap_page *userpg;
-	struct perf_mmap_data *data;
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (!data)
-		goto unlock;
-
-	userpg = data->user_page;
-
-	/*
-	 * Disable preemption so as to not let the corresponding user-space
-	 * spin too long if we get preempted.
-	 */
-	preempt_disable();
-	++userpg->lock;
-	barrier();
-	userpg->index = perf_counter_index(counter);
-	userpg->offset = atomic64_read(&counter->count);
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-		userpg->offset -= atomic64_read(&counter->hw.prev_count);
-
-	userpg->time_enabled = counter->total_time_enabled +
-			atomic64_read(&counter->child_total_time_enabled);
-
-	userpg->time_running = counter->total_time_running +
-			atomic64_read(&counter->child_total_time_running);
-
-	barrier();
-	++userpg->lock;
-	preempt_enable();
-unlock:
-	rcu_read_unlock();
-}
-
-static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct perf_counter *counter = vma->vm_file->private_data;
-	struct perf_mmap_data *data;
-	int ret = VM_FAULT_SIGBUS;
-
-	if (vmf->flags & FAULT_FLAG_MKWRITE) {
-		if (vmf->pgoff == 0)
-			ret = 0;
-		return ret;
-	}
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (!data)
-		goto unlock;
-
-	if (vmf->pgoff == 0) {
-		vmf->page = virt_to_page(data->user_page);
-	} else {
-		int nr = vmf->pgoff - 1;
-
-		if ((unsigned)nr > data->nr_pages)
-			goto unlock;
-
-		if (vmf->flags & FAULT_FLAG_WRITE)
-			goto unlock;
-
-		vmf->page = virt_to_page(data->data_pages[nr]);
-	}
-
-	get_page(vmf->page);
-	vmf->page->mapping = vma->vm_file->f_mapping;
-	vmf->page->index   = vmf->pgoff;
-
-	ret = 0;
-unlock:
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
-{
-	struct perf_mmap_data *data;
-	unsigned long size;
-	int i;
-
-	WARN_ON(atomic_read(&counter->mmap_count));
-
-	size = sizeof(struct perf_mmap_data);
-	size += nr_pages * sizeof(void *);
-
-	data = kzalloc(size, GFP_KERNEL);
-	if (!data)
-		goto fail;
-
-	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!data->user_page)
-		goto fail_user_page;
-
-	for (i = 0; i < nr_pages; i++) {
-		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
-		if (!data->data_pages[i])
-			goto fail_data_pages;
-	}
-
-	data->nr_pages = nr_pages;
-	atomic_set(&data->lock, -1);
-
-	if (counter->attr.watermark) {
-		data->watermark = min_t(long, PAGE_SIZE * nr_pages,
-				      counter->attr.wakeup_watermark);
-	}
-	if (!data->watermark)
-		data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
-
-	rcu_assign_pointer(counter->data, data);
-
-	return 0;
-
-fail_data_pages:
-	for (i--; i >= 0; i--)
-		free_page((unsigned long)data->data_pages[i]);
-
-	free_page((unsigned long)data->user_page);
-
-fail_user_page:
-	kfree(data);
-
-fail:
-	return -ENOMEM;
-}
-
-static void perf_mmap_free_page(unsigned long addr)
-{
-	struct page *page = virt_to_page((void *)addr);
-
-	page->mapping = NULL;
-	__free_page(page);
-}
-
-static void __perf_mmap_data_free(struct rcu_head *rcu_head)
-{
-	struct perf_mmap_data *data;
-	int i;
-
-	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-
-	perf_mmap_free_page((unsigned long)data->user_page);
-	for (i = 0; i < data->nr_pages; i++)
-		perf_mmap_free_page((unsigned long)data->data_pages[i]);
-
-	kfree(data);
-}
-
-static void perf_mmap_data_free(struct perf_counter *counter)
-{
-	struct perf_mmap_data *data = counter->data;
-
-	WARN_ON(atomic_read(&counter->mmap_count));
-
-	rcu_assign_pointer(counter->data, NULL);
-	call_rcu(&data->rcu_head, __perf_mmap_data_free);
-}
-
-static void perf_mmap_open(struct vm_area_struct *vma)
-{
-	struct perf_counter *counter = vma->vm_file->private_data;
-
-	atomic_inc(&counter->mmap_count);
-}
-
-static void perf_mmap_close(struct vm_area_struct *vma)
-{
-	struct perf_counter *counter = vma->vm_file->private_data;
-
-	WARN_ON_ONCE(counter->ctx->parent_ctx);
-	if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) {
-		struct user_struct *user = current_user();
-
-		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
-		vma->vm_mm->locked_vm -= counter->data->nr_locked;
-		perf_mmap_data_free(counter);
-		mutex_unlock(&counter->mmap_mutex);
-	}
-}
-
-static struct vm_operations_struct perf_mmap_vmops = {
-	.open		= perf_mmap_open,
-	.close		= perf_mmap_close,
-	.fault		= perf_mmap_fault,
-	.page_mkwrite	= perf_mmap_fault,
-};
-
-static int perf_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct perf_counter *counter = file->private_data;
-	unsigned long user_locked, user_lock_limit;
-	struct user_struct *user = current_user();
-	unsigned long locked, lock_limit;
-	unsigned long vma_size;
-	unsigned long nr_pages;
-	long user_extra, extra;
-	int ret = 0;
-
-	if (!(vma->vm_flags & VM_SHARED))
-		return -EINVAL;
-
-	vma_size = vma->vm_end - vma->vm_start;
-	nr_pages = (vma_size / PAGE_SIZE) - 1;
-
-	/*
-	 * If we have data pages ensure they're a power-of-two number, so we
-	 * can do bitmasks instead of modulo.
-	 */
-	if (nr_pages != 0 && !is_power_of_2(nr_pages))
-		return -EINVAL;
-
-	if (vma_size != PAGE_SIZE * (1 + nr_pages))
-		return -EINVAL;
-
-	if (vma->vm_pgoff != 0)
-		return -EINVAL;
-
-	WARN_ON_ONCE(counter->ctx->parent_ctx);
-	mutex_lock(&counter->mmap_mutex);
-	if (counter->output) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	if (atomic_inc_not_zero(&counter->mmap_count)) {
-		if (nr_pages != counter->data->nr_pages)
-			ret = -EINVAL;
-		goto unlock;
-	}
-
-	user_extra = nr_pages + 1;
-	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
-
-	/*
-	 * Increase the limit linearly with more CPUs:
-	 */
-	user_lock_limit *= num_online_cpus();
-
-	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
-
-	extra = 0;
-	if (user_locked > user_lock_limit)
-		extra = user_locked - user_lock_limit;
-
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
-	lock_limit >>= PAGE_SHIFT;
-	locked = vma->vm_mm->locked_vm + extra;
-
-	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
-		!capable(CAP_IPC_LOCK)) {
-		ret = -EPERM;
-		goto unlock;
-	}
-
-	WARN_ON(counter->data);
-	ret = perf_mmap_data_alloc(counter, nr_pages);
-	if (ret)
-		goto unlock;
-
-	atomic_set(&counter->mmap_count, 1);
-	atomic_long_add(user_extra, &user->locked_vm);
-	vma->vm_mm->locked_vm += extra;
-	counter->data->nr_locked = extra;
-	if (vma->vm_flags & VM_WRITE)
-		counter->data->writable = 1;
-
-unlock:
-	mutex_unlock(&counter->mmap_mutex);
-
-	vma->vm_flags |= VM_RESERVED;
-	vma->vm_ops = &perf_mmap_vmops;
-
-	return ret;
-}
-
-static int perf_fasync(int fd, struct file *filp, int on)
-{
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct perf_counter *counter = filp->private_data;
-	int retval;
-
-	mutex_lock(&inode->i_mutex);
-	retval = fasync_helper(fd, filp, on, &counter->fasync);
-	mutex_unlock(&inode->i_mutex);
-
-	if (retval < 0)
-		return retval;
-
-	return 0;
-}
-
-static const struct file_operations perf_fops = {
-	.release		= perf_release,
-	.read			= perf_read,
-	.poll			= perf_poll,
-	.unlocked_ioctl		= perf_ioctl,
-	.compat_ioctl		= perf_ioctl,
-	.mmap			= perf_mmap,
-	.fasync			= perf_fasync,
-};
-
-/*
- * Perf counter wakeup
- *
- * If there's data, ensure we set the poll() state and publish everything
- * to user-space before waking everybody up.
- */
-
-void perf_counter_wakeup(struct perf_counter *counter)
-{
-	wake_up_all(&counter->waitq);
-
-	if (counter->pending_kill) {
-		kill_fasync(&counter->fasync, SIGIO, counter->pending_kill);
-		counter->pending_kill = 0;
-	}
-}
-
-/*
- * Pending wakeups
- *
- * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
- *
- * The NMI bit means we cannot possibly take locks. Therefore, maintain a
- * single linked list and use cmpxchg() to add entries lockless.
- */
-
-static void perf_pending_counter(struct perf_pending_entry *entry)
-{
-	struct perf_counter *counter = container_of(entry,
-			struct perf_counter, pending);
-
-	if (counter->pending_disable) {
-		counter->pending_disable = 0;
-		__perf_counter_disable(counter);
-	}
-
-	if (counter->pending_wakeup) {
-		counter->pending_wakeup = 0;
-		perf_counter_wakeup(counter);
-	}
-}
-
-#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
-
-static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
-	PENDING_TAIL,
-};
-
-static void perf_pending_queue(struct perf_pending_entry *entry,
-			       void (*func)(struct perf_pending_entry *))
-{
-	struct perf_pending_entry **head;
-
-	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
-		return;
-
-	entry->func = func;
-
-	head = &get_cpu_var(perf_pending_head);
-
-	do {
-		entry->next = *head;
-	} while (cmpxchg(head, entry->next, entry) != entry->next);
-
-	set_perf_counter_pending();
-
-	put_cpu_var(perf_pending_head);
-}
-
-static int __perf_pending_run(void)
-{
-	struct perf_pending_entry *list;
-	int nr = 0;
-
-	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
-	while (list != PENDING_TAIL) {
-		void (*func)(struct perf_pending_entry *);
-		struct perf_pending_entry *entry = list;
-
-		list = list->next;
-
-		func = entry->func;
-		entry->next = NULL;
-		/*
-		 * Ensure we observe the unqueue before we issue the wakeup,
-		 * so that we won't be waiting forever.
-		 * -- see perf_not_pending().
-		 */
-		smp_wmb();
-
-		func(entry);
-		nr++;
-	}
-
-	return nr;
-}
-
-static inline int perf_not_pending(struct perf_counter *counter)
-{
-	/*
-	 * If we flush on whatever cpu we run, there is a chance we don't
-	 * need to wait.
-	 */
-	get_cpu();
-	__perf_pending_run();
-	put_cpu();
-
-	/*
-	 * Ensure we see the proper queue state before going to sleep
-	 * so that we do not miss the wakeup. -- see perf_pending_handle()
-	 */
-	smp_rmb();
-	return counter->pending.next == NULL;
-}
-
-static void perf_pending_sync(struct perf_counter *counter)
-{
-	wait_event(counter->waitq, perf_not_pending(counter));
-}
-
-void perf_counter_do_pending(void)
-{
-	__perf_pending_run();
-}
-
-/*
- * Callchain support -- arch specific
- */
-
-__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	return NULL;
-}
-
-/*
- * Output
- */
-static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
-			      unsigned long offset, unsigned long head)
-{
-	unsigned long mask;
-
-	if (!data->writable)
-		return true;
-
-	mask = (data->nr_pages << PAGE_SHIFT) - 1;
-
-	offset = (offset - tail) & mask;
-	head   = (head   - tail) & mask;
-
-	if ((int)(head - offset) < 0)
-		return false;
-
-	return true;
-}
-
-static void perf_output_wakeup(struct perf_output_handle *handle)
-{
-	atomic_set(&handle->data->poll, POLL_IN);
-
-	if (handle->nmi) {
-		handle->counter->pending_wakeup = 1;
-		perf_pending_queue(&handle->counter->pending,
-				   perf_pending_counter);
-	} else
-		perf_counter_wakeup(handle->counter);
-}
-
-/*
- * Curious locking construct.
- *
- * We need to ensure a later event doesn't publish a head when a former
- * event isn't done writing. However since we need to deal with NMIs we
- * cannot fully serialize things.
- *
- * What we do is serialize between CPUs so we only have to deal with NMI
- * nesting on a single CPU.
- *
- * We only publish the head (and generate a wakeup) when the outer-most
- * event completes.
- */
-static void perf_output_lock(struct perf_output_handle *handle)
-{
-	struct perf_mmap_data *data = handle->data;
-	int cpu;
-
-	handle->locked = 0;
-
-	local_irq_save(handle->flags);
-	cpu = smp_processor_id();
-
-	if (in_nmi() && atomic_read(&data->lock) == cpu)
-		return;
-
-	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
-		cpu_relax();
-
-	handle->locked = 1;
-}
-
-static void perf_output_unlock(struct perf_output_handle *handle)
-{
-	struct perf_mmap_data *data = handle->data;
-	unsigned long head;
-	int cpu;
-
-	data->done_head = data->head;
-
-	if (!handle->locked)
-		goto out;
-
-again:
-	/*
-	 * The xchg implies a full barrier that ensures all writes are done
-	 * before we publish the new head, matched by a rmb() in userspace when
-	 * reading this position.
-	 */
-	while ((head = atomic_long_xchg(&data->done_head, 0)))
-		data->user_page->data_head = head;
-
-	/*
-	 * NMI can happen here, which means we can miss a done_head update.
-	 */
-
-	cpu = atomic_xchg(&data->lock, -1);
-	WARN_ON_ONCE(cpu != smp_processor_id());
-
-	/*
-	 * Therefore we have to validate we did not indeed do so.
-	 */
-	if (unlikely(atomic_long_read(&data->done_head))) {
-		/*
-		 * Since we had it locked, we can lock it again.
-		 */
-		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
-			cpu_relax();
-
-		goto again;
-	}
-
-	if (atomic_xchg(&data->wakeup, 0))
-		perf_output_wakeup(handle);
-out:
-	local_irq_restore(handle->flags);
-}
-
-void perf_output_copy(struct perf_output_handle *handle,
-		      const void *buf, unsigned int len)
-{
-	unsigned int pages_mask;
-	unsigned int offset;
-	unsigned int size;
-	void **pages;
-
-	offset		= handle->offset;
-	pages_mask	= handle->data->nr_pages - 1;
-	pages		= handle->data->data_pages;
-
-	do {
-		unsigned int page_offset;
-		int nr;
-
-		nr	    = (offset >> PAGE_SHIFT) & pages_mask;
-		page_offset = offset & (PAGE_SIZE - 1);
-		size	    = min_t(unsigned int, PAGE_SIZE - page_offset, len);
-
-		memcpy(pages[nr] + page_offset, buf, size);
-
-		len	    -= size;
-		buf	    += size;
-		offset	    += size;
-	} while (len);
-
-	handle->offset = offset;
-
-	/*
-	 * Check we didn't copy past our reservation window, taking the
-	 * possible unsigned int wrap into account.
-	 */
-	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
-}
-
-int perf_output_begin(struct perf_output_handle *handle,
-		      struct perf_counter *counter, unsigned int size,
-		      int nmi, int sample)
-{
-	struct perf_counter *output_counter;
-	struct perf_mmap_data *data;
-	unsigned long tail, offset, head;
-	int have_lost;
-	struct {
-		struct perf_event_header header;
-		u64			 id;
-		u64			 lost;
-	} lost_event;
-
-	rcu_read_lock();
-	/*
-	 * For inherited counters we send all the output towards the parent.
-	 */
-	if (counter->parent)
-		counter = counter->parent;
-
-	output_counter = rcu_dereference(counter->output);
-	if (output_counter)
-		counter = output_counter;
-
-	data = rcu_dereference(counter->data);
-	if (!data)
-		goto out;
-
-	handle->data	= data;
-	handle->counter	= counter;
-	handle->nmi	= nmi;
-	handle->sample	= sample;
-
-	if (!data->nr_pages)
-		goto fail;
-
-	have_lost = atomic_read(&data->lost);
-	if (have_lost)
-		size += sizeof(lost_event);
-
-	perf_output_lock(handle);
-
-	do {
-		/*
-		 * Userspace could choose to issue a mb() before updating the
-		 * tail pointer. So that all reads will be completed before the
-		 * write is issued.
-		 */
-		tail = ACCESS_ONCE(data->user_page->data_tail);
-		smp_rmb();
-		offset = head = atomic_long_read(&data->head);
-		head += size;
-		if (unlikely(!perf_output_space(data, tail, offset, head)))
-			goto fail;
-	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
-
-	handle->offset	= offset;
-	handle->head	= head;
-
-	if (head - tail > data->watermark)
-		atomic_set(&data->wakeup, 1);
-
-	if (have_lost) {
-		lost_event.header.type = PERF_EVENT_LOST;
-		lost_event.header.misc = 0;
-		lost_event.header.size = sizeof(lost_event);
-		lost_event.id          = counter->id;
-		lost_event.lost        = atomic_xchg(&data->lost, 0);
-
-		perf_output_put(handle, lost_event);
-	}
-
-	return 0;
-
-fail:
-	atomic_inc(&data->lost);
-	perf_output_unlock(handle);
-out:
-	rcu_read_unlock();
-
-	return -ENOSPC;
-}
-
-void perf_output_end(struct perf_output_handle *handle)
-{
-	struct perf_counter *counter = handle->counter;
-	struct perf_mmap_data *data = handle->data;
-
-	int wakeup_events = counter->attr.wakeup_events;
-
-	if (handle->sample && wakeup_events) {
-		int events = atomic_inc_return(&data->events);
-		if (events >= wakeup_events) {
-			atomic_sub(wakeup_events, &data->events);
-			atomic_set(&data->wakeup, 1);
-		}
-	}
-
-	perf_output_unlock(handle);
-	rcu_read_unlock();
-}
-
-static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p)
-{
-	/*
-	 * only top level counters have the pid namespace they were created in
-	 */
-	if (counter->parent)
-		counter = counter->parent;
-
-	return task_tgid_nr_ns(p, counter->ns);
-}
-
-static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
-{
-	/*
-	 * only top level counters have the pid namespace they were created in
-	 */
-	if (counter->parent)
-		counter = counter->parent;
-
-	return task_pid_nr_ns(p, counter->ns);
-}
-
-static void perf_output_read_one(struct perf_output_handle *handle,
-				 struct perf_counter *counter)
-{
-	u64 read_format = counter->attr.read_format;
-	u64 values[4];
-	int n = 0;
-
-	values[n++] = atomic64_read(&counter->count);
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = counter->total_time_enabled +
-			atomic64_read(&counter->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = counter->total_time_running +
-			atomic64_read(&counter->child_total_time_running);
-	}
-	if (read_format & PERF_FORMAT_ID)
-		values[n++] = primary_counter_id(counter);
-
-	perf_output_copy(handle, values, n * sizeof(u64));
-}
-
-/*
- * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
- */
-static void perf_output_read_group(struct perf_output_handle *handle,
-			    struct perf_counter *counter)
-{
-	struct perf_counter *leader = counter->group_leader, *sub;
-	u64 read_format = counter->attr.read_format;
-	u64 values[5];
-	int n = 0;
-
-	values[n++] = 1 + leader->nr_siblings;
-
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		values[n++] = leader->total_time_enabled;
-
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		values[n++] = leader->total_time_running;
-
-	if (leader != counter)
-		leader->pmu->read(leader);
-
-	values[n++] = atomic64_read(&leader->count);
-	if (read_format & PERF_FORMAT_ID)
-		values[n++] = primary_counter_id(leader);
-
-	perf_output_copy(handle, values, n * sizeof(u64));
-
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		n = 0;
-
-		if (sub != counter)
-			sub->pmu->read(sub);
-
-		values[n++] = atomic64_read(&sub->count);
-		if (read_format & PERF_FORMAT_ID)
-			values[n++] = primary_counter_id(sub);
-
-		perf_output_copy(handle, values, n * sizeof(u64));
-	}
-}
-
-static void perf_output_read(struct perf_output_handle *handle,
-			     struct perf_counter *counter)
-{
-	if (counter->attr.read_format & PERF_FORMAT_GROUP)
-		perf_output_read_group(handle, counter);
-	else
-		perf_output_read_one(handle, counter);
-}
-
-void perf_output_sample(struct perf_output_handle *handle,
-			struct perf_event_header *header,
-			struct perf_sample_data *data,
-			struct perf_counter *counter)
-{
-	u64 sample_type = data->type;
-
-	perf_output_put(handle, *header);
-
-	if (sample_type & PERF_SAMPLE_IP)
-		perf_output_put(handle, data->ip);
-
-	if (sample_type & PERF_SAMPLE_TID)
-		perf_output_put(handle, data->tid_entry);
-
-	if (sample_type & PERF_SAMPLE_TIME)
-		perf_output_put(handle, data->time);
-
-	if (sample_type & PERF_SAMPLE_ADDR)
-		perf_output_put(handle, data->addr);
-
-	if (sample_type & PERF_SAMPLE_ID)
-		perf_output_put(handle, data->id);
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID)
-		perf_output_put(handle, data->stream_id);
-
-	if (sample_type & PERF_SAMPLE_CPU)
-		perf_output_put(handle, data->cpu_entry);
-
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		perf_output_put(handle, data->period);
-
-	if (sample_type & PERF_SAMPLE_READ)
-		perf_output_read(handle, counter);
-
-	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		if (data->callchain) {
-			int size = 1;
-
-			if (data->callchain)
-				size += data->callchain->nr;
-
-			size *= sizeof(u64);
-
-			perf_output_copy(handle, data->callchain, size);
-		} else {
-			u64 nr = 0;
-			perf_output_put(handle, nr);
-		}
-	}
-
-	if (sample_type & PERF_SAMPLE_RAW) {
-		if (data->raw) {
-			perf_output_put(handle, data->raw->size);
-			perf_output_copy(handle, data->raw->data,
-					 data->raw->size);
-		} else {
-			struct {
-				u32	size;
-				u32	data;
-			} raw = {
-				.size = sizeof(u32),
-				.data = 0,
-			};
-			perf_output_put(handle, raw);
-		}
-	}
-}
-
-void perf_prepare_sample(struct perf_event_header *header,
-			 struct perf_sample_data *data,
-			 struct perf_counter *counter,
-			 struct pt_regs *regs)
-{
-	u64 sample_type = counter->attr.sample_type;
-
-	data->type = sample_type;
-
-	header->type = PERF_EVENT_SAMPLE;
-	header->size = sizeof(*header);
-
-	header->misc = 0;
-	header->misc |= perf_misc_flags(regs);
-
-	if (sample_type & PERF_SAMPLE_IP) {
-		data->ip = perf_instruction_pointer(regs);
-
-		header->size += sizeof(data->ip);
-	}
-
-	if (sample_type & PERF_SAMPLE_TID) {
-		/* namespace issues */
-		data->tid_entry.pid = perf_counter_pid(counter, current);
-		data->tid_entry.tid = perf_counter_tid(counter, current);
-
-		header->size += sizeof(data->tid_entry);
-	}
-
-	if (sample_type & PERF_SAMPLE_TIME) {
-		data->time = perf_clock();
-
-		header->size += sizeof(data->time);
-	}
-
-	if (sample_type & PERF_SAMPLE_ADDR)
-		header->size += sizeof(data->addr);
-
-	if (sample_type & PERF_SAMPLE_ID) {
-		data->id = primary_counter_id(counter);
-
-		header->size += sizeof(data->id);
-	}
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID) {
-		data->stream_id = counter->id;
-
-		header->size += sizeof(data->stream_id);
-	}
-
-	if (sample_type & PERF_SAMPLE_CPU) {
-		data->cpu_entry.cpu		= raw_smp_processor_id();
-		data->cpu_entry.reserved	= 0;
-
-		header->size += sizeof(data->cpu_entry);
-	}
-
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		header->size += sizeof(data->period);
-
-	if (sample_type & PERF_SAMPLE_READ)
-		header->size += perf_counter_read_size(counter);
-
-	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		int size = 1;
-
-		data->callchain = perf_callchain(regs);
-
-		if (data->callchain)
-			size += data->callchain->nr;
-
-		header->size += size * sizeof(u64);
-	}
-
-	if (sample_type & PERF_SAMPLE_RAW) {
-		int size = sizeof(u32);
-
-		if (data->raw)
-			size += data->raw->size;
-		else
-			size += sizeof(u32);
-
-		WARN_ON_ONCE(size & (sizeof(u64)-1));
-		header->size += size;
-	}
-}
-
-static void perf_counter_output(struct perf_counter *counter, int nmi,
-				struct perf_sample_data *data,
-				struct pt_regs *regs)
-{
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-
-	perf_prepare_sample(&header, data, counter, regs);
-
-	if (perf_output_begin(&handle, counter, header.size, nmi, 1))
-		return;
-
-	perf_output_sample(&handle, &header, data, counter);
-
-	perf_output_end(&handle);
-}
-
-/*
- * read event
- */
-
-struct perf_read_event {
-	struct perf_event_header	header;
-
-	u32				pid;
-	u32				tid;
-};
-
-static void
-perf_counter_read_event(struct perf_counter *counter,
-			struct task_struct *task)
-{
-	struct perf_output_handle handle;
-	struct perf_read_event read_event = {
-		.header = {
-			.type = PERF_EVENT_READ,
-			.misc = 0,
-			.size = sizeof(read_event) + perf_counter_read_size(counter),
-		},
-		.pid = perf_counter_pid(counter, task),
-		.tid = perf_counter_tid(counter, task),
-	};
-	int ret;
-
-	ret = perf_output_begin(&handle, counter, read_event.header.size, 0, 0);
-	if (ret)
-		return;
-
-	perf_output_put(&handle, read_event);
-	perf_output_read(&handle, counter);
-
-	perf_output_end(&handle);
-}
-
-/*
- * task tracking -- fork/exit
- *
- * enabled by: attr.comm | attr.mmap | attr.task
- */
-
-struct perf_task_event {
-	struct task_struct		*task;
-	struct perf_counter_context	*task_ctx;
-
-	struct {
-		struct perf_event_header	header;
-
-		u32				pid;
-		u32				ppid;
-		u32				tid;
-		u32				ptid;
-		u64				time;
-	} event;
-};
-
-static void perf_counter_task_output(struct perf_counter *counter,
-				     struct perf_task_event *task_event)
-{
-	struct perf_output_handle handle;
-	int size;
-	struct task_struct *task = task_event->task;
-	int ret;
-
-	size  = task_event->event.header.size;
-	ret = perf_output_begin(&handle, counter, size, 0, 0);
-
-	if (ret)
-		return;
-
-	task_event->event.pid = perf_counter_pid(counter, task);
-	task_event->event.ppid = perf_counter_pid(counter, current);
-
-	task_event->event.tid = perf_counter_tid(counter, task);
-	task_event->event.ptid = perf_counter_tid(counter, current);
-
-	task_event->event.time = perf_clock();
-
-	perf_output_put(&handle, task_event->event);
-
-	perf_output_end(&handle);
-}
-
-static int perf_counter_task_match(struct perf_counter *counter)
-{
-	if (counter->attr.comm || counter->attr.mmap || counter->attr.task)
-		return 1;
-
-	return 0;
-}
-
-static void perf_counter_task_ctx(struct perf_counter_context *ctx,
-				  struct perf_task_event *task_event)
-{
-	struct perf_counter *counter;
-
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_counter_task_match(counter))
-			perf_counter_task_output(counter, task_event);
-	}
-	rcu_read_unlock();
-}
-
-static void perf_counter_task_event(struct perf_task_event *task_event)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx = task_event->task_ctx;
-
-	cpuctx = &get_cpu_var(perf_cpu_context);
-	perf_counter_task_ctx(&cpuctx->ctx, task_event);
-	put_cpu_var(perf_cpu_context);
-
-	rcu_read_lock();
-	if (!ctx)
-		ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
-	if (ctx)
-		perf_counter_task_ctx(ctx, task_event);
-	rcu_read_unlock();
-}
-
-static void perf_counter_task(struct task_struct *task,
-			      struct perf_counter_context *task_ctx,
-			      int new)
-{
-	struct perf_task_event task_event;
-
-	if (!atomic_read(&nr_comm_counters) &&
-	    !atomic_read(&nr_mmap_counters) &&
-	    !atomic_read(&nr_task_counters))
-		return;
-
-	task_event = (struct perf_task_event){
-		.task	  = task,
-		.task_ctx = task_ctx,
-		.event    = {
-			.header = {
-				.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
-				.misc = 0,
-				.size = sizeof(task_event.event),
-			},
-			/* .pid  */
-			/* .ppid */
-			/* .tid  */
-			/* .ptid */
-		},
-	};
-
-	perf_counter_task_event(&task_event);
-}
-
-void perf_counter_fork(struct task_struct *task)
-{
-	perf_counter_task(task, NULL, 1);
-}
-
-/*
- * comm tracking
- */
-
-struct perf_comm_event {
-	struct task_struct	*task;
-	char			*comm;
-	int			comm_size;
-
-	struct {
-		struct perf_event_header	header;
-
-		u32				pid;
-		u32				tid;
-	} event;
-};
-
-static void perf_counter_comm_output(struct perf_counter *counter,
-				     struct perf_comm_event *comm_event)
-{
-	struct perf_output_handle handle;
-	int size = comm_event->event.header.size;
-	int ret = perf_output_begin(&handle, counter, size, 0, 0);
-
-	if (ret)
-		return;
-
-	comm_event->event.pid = perf_counter_pid(counter, comm_event->task);
-	comm_event->event.tid = perf_counter_tid(counter, comm_event->task);
-
-	perf_output_put(&handle, comm_event->event);
-	perf_output_copy(&handle, comm_event->comm,
-				   comm_event->comm_size);
-	perf_output_end(&handle);
-}
-
-static int perf_counter_comm_match(struct perf_counter *counter)
-{
-	if (counter->attr.comm)
-		return 1;
-
-	return 0;
-}
-
-static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
-				  struct perf_comm_event *comm_event)
-{
-	struct perf_counter *counter;
-
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_counter_comm_match(counter))
-			perf_counter_comm_output(counter, comm_event);
-	}
-	rcu_read_unlock();
-}
-
-static void perf_counter_comm_event(struct perf_comm_event *comm_event)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx;
-	unsigned int size;
-	char comm[TASK_COMM_LEN];
-
-	memset(comm, 0, sizeof(comm));
-	strncpy(comm, comm_event->task->comm, sizeof(comm));
-	size = ALIGN(strlen(comm)+1, sizeof(u64));
-
-	comm_event->comm = comm;
-	comm_event->comm_size = size;
-
-	comm_event->event.header.size = sizeof(comm_event->event) + size;
-
-	cpuctx = &get_cpu_var(perf_cpu_context);
-	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
-	put_cpu_var(perf_cpu_context);
-
-	rcu_read_lock();
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_counter_ctxp);
-	if (ctx)
-		perf_counter_comm_ctx(ctx, comm_event);
-	rcu_read_unlock();
-}
-
-void perf_counter_comm(struct task_struct *task)
-{
-	struct perf_comm_event comm_event;
-
-	if (task->perf_counter_ctxp)
-		perf_counter_enable_on_exec(task);
-
-	if (!atomic_read(&nr_comm_counters))
-		return;
-
-	comm_event = (struct perf_comm_event){
-		.task	= task,
-		/* .comm      */
-		/* .comm_size */
-		.event  = {
-			.header = {
-				.type = PERF_EVENT_COMM,
-				.misc = 0,
-				/* .size */
-			},
-			/* .pid */
-			/* .tid */
-		},
-	};
-
-	perf_counter_comm_event(&comm_event);
-}
-
-/*
- * mmap tracking
- */
-
-struct perf_mmap_event {
-	struct vm_area_struct	*vma;
-
-	const char		*file_name;
-	int			file_size;
-
-	struct {
-		struct perf_event_header	header;
-
-		u32				pid;
-		u32				tid;
-		u64				start;
-		u64				len;
-		u64				pgoff;
-	} event;
-};
-
-static void perf_counter_mmap_output(struct perf_counter *counter,
-				     struct perf_mmap_event *mmap_event)
-{
-	struct perf_output_handle handle;
-	int size = mmap_event->event.header.size;
-	int ret = perf_output_begin(&handle, counter, size, 0, 0);
-
-	if (ret)
-		return;
-
-	mmap_event->event.pid = perf_counter_pid(counter, current);
-	mmap_event->event.tid = perf_counter_tid(counter, current);
-
-	perf_output_put(&handle, mmap_event->event);
-	perf_output_copy(&handle, mmap_event->file_name,
-				   mmap_event->file_size);
-	perf_output_end(&handle);
-}
-
-static int perf_counter_mmap_match(struct perf_counter *counter,
-				   struct perf_mmap_event *mmap_event)
-{
-	if (counter->attr.mmap)
-		return 1;
-
-	return 0;
-}
-
-static void perf_counter_mmap_ctx(struct perf_counter_context *ctx,
-				  struct perf_mmap_event *mmap_event)
-{
-	struct perf_counter *counter;
-
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_counter_mmap_match(counter, mmap_event))
-			perf_counter_mmap_output(counter, mmap_event);
-	}
-	rcu_read_unlock();
-}
-
-static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx;
-	struct vm_area_struct *vma = mmap_event->vma;
-	struct file *file = vma->vm_file;
-	unsigned int size;
-	char tmp[16];
-	char *buf = NULL;
-	const char *name;
-
-	memset(tmp, 0, sizeof(tmp));
-
-	if (file) {
-		/*
-		 * d_path works from the end of the buffer backwards, so we
-		 * need to add enough zero bytes after the string to handle
-		 * the 64bit alignment we do later.
-		 */
-		buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
-		if (!buf) {
-			name = strncpy(tmp, "//enomem", sizeof(tmp));
-			goto got_name;
-		}
-		name = d_path(&file->f_path, buf, PATH_MAX);
-		if (IS_ERR(name)) {
-			name = strncpy(tmp, "//toolong", sizeof(tmp));
-			goto got_name;
-		}
-	} else {
-		if (arch_vma_name(mmap_event->vma)) {
-			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp));
-			goto got_name;
-		}
-
-		if (!vma->vm_mm) {
-			name = strncpy(tmp, "[vdso]", sizeof(tmp));
-			goto got_name;
-		}
-
-		name = strncpy(tmp, "//anon", sizeof(tmp));
-		goto got_name;
-	}
-
-got_name:
-	size = ALIGN(strlen(name)+1, sizeof(u64));
-
-	mmap_event->file_name = name;
-	mmap_event->file_size = size;
-
-	mmap_event->event.header.size = sizeof(mmap_event->event) + size;
-
-	cpuctx = &get_cpu_var(perf_cpu_context);
-	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
-	put_cpu_var(perf_cpu_context);
-
-	rcu_read_lock();
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_counter_ctxp);
-	if (ctx)
-		perf_counter_mmap_ctx(ctx, mmap_event);
-	rcu_read_unlock();
-
-	kfree(buf);
-}
-
-void __perf_counter_mmap(struct vm_area_struct *vma)
-{
-	struct perf_mmap_event mmap_event;
-
-	if (!atomic_read(&nr_mmap_counters))
-		return;
-
-	mmap_event = (struct perf_mmap_event){
-		.vma	= vma,
-		/* .file_name */
-		/* .file_size */
-		.event  = {
-			.header = {
-				.type = PERF_EVENT_MMAP,
-				.misc = 0,
-				/* .size */
-			},
-			/* .pid */
-			/* .tid */
-			.start  = vma->vm_start,
-			.len    = vma->vm_end - vma->vm_start,
-			.pgoff  = vma->vm_pgoff,
-		},
-	};
-
-	perf_counter_mmap_event(&mmap_event);
-}
-
-/*
- * IRQ throttle logging
- */
-
-static void perf_log_throttle(struct perf_counter *counter, int enable)
-{
-	struct perf_output_handle handle;
-	int ret;
-
-	struct {
-		struct perf_event_header	header;
-		u64				time;
-		u64				id;
-		u64				stream_id;
-	} throttle_event = {
-		.header = {
-			.type = PERF_EVENT_THROTTLE,
-			.misc = 0,
-			.size = sizeof(throttle_event),
-		},
-		.time		= perf_clock(),
-		.id		= primary_counter_id(counter),
-		.stream_id	= counter->id,
-	};
-
-	if (enable)
-		throttle_event.header.type = PERF_EVENT_UNTHROTTLE;
-
-	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
-	if (ret)
-		return;
-
-	perf_output_put(&handle, throttle_event);
-	perf_output_end(&handle);
-}
-
-/*
- * Generic counter overflow handling, sampling.
- */
-
-static int __perf_counter_overflow(struct perf_counter *counter, int nmi,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
-{
-	int events = atomic_read(&counter->event_limit);
-	struct hw_perf_counter *hwc = &counter->hw;
-	int ret = 0;
-
-	throttle = (throttle && counter->pmu->unthrottle != NULL);
-
-	if (!throttle) {
-		hwc->interrupts++;
-	} else {
-		if (hwc->interrupts != MAX_INTERRUPTS) {
-			hwc->interrupts++;
-			if (HZ * hwc->interrupts >
-					(u64)sysctl_perf_counter_sample_rate) {
-				hwc->interrupts = MAX_INTERRUPTS;
-				perf_log_throttle(counter, 0);
-				ret = 1;
-			}
-		} else {
-			/*
-			 * Keep re-disabling counters even though on the previous
-			 * pass we disabled it - just in case we raced with a
-			 * sched-in and the counter got enabled again:
-			 */
-			ret = 1;
-		}
-	}
-
-	if (counter->attr.freq) {
-		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
-
-		hwc->freq_stamp = now;
-
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
-	}
-
-	/*
-	 * XXX event_limit might not quite work as expected on inherited
-	 * counters
-	 */
-
-	counter->pending_kill = POLL_IN;
-	if (events && atomic_dec_and_test(&counter->event_limit)) {
-		ret = 1;
-		counter->pending_kill = POLL_HUP;
-		if (nmi) {
-			counter->pending_disable = 1;
-			perf_pending_queue(&counter->pending,
-					   perf_pending_counter);
-		} else
-			perf_counter_disable(counter);
-	}
-
-	perf_counter_output(counter, nmi, data, regs);
-	return ret;
-}
-
-int perf_counter_overflow(struct perf_counter *counter, int nmi,
-			  struct perf_sample_data *data,
-			  struct pt_regs *regs)
-{
-	return __perf_counter_overflow(counter, nmi, 1, data, regs);
-}
-
-/*
- * Generic software counter infrastructure
- */
-
-/*
- * We directly increment counter->count and keep a second value in
- * counter->hw.period_left to count intervals. This period counter
- * is kept in the range [-sample_period, 0] so that we can use the
- * sign as trigger.
- */
-
-static u64 perf_swcounter_set_period(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	u64 period = hwc->last_period;
-	u64 nr, offset;
-	s64 old, val;
-
-	hwc->last_period = hwc->sample_period;
-
-again:
-	old = val = atomic64_read(&hwc->period_left);
-	if (val < 0)
-		return 0;
-
-	nr = div64_u64(period + val, period);
-	offset = nr * period;
-	val -= offset;
-	if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
-		goto again;
-
-	return nr;
-}
-
-static void perf_swcounter_overflow(struct perf_counter *counter,
-				    int nmi, struct perf_sample_data *data,
-				    struct pt_regs *regs)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	int throttle = 0;
-	u64 overflow;
-
-	data->period = counter->hw.last_period;
-	overflow = perf_swcounter_set_period(counter);
-
-	if (hwc->interrupts == MAX_INTERRUPTS)
-		return;
-
-	for (; overflow; overflow--) {
-		if (__perf_counter_overflow(counter, nmi, throttle,
-					    data, regs)) {
-			/*
-			 * We inhibit the overflow from happening when
-			 * hwc->interrupts == MAX_INTERRUPTS.
-			 */
-			break;
-		}
-		throttle = 1;
-	}
-}
-
-static void perf_swcounter_unthrottle(struct perf_counter *counter)
-{
-	/*
-	 * Nothing to do, we already reset hwc->interrupts.
-	 */
-}
-
-static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-			       int nmi, struct perf_sample_data *data,
-			       struct pt_regs *regs)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-
-	atomic64_add(nr, &counter->count);
-
-	if (!hwc->sample_period)
-		return;
-
-	if (!regs)
-		return;
-
-	if (!atomic64_add_negative(nr, &hwc->period_left))
-		perf_swcounter_overflow(counter, nmi, data, regs);
-}
-
-static int perf_swcounter_is_counting(struct perf_counter *counter)
-{
-	/*
-	 * The counter is active, we're good!
-	 */
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-		return 1;
-
-	/*
-	 * The counter is off/error, not counting.
-	 */
-	if (counter->state != PERF_COUNTER_STATE_INACTIVE)
-		return 0;
-
-	/*
-	 * The counter is inactive, if the context is active
-	 * we're part of a group that didn't make it on the 'pmu',
-	 * not counting.
-	 */
-	if (counter->ctx->is_active)
-		return 0;
-
-	/*
-	 * We're inactive and the context is too, this means the
-	 * task is scheduled out, we're counting events that happen
-	 * to us, like migration events.
-	 */
-	return 1;
-}
-
-static int perf_swcounter_match(struct perf_counter *counter,
-				enum perf_type_id type,
-				u32 event_id, struct pt_regs *regs)
-{
-	if (!perf_swcounter_is_counting(counter))
-		return 0;
-
-	if (counter->attr.type != type)
-		return 0;
-	if (counter->attr.config != event_id)
-		return 0;
-
-	if (regs) {
-		if (counter->attr.exclude_user && user_mode(regs))
-			return 0;
-
-		if (counter->attr.exclude_kernel && !user_mode(regs))
-			return 0;
-	}
-
-	return 1;
-}
-
-static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
-				     enum perf_type_id type,
-				     u32 event_id, u64 nr, int nmi,
-				     struct perf_sample_data *data,
-				     struct pt_regs *regs)
-{
-	struct perf_counter *counter;
-
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_swcounter_match(counter, type, event_id, regs))
-			perf_swcounter_add(counter, nr, nmi, data, regs);
-	}
-	rcu_read_unlock();
-}
-
-static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
-{
-	if (in_nmi())
-		return &cpuctx->recursion[3];
-
-	if (in_irq())
-		return &cpuctx->recursion[2];
-
-	if (in_softirq())
-		return &cpuctx->recursion[1];
-
-	return &cpuctx->recursion[0];
-}
-
-static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
-				    u64 nr, int nmi,
-				    struct perf_sample_data *data,
-				    struct pt_regs *regs)
-{
-	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
-	int *recursion = perf_swcounter_recursion_context(cpuctx);
-	struct perf_counter_context *ctx;
-
-	if (*recursion)
-		goto out;
-
-	(*recursion)++;
-	barrier();
-
-	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
-				 nr, nmi, data, regs);
-	rcu_read_lock();
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_counter_ctxp);
-	if (ctx)
-		perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs);
-	rcu_read_unlock();
-
-	barrier();
-	(*recursion)--;
-
-out:
-	put_cpu_var(perf_cpu_context);
-}
-
-void __perf_swcounter_event(u32 event, u64 nr, int nmi,
-			    struct pt_regs *regs, u64 addr)
-{
-	struct perf_sample_data data = {
-		.addr = addr,
-	};
-
-	do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi,
-				&data, regs);
-}
-
-static void perf_swcounter_read(struct perf_counter *counter)
-{
-}
-
-static int perf_swcounter_enable(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-
-	if (hwc->sample_period) {
-		hwc->last_period = hwc->sample_period;
-		perf_swcounter_set_period(counter);
-	}
-	return 0;
-}
-
-static void perf_swcounter_disable(struct perf_counter *counter)
-{
-}
-
-static const struct pmu perf_ops_generic = {
-	.enable		= perf_swcounter_enable,
-	.disable	= perf_swcounter_disable,
-	.read		= perf_swcounter_read,
-	.unthrottle	= perf_swcounter_unthrottle,
-};
-
-/*
- * hrtimer based swcounter callback
- */
-
-static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
-{
-	enum hrtimer_restart ret = HRTIMER_RESTART;
-	struct perf_sample_data data;
-	struct pt_regs *regs;
-	struct perf_counter *counter;
-	u64 period;
-
-	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
-	counter->pmu->read(counter);
-
-	data.addr = 0;
-	regs = get_irq_regs();
-	/*
-	 * In case we exclude kernel IPs or are somehow not in interrupt
-	 * context, provide the next best thing, the user IP.
-	 */
-	if ((counter->attr.exclude_kernel || !regs) &&
-			!counter->attr.exclude_user)
-		regs = task_pt_regs(current);
-
-	if (regs) {
-		if (perf_counter_overflow(counter, 0, &data, regs))
-			ret = HRTIMER_NORESTART;
-	}
-
-	period = max_t(u64, 10000, counter->hw.sample_period);
-	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
-
-	return ret;
-}
-
-/*
- * Software counter: cpu wall time clock
- */
-
-static void cpu_clock_perf_counter_update(struct perf_counter *counter)
-{
-	int cpu = raw_smp_processor_id();
-	s64 prev;
-	u64 now;
-
-	now = cpu_clock(cpu);
-	prev = atomic64_read(&counter->hw.prev_count);
-	atomic64_set(&counter->hw.prev_count, now);
-	atomic64_add(now - prev, &counter->count);
-}
-
-static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	int cpu = raw_smp_processor_id();
-
-	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hwc->hrtimer.function = perf_swcounter_hrtimer;
-	if (hwc->sample_period) {
-		u64 period = max_t(u64, 10000, hwc->sample_period);
-		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(period), 0,
-				HRTIMER_MODE_REL, 0);
-	}
-
-	return 0;
-}
-
-static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
-{
-	if (counter->hw.sample_period)
-		hrtimer_cancel(&counter->hw.hrtimer);
-	cpu_clock_perf_counter_update(counter);
-}
-
-static void cpu_clock_perf_counter_read(struct perf_counter *counter)
-{
-	cpu_clock_perf_counter_update(counter);
-}
-
-static const struct pmu perf_ops_cpu_clock = {
-	.enable		= cpu_clock_perf_counter_enable,
-	.disable	= cpu_clock_perf_counter_disable,
-	.read		= cpu_clock_perf_counter_read,
-};
-
-/*
- * Software counter: task time clock
- */
-
-static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
-{
-	u64 prev;
-	s64 delta;
-
-	prev = atomic64_xchg(&counter->hw.prev_count, now);
-	delta = now - prev;
-	atomic64_add(delta, &counter->count);
-}
-
-static int task_clock_perf_counter_enable(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	u64 now;
-
-	now = counter->ctx->time;
-
-	atomic64_set(&hwc->prev_count, now);
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hwc->hrtimer.function = perf_swcounter_hrtimer;
-	if (hwc->sample_period) {
-		u64 period = max_t(u64, 10000, hwc->sample_period);
-		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(period), 0,
-				HRTIMER_MODE_REL, 0);
-	}
-
-	return 0;
-}
-
-static void task_clock_perf_counter_disable(struct perf_counter *counter)
-{
-	if (counter->hw.sample_period)
-		hrtimer_cancel(&counter->hw.hrtimer);
-	task_clock_perf_counter_update(counter, counter->ctx->time);
-
-}
-
-static void task_clock_perf_counter_read(struct perf_counter *counter)
-{
-	u64 time;
-
-	if (!in_nmi()) {
-		update_context_time(counter->ctx);
-		time = counter->ctx->time;
-	} else {
-		u64 now = perf_clock();
-		u64 delta = now - counter->ctx->timestamp;
-		time = counter->ctx->time + delta;
-	}
-
-	task_clock_perf_counter_update(counter, time);
-}
-
-static const struct pmu perf_ops_task_clock = {
-	.enable		= task_clock_perf_counter_enable,
-	.disable	= task_clock_perf_counter_disable,
-	.read		= task_clock_perf_counter_read,
-};
-
-#ifdef CONFIG_EVENT_PROFILE
-void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
-			  int entry_size)
-{
-	struct perf_raw_record raw = {
-		.size = entry_size,
-		.data = record,
-	};
-
-	struct perf_sample_data data = {
-		.addr = addr,
-		.raw = &raw,
-	};
-
-	struct pt_regs *regs = get_irq_regs();
-
-	if (!regs)
-		regs = task_pt_regs(current);
-
-	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
-				&data, regs);
-}
-EXPORT_SYMBOL_GPL(perf_tpcounter_event);
-
-extern int ftrace_profile_enable(int);
-extern void ftrace_profile_disable(int);
-
-static void tp_perf_counter_destroy(struct perf_counter *counter)
-{
-	ftrace_profile_disable(counter->attr.config);
-}
-
-static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
-{
-	/*
-	 * Raw tracepoint data is a severe data leak, only allow root to
-	 * have these.
-	 */
-	if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
-			perf_paranoid_tracepoint_raw() &&
-			!capable(CAP_SYS_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	if (ftrace_profile_enable(counter->attr.config))
-		return NULL;
-
-	counter->destroy = tp_perf_counter_destroy;
-
-	return &perf_ops_generic;
-}
-#else
-static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
-{
-	return NULL;
-}
-#endif
-
-atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
-
-static void sw_perf_counter_destroy(struct perf_counter *counter)
-{
-	u64 event_id = counter->attr.config;
-
-	WARN_ON(counter->parent);
-
-	atomic_dec(&perf_swcounter_enabled[event_id]);
-}
-
-static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
-{
-	const struct pmu *pmu = NULL;
-	u64 event_id = counter->attr.config;
-
-	/*
-	 * Software counters (currently) can't in general distinguish
-	 * between user, kernel and hypervisor events.
-	 * However, context switches and cpu migrations are considered
-	 * to be kernel events, and page faults are never hypervisor
-	 * events.
-	 */
-	switch (event_id) {
-	case PERF_COUNT_SW_CPU_CLOCK:
-		pmu = &perf_ops_cpu_clock;
-
-		break;
-	case PERF_COUNT_SW_TASK_CLOCK:
-		/*
-		 * If the user instantiates this as a per-cpu counter,
-		 * use the cpu_clock counter instead.
-		 */
-		if (counter->ctx->task)
-			pmu = &perf_ops_task_clock;
-		else
-			pmu = &perf_ops_cpu_clock;
-
-		break;
-	case PERF_COUNT_SW_PAGE_FAULTS:
-	case PERF_COUNT_SW_PAGE_FAULTS_MIN:
-	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
-	case PERF_COUNT_SW_CONTEXT_SWITCHES:
-	case PERF_COUNT_SW_CPU_MIGRATIONS:
-		if (!counter->parent) {
-			atomic_inc(&perf_swcounter_enabled[event_id]);
-			counter->destroy = sw_perf_counter_destroy;
-		}
-		pmu = &perf_ops_generic;
-		break;
-	}
-
-	return pmu;
-}
-
-/*
- * Allocate and initialize a counter structure
- */
-static struct perf_counter *
-perf_counter_alloc(struct perf_counter_attr *attr,
-		   int cpu,
-		   struct perf_counter_context *ctx,
-		   struct perf_counter *group_leader,
-		   struct perf_counter *parent_counter,
-		   gfp_t gfpflags)
-{
-	const struct pmu *pmu;
-	struct perf_counter *counter;
-	struct hw_perf_counter *hwc;
-	long err;
-
-	counter = kzalloc(sizeof(*counter), gfpflags);
-	if (!counter)
-		return ERR_PTR(-ENOMEM);
-
-	/*
-	 * Single counters are their own group leaders, with an
-	 * empty sibling list:
-	 */
-	if (!group_leader)
-		group_leader = counter;
-
-	mutex_init(&counter->child_mutex);
-	INIT_LIST_HEAD(&counter->child_list);
-
-	INIT_LIST_HEAD(&counter->group_entry);
-	INIT_LIST_HEAD(&counter->event_entry);
-	INIT_LIST_HEAD(&counter->sibling_list);
-	init_waitqueue_head(&counter->waitq);
-
-	mutex_init(&counter->mmap_mutex);
-
-	counter->cpu		= cpu;
-	counter->attr		= *attr;
-	counter->group_leader	= group_leader;
-	counter->pmu		= NULL;
-	counter->ctx		= ctx;
-	counter->oncpu		= -1;
-
-	counter->parent		= parent_counter;
-
-	counter->ns		= get_pid_ns(current->nsproxy->pid_ns);
-	counter->id		= atomic64_inc_return(&perf_counter_id);
-
-	counter->state		= PERF_COUNTER_STATE_INACTIVE;
-
-	if (attr->disabled)
-		counter->state = PERF_COUNTER_STATE_OFF;
-
-	pmu = NULL;
-
-	hwc = &counter->hw;
-	hwc->sample_period = attr->sample_period;
-	if (attr->freq && attr->sample_freq)
-		hwc->sample_period = 1;
-	hwc->last_period = hwc->sample_period;
-
-	atomic64_set(&hwc->period_left, hwc->sample_period);
-
-	/*
-	 * we currently do not support PERF_FORMAT_GROUP on inherited counters
-	 */
-	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
-		goto done;
-
-	switch (attr->type) {
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-	case PERF_TYPE_HW_CACHE:
-		pmu = hw_perf_counter_init(counter);
-		break;
-
-	case PERF_TYPE_SOFTWARE:
-		pmu = sw_perf_counter_init(counter);
-		break;
-
-	case PERF_TYPE_TRACEPOINT:
-		pmu = tp_perf_counter_init(counter);
-		break;
-
-	default:
-		break;
-	}
-done:
-	err = 0;
-	if (!pmu)
-		err = -EINVAL;
-	else if (IS_ERR(pmu))
-		err = PTR_ERR(pmu);
-
-	if (err) {
-		if (counter->ns)
-			put_pid_ns(counter->ns);
-		kfree(counter);
-		return ERR_PTR(err);
-	}
-
-	counter->pmu = pmu;
-
-	if (!counter->parent) {
-		atomic_inc(&nr_counters);
-		if (counter->attr.mmap)
-			atomic_inc(&nr_mmap_counters);
-		if (counter->attr.comm)
-			atomic_inc(&nr_comm_counters);
-		if (counter->attr.task)
-			atomic_inc(&nr_task_counters);
-	}
-
-	return counter;
-}
-
-static int perf_copy_attr(struct perf_counter_attr __user *uattr,
-			  struct perf_counter_attr *attr)
-{
-	u32 size;
-	int ret;
-
-	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
-		return -EFAULT;
-
-	/*
-	 * zero the full structure, so that a short copy will be nice.
-	 */
-	memset(attr, 0, sizeof(*attr));
-
-	ret = get_user(size, &uattr->size);
-	if (ret)
-		return ret;
-
-	if (size > PAGE_SIZE)	/* silly large */
-		goto err_size;
-
-	if (!size)		/* abi compat */
-		size = PERF_ATTR_SIZE_VER0;
-
-	if (size < PERF_ATTR_SIZE_VER0)
-		goto err_size;
-
-	/*
-	 * If we're handed a bigger struct than we know of,
-	 * ensure all the unknown bits are 0 - i.e. new
-	 * user-space does not rely on any kernel feature
-	 * extensions we dont know about yet.
-	 */
-	if (size > sizeof(*attr)) {
-		unsigned char __user *addr;
-		unsigned char __user *end;
-		unsigned char val;
-
-		addr = (void __user *)uattr + sizeof(*attr);
-		end  = (void __user *)uattr + size;
-
-		for (; addr < end; addr++) {
-			ret = get_user(val, addr);
-			if (ret)
-				return ret;
-			if (val)
-				goto err_size;
-		}
-		size = sizeof(*attr);
-	}
-
-	ret = copy_from_user(attr, uattr, size);
-	if (ret)
-		return -EFAULT;
-
-	/*
-	 * If the type exists, the corresponding creation will verify
-	 * the attr->config.
-	 */
-	if (attr->type >= PERF_TYPE_MAX)
-		return -EINVAL;
-
-	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
-		return -EINVAL;
-
-	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
-		return -EINVAL;
-
-	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
-		return -EINVAL;
-
-out:
-	return ret;
-
-err_size:
-	put_user(sizeof(*attr), &uattr->size);
-	ret = -E2BIG;
-	goto out;
-}
-
-int perf_counter_set_output(struct perf_counter *counter, int output_fd)
-{
-	struct perf_counter *output_counter = NULL;
-	struct file *output_file = NULL;
-	struct perf_counter *old_output;
-	int fput_needed = 0;
-	int ret = -EINVAL;
-
-	if (!output_fd)
-		goto set;
-
-	output_file = fget_light(output_fd, &fput_needed);
-	if (!output_file)
-		return -EBADF;
-
-	if (output_file->f_op != &perf_fops)
-		goto out;
-
-	output_counter = output_file->private_data;
-
-	/* Don't chain output fds */
-	if (output_counter->output)
-		goto out;
-
-	/* Don't set an output fd when we already have an output channel */
-	if (counter->data)
-		goto out;
-
-	atomic_long_inc(&output_file->f_count);
-
-set:
-	mutex_lock(&counter->mmap_mutex);
-	old_output = counter->output;
-	rcu_assign_pointer(counter->output, output_counter);
-	mutex_unlock(&counter->mmap_mutex);
-
-	if (old_output) {
-		/*
-		 * we need to make sure no existing perf_output_*()
-		 * is still referencing this counter.
-		 */
-		synchronize_rcu();
-		fput(old_output->filp);
-	}
-
-	ret = 0;
-out:
-	fput_light(output_file, fput_needed);
-	return ret;
-}
-
-/**
- * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
- *
- * @attr_uptr:	event type attributes for monitoring/sampling
- * @pid:		target pid
- * @cpu:		target cpu
- * @group_fd:		group leader counter fd
- */
-SYSCALL_DEFINE5(perf_counter_open,
-		struct perf_counter_attr __user *, attr_uptr,
-		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
-{
-	struct perf_counter *counter, *group_leader;
-	struct perf_counter_attr attr;
-	struct perf_counter_context *ctx;
-	struct file *counter_file = NULL;
-	struct file *group_file = NULL;
-	int fput_needed = 0;
-	int fput_needed2 = 0;
-	int err;
-
-	/* for future expandability... */
-	if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
-		return -EINVAL;
-
-	err = perf_copy_attr(attr_uptr, &attr);
-	if (err)
-		return err;
-
-	if (!attr.exclude_kernel) {
-		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
-	}
-
-	if (attr.freq) {
-		if (attr.sample_freq > sysctl_perf_counter_sample_rate)
-			return -EINVAL;
-	}
-
-	/*
-	 * Get the target context (task or percpu):
-	 */
-	ctx = find_get_context(pid, cpu);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	/*
-	 * Look up the group leader (we will attach this counter to it):
-	 */
-	group_leader = NULL;
-	if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
-		err = -EINVAL;
-		group_file = fget_light(group_fd, &fput_needed);
-		if (!group_file)
-			goto err_put_context;
-		if (group_file->f_op != &perf_fops)
-			goto err_put_context;
-
-		group_leader = group_file->private_data;
-		/*
-		 * Do not allow a recursive hierarchy (this new sibling
-		 * becoming part of another group-sibling):
-		 */
-		if (group_leader->group_leader != group_leader)
-			goto err_put_context;
-		/*
-		 * Do not allow to attach to a group in a different
-		 * task or CPU context:
-		 */
-		if (group_leader->ctx != ctx)
-			goto err_put_context;
-		/*
-		 * Only a group leader can be exclusive or pinned
-		 */
-		if (attr.exclusive || attr.pinned)
-			goto err_put_context;
-	}
-
-	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
-				     NULL, GFP_KERNEL);
-	err = PTR_ERR(counter);
-	if (IS_ERR(counter))
-		goto err_put_context;
-
-	err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
-	if (err < 0)
-		goto err_free_put_context;
-
-	counter_file = fget_light(err, &fput_needed2);
-	if (!counter_file)
-		goto err_free_put_context;
-
-	if (flags & PERF_FLAG_FD_OUTPUT) {
-		err = perf_counter_set_output(counter, group_fd);
-		if (err)
-			goto err_fput_free_put_context;
-	}
-
-	counter->filp = counter_file;
-	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
-	perf_install_in_context(ctx, counter, cpu);
-	++ctx->generation;
-	mutex_unlock(&ctx->mutex);
-
-	counter->owner = current;
-	get_task_struct(current);
-	mutex_lock(&current->perf_counter_mutex);
-	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
-	mutex_unlock(&current->perf_counter_mutex);
-
-err_fput_free_put_context:
-	fput_light(counter_file, fput_needed2);
-
-err_free_put_context:
-	if (err < 0)
-		kfree(counter);
-
-err_put_context:
-	if (err < 0)
-		put_ctx(ctx);
-
-	fput_light(group_file, fput_needed);
-
-	return err;
-}
-
-/*
- * inherit a counter from parent task to child task:
- */
-static struct perf_counter *
-inherit_counter(struct perf_counter *parent_counter,
-	      struct task_struct *parent,
-	      struct perf_counter_context *parent_ctx,
-	      struct task_struct *child,
-	      struct perf_counter *group_leader,
-	      struct perf_counter_context *child_ctx)
-{
-	struct perf_counter *child_counter;
-
-	/*
-	 * Instead of creating recursive hierarchies of counters,
-	 * we link inherited counters back to the original parent,
-	 * which has a filp for sure, which we use as the reference
-	 * count:
-	 */
-	if (parent_counter->parent)
-		parent_counter = parent_counter->parent;
-
-	child_counter = perf_counter_alloc(&parent_counter->attr,
-					   parent_counter->cpu, child_ctx,
-					   group_leader, parent_counter,
-					   GFP_KERNEL);
-	if (IS_ERR(child_counter))
-		return child_counter;
-	get_ctx(child_ctx);
-
-	/*
-	 * Make the child state follow the state of the parent counter,
-	 * not its attr.disabled bit.  We hold the parent's mutex,
-	 * so we won't race with perf_counter_{en, dis}able_family.
-	 */
-	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-	else
-		child_counter->state = PERF_COUNTER_STATE_OFF;
-
-	if (parent_counter->attr.freq)
-		child_counter->hw.sample_period = parent_counter->hw.sample_period;
-
-	/*
-	 * Link it up in the child's context:
-	 */
-	add_counter_to_ctx(child_counter, child_ctx);
-
-	/*
-	 * Get a reference to the parent filp - we will fput it
-	 * when the child counter exits. This is safe to do because
-	 * we are in the parent and we know that the filp still
-	 * exists and has a nonzero count:
-	 */
-	atomic_long_inc(&parent_counter->filp->f_count);
-
-	/*
-	 * Link this into the parent counter's child list
-	 */
-	WARN_ON_ONCE(parent_counter->ctx->parent_ctx);
-	mutex_lock(&parent_counter->child_mutex);
-	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
-	mutex_unlock(&parent_counter->child_mutex);
-
-	return child_counter;
-}
-
-static int inherit_group(struct perf_counter *parent_counter,
-	      struct task_struct *parent,
-	      struct perf_counter_context *parent_ctx,
-	      struct task_struct *child,
-	      struct perf_counter_context *child_ctx)
-{
-	struct perf_counter *leader;
-	struct perf_counter *sub;
-	struct perf_counter *child_ctr;
-
-	leader = inherit_counter(parent_counter, parent, parent_ctx,
-				 child, NULL, child_ctx);
-	if (IS_ERR(leader))
-		return PTR_ERR(leader);
-	list_for_each_entry(sub, &parent_counter->sibling_list, group_entry) {
-		child_ctr = inherit_counter(sub, parent, parent_ctx,
-					    child, leader, child_ctx);
-		if (IS_ERR(child_ctr))
-			return PTR_ERR(child_ctr);
-	}
-	return 0;
-}
-
-static void sync_child_counter(struct perf_counter *child_counter,
-			       struct task_struct *child)
-{
-	struct perf_counter *parent_counter = child_counter->parent;
-	u64 child_val;
-
-	if (child_counter->attr.inherit_stat)
-		perf_counter_read_event(child_counter, child);
-
-	child_val = atomic64_read(&child_counter->count);
-
-	/*
-	 * Add back the child's count to the parent's count:
-	 */
-	atomic64_add(child_val, &parent_counter->count);
-	atomic64_add(child_counter->total_time_enabled,
-		     &parent_counter->child_total_time_enabled);
-	atomic64_add(child_counter->total_time_running,
-		     &parent_counter->child_total_time_running);
-
-	/*
-	 * Remove this counter from the parent's list
-	 */
-	WARN_ON_ONCE(parent_counter->ctx->parent_ctx);
-	mutex_lock(&parent_counter->child_mutex);
-	list_del_init(&child_counter->child_list);
-	mutex_unlock(&parent_counter->child_mutex);
-
-	/*
-	 * Release the parent counter, if this was the last
-	 * reference to it.
-	 */
-	fput(parent_counter->filp);
-}
-
-static void
-__perf_counter_exit_task(struct perf_counter *child_counter,
-			 struct perf_counter_context *child_ctx,
-			 struct task_struct *child)
-{
-	struct perf_counter *parent_counter;
-
-	update_counter_times(child_counter);
-	perf_counter_remove_from_context(child_counter);
-
-	parent_counter = child_counter->parent;
-	/*
-	 * It can happen that parent exits first, and has counters
-	 * that are still around due to the child reference. These
-	 * counters need to be zapped - but otherwise linger.
-	 */
-	if (parent_counter) {
-		sync_child_counter(child_counter, child);
-		free_counter(child_counter);
-	}
-}
-
-/*
- * When a child task exits, feed back counter values to parent counters.
- */
-void perf_counter_exit_task(struct task_struct *child)
-{
-	struct perf_counter *child_counter, *tmp;
-	struct perf_counter_context *child_ctx;
-	unsigned long flags;
-
-	if (likely(!child->perf_counter_ctxp)) {
-		perf_counter_task(child, NULL, 0);
-		return;
-	}
-
-	local_irq_save(flags);
-	/*
-	 * We can't reschedule here because interrupts are disabled,
-	 * and either child is current or it is a task that can't be
-	 * scheduled, so we are now safe from rescheduling changing
-	 * our context.
-	 */
-	child_ctx = child->perf_counter_ctxp;
-	__perf_counter_task_sched_out(child_ctx);
-
-	/*
-	 * Take the context lock here so that if find_get_context is
-	 * reading child->perf_counter_ctxp, we wait until it has
-	 * incremented the context's refcount before we do put_ctx below.
-	 */
-	spin_lock(&child_ctx->lock);
-	child->perf_counter_ctxp = NULL;
-	/*
-	 * If this context is a clone; unclone it so it can't get
-	 * swapped to another process while we're removing all
-	 * the counters from it.
-	 */
-	unclone_ctx(child_ctx);
-	spin_unlock_irqrestore(&child_ctx->lock, flags);
-
-	/*
-	 * Report the task dead after unscheduling the counters so that we
-	 * won't get any samples after PERF_EVENT_EXIT. We can however still
-	 * get a few PERF_EVENT_READ events.
-	 */
-	perf_counter_task(child, child_ctx, 0);
-
-	/*
-	 * We can recurse on the same lock type through:
-	 *
-	 *   __perf_counter_exit_task()
-	 *     sync_child_counter()
-	 *       fput(parent_counter->filp)
-	 *         perf_release()
-	 *           mutex_lock(&ctx->mutex)
-	 *
-	 * But since its the parent context it won't be the same instance.
-	 */
-	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
-
-again:
-	list_for_each_entry_safe(child_counter, tmp, &child_ctx->group_list,
-				 group_entry)
-		__perf_counter_exit_task(child_counter, child_ctx, child);
-
-	/*
-	 * If the last counter was a group counter, it will have appended all
-	 * its siblings to the list, but we obtained 'tmp' before that which
-	 * will still point to the list head terminating the iteration.
-	 */
-	if (!list_empty(&child_ctx->group_list))
-		goto again;
-
-	mutex_unlock(&child_ctx->mutex);
-
-	put_ctx(child_ctx);
-}
-
-/*
- * free an unexposed, unused context as created by inheritance by
- * init_task below, used by fork() in case of fail.
- */
-void perf_counter_free_task(struct task_struct *task)
-{
-	struct perf_counter_context *ctx = task->perf_counter_ctxp;
-	struct perf_counter *counter, *tmp;
-
-	if (!ctx)
-		return;
-
-	mutex_lock(&ctx->mutex);
-again:
-	list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry) {
-		struct perf_counter *parent = counter->parent;
-
-		if (WARN_ON_ONCE(!parent))
-			continue;
-
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&counter->child_list);
-		mutex_unlock(&parent->child_mutex);
-
-		fput(parent->filp);
-
-		list_del_counter(counter, ctx);
-		free_counter(counter);
-	}
-
-	if (!list_empty(&ctx->group_list))
-		goto again;
-
-	mutex_unlock(&ctx->mutex);
-
-	put_ctx(ctx);
-}
-
-/*
- * Initialize the perf_counter context in task_struct
- */
-int perf_counter_init_task(struct task_struct *child)
-{
-	struct perf_counter_context *child_ctx, *parent_ctx;
-	struct perf_counter_context *cloned_ctx;
-	struct perf_counter *counter;
-	struct task_struct *parent = current;
-	int inherited_all = 1;
-	int ret = 0;
-
-	child->perf_counter_ctxp = NULL;
-
-	mutex_init(&child->perf_counter_mutex);
-	INIT_LIST_HEAD(&child->perf_counter_list);
-
-	if (likely(!parent->perf_counter_ctxp))
-		return 0;
-
-	/*
-	 * This is executed from the parent task context, so inherit
-	 * counters that have been marked for cloning.
-	 * First allocate and initialize a context for the child.
-	 */
-
-	child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
-	if (!child_ctx)
-		return -ENOMEM;
-
-	__perf_counter_init_context(child_ctx, child);
-	child->perf_counter_ctxp = child_ctx;
-	get_task_struct(child);
-
-	/*
-	 * If the parent's context is a clone, pin it so it won't get
-	 * swapped under us.
-	 */
-	parent_ctx = perf_pin_task_context(parent);
-
-	/*
-	 * No need to check if parent_ctx != NULL here; since we saw
-	 * it non-NULL earlier, the only reason for it to become NULL
-	 * is if we exit, and since we're currently in the middle of
-	 * a fork we can't be exiting at the same time.
-	 */
-
-	/*
-	 * Lock the parent list. No need to lock the child - not PID
-	 * hashed yet and not running, so nobody can access it.
-	 */
-	mutex_lock(&parent_ctx->mutex);
-
-	/*
-	 * We dont have to disable NMIs - we are only looking at
-	 * the list, not manipulating it:
-	 */
-	list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) {
-		if (counter != counter->group_leader)
-			continue;
-
-		if (!counter->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
-
-		ret = inherit_group(counter, parent, parent_ctx,
-					     child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
-			break;
-		}
-	}
-
-	if (inherited_all) {
-		/*
-		 * Mark the child context as a clone of the parent
-		 * context, or of whatever the parent is a clone of.
-		 * Note that if the parent is a clone, it could get
-		 * uncloned at any point, but that doesn't matter
-		 * because the list of counters and the generation
-		 * count can't have changed since we took the mutex.
-		 */
-		cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
-		if (cloned_ctx) {
-			child_ctx->parent_ctx = cloned_ctx;
-			child_ctx->parent_gen = parent_ctx->parent_gen;
-		} else {
-			child_ctx->parent_ctx = parent_ctx;
-			child_ctx->parent_gen = parent_ctx->generation;
-		}
-		get_ctx(child_ctx->parent_ctx);
-	}
-
-	mutex_unlock(&parent_ctx->mutex);
-
-	perf_unpin_context(parent_ctx);
-
-	return ret;
-}
-
-static void __cpuinit perf_counter_init_cpu(int cpu)
-{
-	struct perf_cpu_context *cpuctx;
-
-	cpuctx = &per_cpu(perf_cpu_context, cpu);
-	__perf_counter_init_context(&cpuctx->ctx, NULL);
-
-	spin_lock(&perf_resource_lock);
-	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
-	spin_unlock(&perf_resource_lock);
-
-	hw_perf_counter_setup(cpu);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static void __perf_counter_exit_cpu(void *info)
-{
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter_context *ctx = &cpuctx->ctx;
-	struct perf_counter *counter, *tmp;
-
-	list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry)
-		__perf_counter_remove_from_context(counter);
-}
-static void perf_counter_exit_cpu(int cpu)
-{
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &cpuctx->ctx;
-
-	mutex_lock(&ctx->mutex);
-	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
-	mutex_unlock(&ctx->mutex);
-}
-#else
-static inline void perf_counter_exit_cpu(int cpu) { }
-#endif
-
-static int __cpuinit
-perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (long)hcpu;
-
-	switch (action) {
-
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		perf_counter_init_cpu(cpu);
-		break;
-
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		hw_perf_counter_setup_online(cpu);
-		break;
-
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		perf_counter_exit_cpu(cpu);
-		break;
-
-	default:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-/*
- * This has to have a higher priority than migration_notifier in sched.c.
- */
-static struct notifier_block __cpuinitdata perf_cpu_nb = {
-	.notifier_call		= perf_cpu_notify,
-	.priority		= 20,
-};
-
-void __init perf_counter_init(void)
-{
-	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
-			(void *)(long)smp_processor_id());
-	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
-			(void *)(long)smp_processor_id());
-	register_cpu_notifier(&perf_cpu_nb);
-}
-
-static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
-{
-	return sprintf(buf, "%d\n", perf_reserved_percpu);
-}
-
-static ssize_t
-perf_set_reserve_percpu(struct sysdev_class *class,
-			const char *buf,
-			size_t count)
-{
-	struct perf_cpu_context *cpuctx;
-	unsigned long val;
-	int err, cpu, mpt;
-
-	err = strict_strtoul(buf, 10, &val);
-	if (err)
-		return err;
-	if (val > perf_max_counters)
-		return -EINVAL;
-
-	spin_lock(&perf_resource_lock);
-	perf_reserved_percpu = val;
-	for_each_online_cpu(cpu) {
-		cpuctx = &per_cpu(perf_cpu_context, cpu);
-		spin_lock_irq(&cpuctx->ctx.lock);
-		mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
-			  perf_max_counters - perf_reserved_percpu);
-		cpuctx->max_pertask = mpt;
-		spin_unlock_irq(&cpuctx->ctx.lock);
-	}
-	spin_unlock(&perf_resource_lock);
-
-	return count;
-}
-
-static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
-{
-	return sprintf(buf, "%d\n", perf_overcommit);
-}
-
-static ssize_t
-perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
-{
-	unsigned long val;
-	int err;
-
-	err = strict_strtoul(buf, 10, &val);
-	if (err)
-		return err;
-	if (val > 1)
-		return -EINVAL;
-
-	spin_lock(&perf_resource_lock);
-	perf_overcommit = val;
-	spin_unlock(&perf_resource_lock);
-
-	return count;
-}
-
-static SYSDEV_CLASS_ATTR(
-				reserve_percpu,
-				0644,
-				perf_show_reserve_percpu,
-				perf_set_reserve_percpu
-			);
-
-static SYSDEV_CLASS_ATTR(
-				overcommit,
-				0644,
-				perf_show_overcommit,
-				perf_set_overcommit
-			);
-
-static struct attribute *perfclass_attrs[] = {
-	&attr_reserve_percpu.attr,
-	&attr_overcommit.attr,
-	NULL
-};
-
-static struct attribute_group perfclass_attr_group = {
-	.attrs			= perfclass_attrs,
-	.name			= "perf_counters",
-};
-
-static int __init perf_counter_sysfs_init(void)
-{
-	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
-				  &perfclass_attr_group);
-}
-device_initcall(perf_counter_sysfs_init);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
new file mode 100644
index 0000000..6e8b99a
--- /dev/null
+++ b/kernel/perf_event.c
@@ -0,0 +1,5000 @@
+/*
+ * Performance event core code
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright  �  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ *  For licensing details see kernel-base/COPYING
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/sysfs.h>
+#include <linux/dcache.h>
+#include <linux/percpu.h>
+#include <linux/ptrace.h>
+#include <linux/vmstat.h>
+#include <linux/hardirq.h>
+#include <linux/rculist.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+
+#include <asm/irq_regs.h>
+
+/*
+ * Each CPU has a list of per CPU events:
+ */
+DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
+
+int perf_max_events __read_mostly = 1;
+static int perf_reserved_percpu __read_mostly;
+static int perf_overcommit __read_mostly = 1;
+
+static atomic_t nr_events __read_mostly;
+static atomic_t nr_mmap_events __read_mostly;
+static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_task_events __read_mostly;
+
+/*
+ * perf event paranoia level:
+ *  -1 - not paranoid at all
+ *   0 - disallow raw tracepoint access for unpriv
+ *   1 - disallow cpu events for unpriv
+ *   2 - disallow kernel profiling for unpriv
+ */
+int sysctl_perf_event_paranoid __read_mostly = 1;
+
+static inline bool perf_paranoid_tracepoint_raw(void)
+{
+	return sysctl_perf_event_paranoid > -1;
+}
+
+static inline bool perf_paranoid_cpu(void)
+{
+	return sysctl_perf_event_paranoid > 0;
+}
+
+static inline bool perf_paranoid_kernel(void)
+{
+	return sysctl_perf_event_paranoid > 1;
+}
+
+int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
+
+/*
+ * max perf event sample rate
+ */
+int sysctl_perf_event_sample_rate __read_mostly = 100000;
+
+static atomic64_t perf_event_id;
+
+/*
+ * Lock for (sysadmin-configurable) event reservations:
+ */
+static DEFINE_SPINLOCK(perf_resource_lock);
+
+/*
+ * Architecture provided APIs - weak aliases:
+ */
+extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
+{
+	return NULL;
+}
+
+void __weak hw_perf_disable(void)		{ barrier(); }
+void __weak hw_perf_enable(void)		{ barrier(); }
+
+void __weak hw_perf_event_setup(int cpu)	{ barrier(); }
+void __weak hw_perf_event_setup_online(int cpu)	{ barrier(); }
+
+int __weak
+hw_perf_group_sched_in(struct perf_event *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_event_context *ctx, int cpu)
+{
+	return 0;
+}
+
+void __weak perf_event_print_debug(void)	{ }
+
+static DEFINE_PER_CPU(int, perf_disable_count);
+
+void __perf_disable(void)
+{
+	__get_cpu_var(perf_disable_count)++;
+}
+
+bool __perf_enable(void)
+{
+	return !--__get_cpu_var(perf_disable_count);
+}
+
+void perf_disable(void)
+{
+	__perf_disable();
+	hw_perf_disable();
+}
+
+void perf_enable(void)
+{
+	if (__perf_enable())
+		hw_perf_enable();
+}
+
+static void get_ctx(struct perf_event_context *ctx)
+{
+	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
+}
+
+static void free_ctx(struct rcu_head *head)
+{
+	struct perf_event_context *ctx;
+
+	ctx = container_of(head, struct perf_event_context, rcu_head);
+	kfree(ctx);
+}
+
+static void put_ctx(struct perf_event_context *ctx)
+{
+	if (atomic_dec_and_test(&ctx->refcount)) {
+		if (ctx->parent_ctx)
+			put_ctx(ctx->parent_ctx);
+		if (ctx->task)
+			put_task_struct(ctx->task);
+		call_rcu(&ctx->rcu_head, free_ctx);
+	}
+}
+
+static void unclone_ctx(struct perf_event_context *ctx)
+{
+	if (ctx->parent_ctx) {
+		put_ctx(ctx->parent_ctx);
+		ctx->parent_ctx = NULL;
+	}
+}
+
+/*
+ * If we inherit events we want to return the parent event id
+ * to userspace.
+ */
+static u64 primary_event_id(struct perf_event *event)
+{
+	u64 id = event->id;
+
+	if (event->parent)
+		id = event->parent->id;
+
+	return id;
+}
+
+/*
+ * Get the perf_event_context for a task and lock it.
+ * This has to cope with with the fact that until it is locked,
+ * the context could get moved to another task.
+ */
+static struct perf_event_context *
+perf_lock_task_context(struct task_struct *task, unsigned long *flags)
+{
+	struct perf_event_context *ctx;
+
+	rcu_read_lock();
+ retry:
+	ctx = rcu_dereference(task->perf_event_ctxp);
+	if (ctx) {
+		/*
+		 * If this context is a clone of another, it might
+		 * get swapped for another underneath us by
+		 * perf_event_task_sched_out, though the
+		 * rcu_read_lock() protects us from any context
+		 * getting freed.  Lock the context and check if it
+		 * got swapped before we could get the lock, and retry
+		 * if so.  If we locked the right context, then it
+		 * can't get swapped on us any more.
+		 */
+		spin_lock_irqsave(&ctx->lock, *flags);
+		if (ctx != rcu_dereference(task->perf_event_ctxp)) {
+			spin_unlock_irqrestore(&ctx->lock, *flags);
+			goto retry;
+		}
+
+		if (!atomic_inc_not_zero(&ctx->refcount)) {
+			spin_unlock_irqrestore(&ctx->lock, *flags);
+			ctx = NULL;
+		}
+	}
+	rcu_read_unlock();
+	return ctx;
+}
+
+/*
+ * Get the context for a task and increment its pin_count so it
+ * can't get swapped to another task.  This also increments its
+ * reference count so that the context can't get freed.
+ */
+static struct perf_event_context *perf_pin_task_context(struct task_struct *task)
+{
+	struct perf_event_context *ctx;
+	unsigned long flags;
+
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		++ctx->pin_count;
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+	return ctx;
+}
+
+static void perf_unpin_context(struct perf_event_context *ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	--ctx->pin_count;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	put_ctx(ctx);
+}
+
+/*
+ * Add a event from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
+static void
+list_add_event(struct perf_event *event, struct perf_event_context *ctx)
+{
+	struct perf_event *group_leader = event->group_leader;
+
+	/*
+	 * Depending on whether it is a standalone or sibling event,
+	 * add it straight to the context's event list, or to the group
+	 * leader's sibling list:
+	 */
+	if (group_leader == event)
+		list_add_tail(&event->group_entry, &ctx->group_list);
+	else {
+		list_add_tail(&event->group_entry, &group_leader->sibling_list);
+		group_leader->nr_siblings++;
+	}
+
+	list_add_rcu(&event->event_entry, &ctx->event_list);
+	ctx->nr_events++;
+	if (event->attr.inherit_stat)
+		ctx->nr_stat++;
+}
+
+/*
+ * Remove a event from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
+static void
+list_del_event(struct perf_event *event, struct perf_event_context *ctx)
+{
+	struct perf_event *sibling, *tmp;
+
+	if (list_empty(&event->group_entry))
+		return;
+	ctx->nr_events--;
+	if (event->attr.inherit_stat)
+		ctx->nr_stat--;
+
+	list_del_init(&event->group_entry);
+	list_del_rcu(&event->event_entry);
+
+	if (event->group_leader != event)
+		event->group_leader->nr_siblings--;
+
+	/*
+	 * If this was a group event with sibling events then
+	 * upgrade the siblings to singleton events by adding them
+	 * to the context list directly:
+	 */
+	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+
+		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		sibling->group_leader = sibling;
+	}
+}
+
+static void
+event_sched_out(struct perf_event *event,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_event_context *ctx)
+{
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return;
+
+	event->state = PERF_EVENT_STATE_INACTIVE;
+	if (event->pending_disable) {
+		event->pending_disable = 0;
+		event->state = PERF_EVENT_STATE_OFF;
+	}
+	event->tstamp_stopped = ctx->time;
+	event->pmu->disable(event);
+	event->oncpu = -1;
+
+	if (!is_software_event(event))
+		cpuctx->active_oncpu--;
+	ctx->nr_active--;
+	if (event->attr.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
+static void
+group_sched_out(struct perf_event *group_event,
+		struct perf_cpu_context *cpuctx,
+		struct perf_event_context *ctx)
+{
+	struct perf_event *event;
+
+	if (group_event->state != PERF_EVENT_STATE_ACTIVE)
+		return;
+
+	event_sched_out(group_event, cpuctx, ctx);
+
+	/*
+	 * Schedule out siblings (if any):
+	 */
+	list_for_each_entry(event, &group_event->sibling_list, group_entry)
+		event_sched_out(event, cpuctx, ctx);
+
+	if (group_event->attr.exclusive)
+		cpuctx->exclusive = 0;
+}
+
+/*
+ * Cross CPU call to remove a performance event
+ *
+ * We disable the event on the hardware level first. After that we
+ * remove it from the context list.
+ */
+static void __perf_event_remove_from_context(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event *event = info;
+	struct perf_event_context *ctx = event->ctx;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	spin_lock(&ctx->lock);
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * events on a global level.
+	 */
+	perf_disable();
+
+	event_sched_out(event, cpuctx, ctx);
+
+	list_del_event(event, ctx);
+
+	if (!ctx->task) {
+		/*
+		 * Allow more per task events with respect to the
+		 * reservation:
+		 */
+		cpuctx->max_pertask =
+			min(perf_max_events - ctx->nr_events,
+			    perf_max_events - perf_reserved_percpu);
+	}
+
+	perf_enable();
+	spin_unlock(&ctx->lock);
+}
+
+
+/*
+ * Remove the event from a task's (or a CPU's) list of events.
+ *
+ * Must be called with ctx->mutex held.
+ *
+ * CPU events are removed with a smp call. For task events we only
+ * call when the task is on a CPU.
+ *
+ * If event->ctx is a cloned context, callers must make sure that
+ * every task struct that event->ctx->task could possibly point to
+ * remains valid.  This is OK when called from perf_release since
+ * that only calls us on the top-level context, which can't be a clone.
+ * When called from perf_event_exit_task, it's OK because the
+ * context has been detached from its task.
+ */
+static void perf_event_remove_from_context(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Per cpu events are removed via an smp call and
+		 * the removal is always sucessful.
+		 */
+		smp_call_function_single(event->cpu,
+					 __perf_event_remove_from_context,
+					 event, 1);
+		return;
+	}
+
+retry:
+	task_oncpu_function_call(task, __perf_event_remove_from_context,
+				 event);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the context is active we need to retry the smp call.
+	 */
+	if (ctx->nr_active && !list_empty(&event->group_entry)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents that this context is scheduled in so we
+	 * can remove the event safely, if the call above did not
+	 * succeed.
+	 */
+	if (!list_empty(&event->group_entry)) {
+		list_del_event(event, ctx);
+	}
+	spin_unlock_irq(&ctx->lock);
+}
+
+static inline u64 perf_clock(void)
+{
+	return cpu_clock(smp_processor_id());
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_event_context *ctx)
+{
+	u64 now = perf_clock();
+
+	ctx->time += now - ctx->timestamp;
+	ctx->timestamp = now;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for a event.
+ */
+static void update_event_times(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	u64 run_end;
+
+	if (event->state < PERF_EVENT_STATE_INACTIVE ||
+	    event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
+		return;
+
+	event->total_time_enabled = ctx->time - event->tstamp_enabled;
+
+	if (event->state == PERF_EVENT_STATE_INACTIVE)
+		run_end = event->tstamp_stopped;
+	else
+		run_end = ctx->time;
+
+	event->total_time_running = run_end - event->tstamp_running;
+}
+
+/*
+ * Update total_time_enabled and total_time_running for all events in a group.
+ */
+static void update_group_times(struct perf_event *leader)
+{
+	struct perf_event *event;
+
+	update_event_times(leader);
+	list_for_each_entry(event, &leader->sibling_list, group_entry)
+		update_event_times(event);
+}
+
+/*
+ * Cross CPU call to disable a performance event
+ */
+static void __perf_event_disable(void *info)
+{
+	struct perf_event *event = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = event->ctx;
+
+	/*
+	 * If this is a per-task event, need to check whether this
+	 * event's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * If the event is on, turn it off.
+	 * If it is in error state, leave it in error state.
+	 */
+	if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+		update_context_time(ctx);
+		update_group_times(event);
+		if (event == event->group_leader)
+			group_sched_out(event, cpuctx, ctx);
+		else
+			event_sched_out(event, cpuctx, ctx);
+		event->state = PERF_EVENT_STATE_OFF;
+	}
+
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Disable a event.
+ *
+ * If event->ctx is a cloned context, callers must make sure that
+ * every task struct that event->ctx->task could possibly point to
+ * remains valid.  This condition is satisifed when called through
+ * perf_event_for_each_child or perf_event_for_each because they
+ * hold the top-level event's child_mutex, so any descendant that
+ * goes to exit will block in sync_child_event.
+ * When called from perf_pending_event it's OK because event->ctx
+ * is the current context on this CPU and preemption is disabled,
+ * hence we can't get into perf_event_task_sched_out for this context.
+ */
+static void perf_event_disable(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Disable the event on the cpu that it's on
+		 */
+		smp_call_function_single(event->cpu, __perf_event_disable,
+					 event, 1);
+		return;
+	}
+
+ retry:
+	task_oncpu_function_call(task, __perf_event_disable, event);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the event is still active, we need to retry the cross-call.
+	 */
+	if (event->state == PERF_EVENT_STATE_ACTIVE) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (event->state == PERF_EVENT_STATE_INACTIVE) {
+		update_group_times(event);
+		event->state = PERF_EVENT_STATE_OFF;
+	}
+
+	spin_unlock_irq(&ctx->lock);
+}
+
+static int
+event_sched_in(struct perf_event *event,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_event_context *ctx,
+		 int cpu)
+{
+	if (event->state <= PERF_EVENT_STATE_OFF)
+		return 0;
+
+	event->state = PERF_EVENT_STATE_ACTIVE;
+	event->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	/*
+	 * The new state must be visible before we turn it on in the hardware:
+	 */
+	smp_wmb();
+
+	if (event->pmu->enable(event)) {
+		event->state = PERF_EVENT_STATE_INACTIVE;
+		event->oncpu = -1;
+		return -EAGAIN;
+	}
+
+	event->tstamp_running += ctx->time - event->tstamp_stopped;
+
+	if (!is_software_event(event))
+		cpuctx->active_oncpu++;
+	ctx->nr_active++;
+
+	if (event->attr.exclusive)
+		cpuctx->exclusive = 1;
+
+	return 0;
+}
+
+static int
+group_sched_in(struct perf_event *group_event,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_event_context *ctx,
+	       int cpu)
+{
+	struct perf_event *event, *partial_group;
+	int ret;
+
+	if (group_event->state == PERF_EVENT_STATE_OFF)
+		return 0;
+
+	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu);
+	if (ret)
+		return ret < 0 ? ret : 0;
+
+	if (event_sched_in(group_event, cpuctx, ctx, cpu))
+		return -EAGAIN;
+
+	/*
+	 * Schedule in siblings as one group (if any):
+	 */
+	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
+		if (event_sched_in(event, cpuctx, ctx, cpu)) {
+			partial_group = event;
+			goto group_error;
+		}
+	}
+
+	return 0;
+
+group_error:
+	/*
+	 * Groups can be scheduled in as one unit only, so undo any
+	 * partial group before returning:
+	 */
+	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
+		if (event == partial_group)
+			break;
+		event_sched_out(event, cpuctx, ctx);
+	}
+	event_sched_out(group_event, cpuctx, ctx);
+
+	return -EAGAIN;
+}
+
+/*
+ * Return 1 for a group consisting entirely of software events,
+ * 0 if the group contains any hardware events.
+ */
+static int is_software_only_group(struct perf_event *leader)
+{
+	struct perf_event *event;
+
+	if (!is_software_event(leader))
+		return 0;
+
+	list_for_each_entry(event, &leader->sibling_list, group_entry)
+		if (!is_software_event(event))
+			return 0;
+
+	return 1;
+}
+
+/*
+ * Work out whether we can put this event group on the CPU now.
+ */
+static int group_can_go_on(struct perf_event *event,
+			   struct perf_cpu_context *cpuctx,
+			   int can_add_hw)
+{
+	/*
+	 * Groups consisting entirely of software events can always go on.
+	 */
+	if (is_software_only_group(event))
+		return 1;
+	/*
+	 * If an exclusive group is already on, no other hardware
+	 * events can go on.
+	 */
+	if (cpuctx->exclusive)
+		return 0;
+	/*
+	 * If this group is exclusive and there are already
+	 * events on the CPU, it can't go on.
+	 */
+	if (event->attr.exclusive && cpuctx->active_oncpu)
+		return 0;
+	/*
+	 * Otherwise, try to add it if all previous groups were able
+	 * to go on.
+	 */
+	return can_add_hw;
+}
+
+static void add_event_to_ctx(struct perf_event *event,
+			       struct perf_event_context *ctx)
+{
+	list_add_event(event, ctx);
+	event->tstamp_enabled = ctx->time;
+	event->tstamp_running = ctx->time;
+	event->tstamp_stopped = ctx->time;
+}
+
+/*
+ * Cross CPU call to install and enable a performance event
+ *
+ * Must be called with ctx->mutex held
+ */
+static void __perf_install_in_context(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event *event = info;
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *leader = event->group_leader;
+	int cpu = smp_processor_id();
+	int err;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 * Or possibly this is the right context but it isn't
+	 * on this cpu because it had no events.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	update_context_time(ctx);
+
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * events on a global level. NOP for non NMI based events.
+	 */
+	perf_disable();
+
+	add_event_to_ctx(event, ctx);
+
+	/*
+	 * Don't put the event on if it is disabled or if
+	 * it is in a group and the group isn't on.
+	 */
+	if (event->state != PERF_EVENT_STATE_INACTIVE ||
+	    (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE))
+		goto unlock;
+
+	/*
+	 * An exclusive event can't go on if there are already active
+	 * hardware events, and no hardware event can go on if there
+	 * is already an exclusive event on.
+	 */
+	if (!group_can_go_on(event, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = event_sched_in(event, cpuctx, ctx, cpu);
+
+	if (err) {
+		/*
+		 * This event couldn't go on.  If it is in a group
+		 * then we have to pull the whole group off.
+		 * If the event group is pinned then put it in error state.
+		 */
+		if (leader != event)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->attr.pinned) {
+			update_group_times(leader);
+			leader->state = PERF_EVENT_STATE_ERROR;
+		}
+	}
+
+	if (!err && !ctx->task && cpuctx->max_pertask)
+		cpuctx->max_pertask--;
+
+ unlock:
+	perf_enable();
+
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Attach a performance event to a context
+ *
+ * First we add the event to the list with the hardware enable bit
+ * in event->hw_config cleared.
+ *
+ * If the event is attached to a task which is on a CPU we use a smp
+ * call to enable it in the task context. The task might have been
+ * scheduled away, but we check this in the smp call again.
+ *
+ * Must be called with ctx->mutex held.
+ */
+static void
+perf_install_in_context(struct perf_event_context *ctx,
+			struct perf_event *event,
+			int cpu)
+{
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Per cpu events are installed via an smp call and
+		 * the install is always sucessful.
+		 */
+		smp_call_function_single(cpu, __perf_install_in_context,
+					 event, 1);
+		return;
+	}
+
+retry:
+	task_oncpu_function_call(task, __perf_install_in_context,
+				 event);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * we need to retry the smp call.
+	 */
+	if (ctx->is_active && list_empty(&event->group_entry)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents that this context is scheduled in so we
+	 * can add the event safely, if it the call above did not
+	 * succeed.
+	 */
+	if (list_empty(&event->group_entry))
+		add_event_to_ctx(event, ctx);
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Put a event into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_event_mark_enabled(struct perf_event *event,
+					struct perf_event_context *ctx)
+{
+	struct perf_event *sub;
+
+	event->state = PERF_EVENT_STATE_INACTIVE;
+	event->tstamp_enabled = ctx->time - event->total_time_enabled;
+	list_for_each_entry(sub, &event->sibling_list, group_entry)
+		if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+			sub->tstamp_enabled =
+				ctx->time - sub->total_time_enabled;
+}
+
+/*
+ * Cross CPU call to enable a performance event
+ */
+static void __perf_event_enable(void *info)
+{
+	struct perf_event *event = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *leader = event->group_leader;
+	int err;
+
+	/*
+	 * If this is a per-task event, need to check whether this
+	 * event's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	update_context_time(ctx);
+
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		goto unlock;
+	__perf_event_mark_enabled(event, ctx);
+
+	/*
+	 * If the event is in a group and isn't the group leader,
+	 * then don't put it on unless the group is on.
+	 */
+	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
+		goto unlock;
+
+	if (!group_can_go_on(event, cpuctx, 1)) {
+		err = -EEXIST;
+	} else {
+		perf_disable();
+		if (event == leader)
+			err = group_sched_in(event, cpuctx, ctx,
+					     smp_processor_id());
+		else
+			err = event_sched_in(event, cpuctx, ctx,
+					       smp_processor_id());
+		perf_enable();
+	}
+
+	if (err) {
+		/*
+		 * If this event can't go on and it's part of a
+		 * group, then the whole group has to come off.
+		 */
+		if (leader != event)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->attr.pinned) {
+			update_group_times(leader);
+			leader->state = PERF_EVENT_STATE_ERROR;
+		}
+	}
+
+ unlock:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Enable a event.
+ *
+ * If event->ctx is a cloned context, callers must make sure that
+ * every task struct that event->ctx->task could possibly point to
+ * remains valid.  This condition is satisfied when called through
+ * perf_event_for_each_child or perf_event_for_each as described
+ * for perf_event_disable.
+ */
+static void perf_event_enable(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Enable the event on the cpu that it's on
+		 */
+		smp_call_function_single(event->cpu, __perf_event_enable,
+					 event, 1);
+		return;
+	}
+
+	spin_lock_irq(&ctx->lock);
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		goto out;
+
+	/*
+	 * If the event is in error state, clear that first.
+	 * That way, if we see the event in error state below, we
+	 * know that it has gone back into error state, as distinct
+	 * from the task having been scheduled away before the
+	 * cross-call arrived.
+	 */
+	if (event->state == PERF_EVENT_STATE_ERROR)
+		event->state = PERF_EVENT_STATE_OFF;
+
+ retry:
+	spin_unlock_irq(&ctx->lock);
+	task_oncpu_function_call(task, __perf_event_enable, event);
+
+	spin_lock_irq(&ctx->lock);
+
+	/*
+	 * If the context is active and the event is still off,
+	 * we need to retry the cross-call.
+	 */
+	if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF)
+		goto retry;
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (event->state == PERF_EVENT_STATE_OFF)
+		__perf_event_mark_enabled(event, ctx);
+
+ out:
+	spin_unlock_irq(&ctx->lock);
+}
+
+static int perf_event_refresh(struct perf_event *event, int refresh)
+{
+	/*
+	 * not supported on inherited events
+	 */
+	if (event->attr.inherit)
+		return -EINVAL;
+
+	atomic_add(refresh, &event->event_limit);
+	perf_event_enable(event);
+
+	return 0;
+}
+
+void __perf_event_sched_out(struct perf_event_context *ctx,
+			      struct perf_cpu_context *cpuctx)
+{
+	struct perf_event *event;
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 0;
+	if (likely(!ctx->nr_events))
+		goto out;
+	update_context_time(ctx);
+
+	perf_disable();
+	if (ctx->nr_active) {
+		list_for_each_entry(event, &ctx->group_list, group_entry) {
+			if (event != event->group_leader)
+				event_sched_out(event, cpuctx, ctx);
+			else
+				group_sched_out(event, cpuctx, ctx);
+		}
+	}
+	perf_enable();
+ out:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled events.
+ * If the number of enabled events is the same, then the set
+ * of enabled events should be the same, because these are both
+ * inherited contexts, therefore we can't access individual events
+ * in them directly with an fd; we can only enable/disable all
+ * events via prctl, or enable/disable all events in a family
+ * via ioctl, which will have the same effect on both contexts.
+ */
+static int context_equiv(struct perf_event_context *ctx1,
+			 struct perf_event_context *ctx2)
+{
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& !ctx1->pin_count && !ctx2->pin_count;
+}
+
+static void __perf_event_read(void *event);
+
+static void __perf_event_sync_stat(struct perf_event *event,
+				     struct perf_event *next_event)
+{
+	u64 value;
+
+	if (!event->attr.inherit_stat)
+		return;
+
+	/*
+	 * Update the event value, we cannot use perf_event_read()
+	 * because we're in the middle of a context switch and have IRQs
+	 * disabled, which upsets smp_call_function_single(), however
+	 * we know the event must be on the current CPU, therefore we
+	 * don't need to use it.
+	 */
+	switch (event->state) {
+	case PERF_EVENT_STATE_ACTIVE:
+		__perf_event_read(event);
+		break;
+
+	case PERF_EVENT_STATE_INACTIVE:
+		update_event_times(event);
+		break;
+
+	default:
+		break;
+	}
+
+	/*
+	 * In order to keep per-task stats reliable we need to flip the event
+	 * values when we flip the contexts.
+	 */
+	value = atomic64_read(&next_event->count);
+	value = atomic64_xchg(&event->count, value);
+	atomic64_set(&next_event->count, value);
+
+	swap(event->total_time_enabled, next_event->total_time_enabled);
+	swap(event->total_time_running, next_event->total_time_running);
+
+	/*
+	 * Since we swizzled the values, update the user visible data too.
+	 */
+	perf_event_update_userpage(event);
+	perf_event_update_userpage(next_event);
+}
+
+#define list_next_entry(pos, member) \
+	list_entry(pos->member.next, typeof(*pos), member)
+
+static void perf_event_sync_stat(struct perf_event_context *ctx,
+				   struct perf_event_context *next_ctx)
+{
+	struct perf_event *event, *next_event;
+
+	if (!ctx->nr_stat)
+		return;
+
+	event = list_first_entry(&ctx->event_list,
+				   struct perf_event, event_entry);
+
+	next_event = list_first_entry(&next_ctx->event_list,
+					struct perf_event, event_entry);
+
+	while (&event->event_entry != &ctx->event_list &&
+	       &next_event->event_entry != &next_ctx->event_list) {
+
+		__perf_event_sync_stat(event, next_event);
+
+		event = list_next_entry(event, event_entry);
+		next_event = list_next_entry(next_event, event_entry);
+	}
+}
+
+/*
+ * Called from scheduler to remove the events of the current task,
+ * with interrupts disabled.
+ *
+ * We stop each event and update the event value in event->count.
+ *
+ * This does not protect us against NMI, but disable()
+ * sets the disabled bit in the control field of event _before_
+ * accessing the event control register. If a NMI hits, then it will
+ * not restart the event.
+ */
+void perf_event_task_sched_out(struct task_struct *task,
+				 struct task_struct *next, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_event_context *ctx = task->perf_event_ctxp;
+	struct perf_event_context *next_ctx;
+	struct perf_event_context *parent;
+	struct pt_regs *regs;
+	int do_switch = 1;
+
+	regs = task_pt_regs(task);
+	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+	if (likely(!ctx || !cpuctx->task_ctx))
+		return;
+
+	update_context_time(ctx);
+
+	rcu_read_lock();
+	parent = rcu_dereference(ctx->parent_ctx);
+	next_ctx = next->perf_event_ctxp;
+	if (parent && next_ctx &&
+	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+		/*
+		 * Looks like the two contexts are clones, so we might be
+		 * able to optimize the context switch.  We lock both
+		 * contexts and check that they are clones under the
+		 * lock (including re-checking that neither has been
+		 * uncloned in the meantime).  It doesn't matter which
+		 * order we take the locks because no other cpu could
+		 * be trying to lock both of these tasks.
+		 */
+		spin_lock(&ctx->lock);
+		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+		if (context_equiv(ctx, next_ctx)) {
+			/*
+			 * XXX do we need a memory barrier of sorts
+			 * wrt to rcu_dereference() of perf_event_ctxp
+			 */
+			task->perf_event_ctxp = next_ctx;
+			next->perf_event_ctxp = ctx;
+			ctx->task = next;
+			next_ctx->task = task;
+			do_switch = 0;
+
+			perf_event_sync_stat(ctx, next_ctx);
+		}
+		spin_unlock(&next_ctx->lock);
+		spin_unlock(&ctx->lock);
+	}
+	rcu_read_unlock();
+
+	if (do_switch) {
+		__perf_event_sched_out(ctx, cpuctx);
+		cpuctx->task_ctx = NULL;
+	}
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	if (!cpuctx->task_ctx)
+		return;
+
+	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+		return;
+
+	__perf_event_sched_out(ctx, cpuctx);
+	cpuctx->task_ctx = NULL;
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+{
+	__perf_event_sched_out(&cpuctx->ctx, cpuctx);
+}
+
+static void
+__perf_event_sched_in(struct perf_event_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_event *event;
+	int can_add_hw = 1;
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	if (likely(!ctx->nr_events))
+		goto out;
+
+	ctx->timestamp = perf_clock();
+
+	perf_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	list_for_each_entry(event, &ctx->group_list, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF ||
+		    !event->attr.pinned)
+			continue;
+		if (event->cpu != -1 && event->cpu != cpu)
+			continue;
+
+		if (event != event->group_leader)
+			event_sched_in(event, cpuctx, ctx, cpu);
+		else {
+			if (group_can_go_on(event, cpuctx, 1))
+				group_sched_in(event, cpuctx, ctx, cpu);
+		}
+
+		/*
+		 * If this pinned group hasn't been scheduled,
+		 * put it in error state.
+		 */
+		if (event->state == PERF_EVENT_STATE_INACTIVE) {
+			update_group_times(event);
+			event->state = PERF_EVENT_STATE_ERROR;
+		}
+	}
+
+	list_for_each_entry(event, &ctx->group_list, group_entry) {
+		/*
+		 * Ignore events in OFF or ERROR state, and
+		 * ignore pinned events since we did them already.
+		 */
+		if (event->state <= PERF_EVENT_STATE_OFF ||
+		    event->attr.pinned)
+			continue;
+
+		/*
+		 * Listen to the 'cpu' scheduling filter constraint
+		 * of events:
+		 */
+		if (event->cpu != -1 && event->cpu != cpu)
+			continue;
+
+		if (event != event->group_leader) {
+			if (event_sched_in(event, cpuctx, ctx, cpu))
+				can_add_hw = 0;
+		} else {
+			if (group_can_go_on(event, cpuctx, can_add_hw)) {
+				if (group_sched_in(event, cpuctx, ctx, cpu))
+					can_add_hw = 0;
+			}
+		}
+	}
+	perf_enable();
+ out:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to add the events of the current task
+ * with interrupts disabled.
+ *
+ * We restore the event value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of event _before_
+ * accessing the event control register. If a NMI hits, then it will
+ * keep the event running.
+ */
+void perf_event_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_event_context *ctx = task->perf_event_ctxp;
+
+	if (likely(!ctx))
+		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
+	__perf_event_sched_in(ctx, cpuctx, cpu);
+	cpuctx->task_ctx = ctx;
+}
+
+static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_event_context *ctx = &cpuctx->ctx;
+
+	__perf_event_sched_in(ctx, cpuctx, cpu);
+}
+
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_event *event, int enable);
+
+static void perf_adjust_period(struct perf_event *event, u64 events)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 period, sample_period;
+	s64 delta;
+
+	events *= hwc->sample_period;
+	period = div64_u64(events, event->attr.sample_freq);
+
+	delta = (s64)(period - hwc->sample_period);
+	delta = (delta + 7) / 8; /* low pass filter */
+
+	sample_period = hwc->sample_period + delta;
+
+	if (!sample_period)
+		sample_period = 1;
+
+	hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
+{
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	u64 interrupts, freq;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(event, &ctx->group_list, group_entry) {
+		if (event->state != PERF_EVENT_STATE_ACTIVE)
+			continue;
+
+		hwc = &event->hw;
+
+		interrupts = hwc->interrupts;
+		hwc->interrupts = 0;
+
+		/*
+		 * unthrottle events on the tick
+		 */
+		if (interrupts == MAX_INTERRUPTS) {
+			perf_log_throttle(event, 1);
+			event->pmu->unthrottle(event);
+			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
+		}
+
+		if (!event->attr.freq || !event->attr.sample_freq)
+			continue;
+
+		/*
+		 * if the specified freq < HZ then we need to skip ticks
+		 */
+		if (event->attr.sample_freq < HZ) {
+			freq = event->attr.sample_freq;
+
+			hwc->freq_count += freq;
+			hwc->freq_interrupts += interrupts;
+
+			if (hwc->freq_count < HZ)
+				continue;
+
+			interrupts = hwc->freq_interrupts;
+			hwc->freq_interrupts = 0;
+			hwc->freq_count -= HZ;
+		} else
+			freq = HZ;
+
+		perf_adjust_period(event, freq * interrupts);
+
+		/*
+		 * In order to avoid being stalled by an (accidental) huge
+		 * sample period, force reset the sample period if we didn't
+		 * get any events in this freq period.
+		 */
+		if (!interrupts) {
+			perf_disable();
+			event->pmu->disable(event);
+			atomic64_set(&hwc->period_left, 0);
+			event->pmu->enable(event);
+			perf_enable();
+		}
+	}
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Round-robin a context's events:
+ */
+static void rotate_ctx(struct perf_event_context *ctx)
+{
+	struct perf_event *event;
+
+	if (!ctx->nr_events)
+		return;
+
+	spin_lock(&ctx->lock);
+	/*
+	 * Rotate the first entry last (works just fine for group events too):
+	 */
+	perf_disable();
+	list_for_each_entry(event, &ctx->group_list, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->group_list);
+		break;
+	}
+	perf_enable();
+
+	spin_unlock(&ctx->lock);
+}
+
+void perf_event_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+
+	if (!atomic_read(&nr_events))
+		return;
+
+	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	ctx = curr->perf_event_ctxp;
+
+	perf_ctx_adjust_freq(&cpuctx->ctx);
+	if (ctx)
+		perf_ctx_adjust_freq(ctx);
+
+	perf_event_cpu_sched_out(cpuctx);
+	if (ctx)
+		__perf_event_task_sched_out(ctx);
+
+	rotate_ctx(&cpuctx->ctx);
+	if (ctx)
+		rotate_ctx(ctx);
+
+	perf_event_cpu_sched_in(cpuctx, cpu);
+	if (ctx)
+		perf_event_task_sched_in(curr, cpu);
+}
+
+/*
+ * Enable all of a task's events that have been marked enable-on-exec.
+ * This expects task == current.
+ */
+static void perf_event_enable_on_exec(struct task_struct *task)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *event;
+	unsigned long flags;
+	int enabled = 0;
+
+	local_irq_save(flags);
+	ctx = task->perf_event_ctxp;
+	if (!ctx || !ctx->nr_events)
+		goto out;
+
+	__perf_event_task_sched_out(ctx);
+
+	spin_lock(&ctx->lock);
+
+	list_for_each_entry(event, &ctx->group_list, group_entry) {
+		if (!event->attr.enable_on_exec)
+			continue;
+		event->attr.enable_on_exec = 0;
+		if (event->state >= PERF_EVENT_STATE_INACTIVE)
+			continue;
+		__perf_event_mark_enabled(event, ctx);
+		enabled = 1;
+	}
+
+	/*
+	 * Unclone this context if we enabled any event.
+	 */
+	if (enabled)
+		unclone_ctx(ctx);
+
+	spin_unlock(&ctx->lock);
+
+	perf_event_task_sched_in(task, smp_processor_id());
+ out:
+	local_irq_restore(flags);
+}
+
+/*
+ * Cross CPU call to read the hardware event
+ */
+static void __perf_event_read(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event *event = info;
+	struct perf_event_context *ctx = event->ctx;
+	unsigned long flags;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu.  If not it has been
+	 * scheduled out before the smp call arrived.  In that case
+	 * event->count would have been updated to a recent sample
+	 * when the event was scheduled out.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	local_irq_save(flags);
+	if (ctx->is_active)
+		update_context_time(ctx);
+	event->pmu->read(event);
+	update_event_times(event);
+	local_irq_restore(flags);
+}
+
+static u64 perf_event_read(struct perf_event *event)
+{
+	/*
+	 * If event is enabled and currently active on a CPU, update the
+	 * value in the event structure:
+	 */
+	if (event->state == PERF_EVENT_STATE_ACTIVE) {
+		smp_call_function_single(event->oncpu,
+					 __perf_event_read, event, 1);
+	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+		update_event_times(event);
+	}
+
+	return atomic64_read(&event->count);
+}
+
+/*
+ * Initialize the perf_event context in a task_struct:
+ */
+static void
+__perf_event_init_context(struct perf_event_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->event_list);
+	atomic_set(&ctx->refcount, 1);
+	ctx->task = task;
+}
+
+static struct perf_event_context *find_get_context(pid_t pid, int cpu)
+{
+	struct perf_event_context *ctx;
+	struct perf_cpu_context *cpuctx;
+	struct task_struct *task;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * If cpu is not a wildcard then this is a percpu event:
+	 */
+	if (cpu != -1) {
+		/* Must be root to operate on a CPU event: */
+		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EACCES);
+
+		if (cpu < 0 || cpu > num_possible_cpus())
+			return ERR_PTR(-EINVAL);
+
+		/*
+		 * We could be clever and allow to attach a event to an
+		 * offline CPU and activate it when the CPU comes up, but
+		 * that's for later.
+		 */
+		if (!cpu_isset(cpu, cpu_online_map))
+			return ERR_PTR(-ENODEV);
+
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+		get_ctx(ctx);
+
+		return ctx;
+	}
+
+	rcu_read_lock();
+	if (!pid)
+		task = current;
+	else
+		task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+
+	if (!task)
+		return ERR_PTR(-ESRCH);
+
+	/*
+	 * Can't attach events to a dying task.
+	 */
+	err = -ESRCH;
+	if (task->flags & PF_EXITING)
+		goto errout;
+
+	/* Reuse ptrace permission checks for now. */
+	err = -EACCES;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto errout;
+
+ retry:
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		unclone_ctx(ctx);
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+
+	if (!ctx) {
+		ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+		err = -ENOMEM;
+		if (!ctx)
+			goto errout;
+		__perf_event_init_context(ctx, task);
+		get_ctx(ctx);
+		if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) {
+			/*
+			 * We raced with some other task; use
+			 * the context they set.
+			 */
+			kfree(ctx);
+			goto retry;
+		}
+		get_task_struct(task);
+	}
+
+	put_task_struct(task);
+	return ctx;
+
+ errout:
+	put_task_struct(task);
+	return ERR_PTR(err);
+}
+
+static void free_event_rcu(struct rcu_head *head)
+{
+	struct perf_event *event;
+
+	event = container_of(head, struct perf_event, rcu_head);
+	if (event->ns)
+		put_pid_ns(event->ns);
+	kfree(event);
+}
+
+static void perf_pending_sync(struct perf_event *event);
+
+static void free_event(struct perf_event *event)
+{
+	perf_pending_sync(event);
+
+	if (!event->parent) {
+		atomic_dec(&nr_events);
+		if (event->attr.mmap)
+			atomic_dec(&nr_mmap_events);
+		if (event->attr.comm)
+			atomic_dec(&nr_comm_events);
+		if (event->attr.task)
+			atomic_dec(&nr_task_events);
+	}
+
+	if (event->output) {
+		fput(event->output->filp);
+		event->output = NULL;
+	}
+
+	if (event->destroy)
+		event->destroy(event);
+
+	put_ctx(event->ctx);
+	call_rcu(&event->rcu_head, free_event_rcu);
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static int perf_release(struct inode *inode, struct file *file)
+{
+	struct perf_event *event = file->private_data;
+	struct perf_event_context *ctx = event->ctx;
+
+	file->private_data = NULL;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_event_remove_from_context(event);
+	mutex_unlock(&ctx->mutex);
+
+	mutex_lock(&event->owner->perf_event_mutex);
+	list_del_init(&event->owner_entry);
+	mutex_unlock(&event->owner->perf_event_mutex);
+	put_task_struct(event->owner);
+
+	free_event(event);
+
+	return 0;
+}
+
+static int perf_event_read_size(struct perf_event *event)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += event->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+
+	return size;
+}
+
+static u64 perf_event_read_value(struct perf_event *event)
+{
+	struct perf_event *child;
+	u64 total = 0;
+
+	total += perf_event_read(event);
+	list_for_each_entry(child, &event->child_list, child_list)
+		total += perf_event_read(child);
+
+	return total;
+}
+
+static int perf_event_read_entry(struct perf_event *event,
+				   u64 read_format, char __user *buf)
+{
+	int n = 0, count = 0;
+	u64 values[2];
+
+	values[n++] = perf_event_read_value(event);
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(event);
+
+	count = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, count))
+		return -EFAULT;
+
+	return count;
+}
+
+static int perf_event_read_group(struct perf_event *event,
+				   u64 read_format, char __user *buf)
+{
+	struct perf_event *leader = event->group_leader, *sub;
+	int n = 0, size = 0, err = -EFAULT;
+	u64 values[3];
+
+	values[n++] = 1 + leader->nr_siblings;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = leader->total_time_enabled +
+			atomic64_read(&leader->child_total_time_enabled);
+	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = leader->total_time_running +
+			atomic64_read(&leader->child_total_time_running);
+	}
+
+	size = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, size))
+		return -EFAULT;
+
+	err = perf_event_read_entry(leader, read_format, buf + size);
+	if (err < 0)
+		return err;
+
+	size += err;
+
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		err = perf_event_read_entry(sub, read_format,
+				buf + size);
+		if (err < 0)
+			return err;
+
+		size += err;
+	}
+
+	return size;
+}
+
+static int perf_event_read_one(struct perf_event *event,
+				 u64 read_format, char __user *buf)
+{
+	u64 values[4];
+	int n = 0;
+
+	values[n++] = perf_event_read_value(event);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = event->total_time_enabled +
+			atomic64_read(&event->child_total_time_enabled);
+	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = event->total_time_running +
+			atomic64_read(&event->child_total_time_running);
+	}
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(event);
+
+	if (copy_to_user(buf, values, n * sizeof(u64)))
+		return -EFAULT;
+
+	return n * sizeof(u64);
+}
+
+/*
+ * Read the performance event - simple non blocking version for now
+ */
+static ssize_t
+perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
+{
+	u64 read_format = event->attr.read_format;
+	int ret;
+
+	/*
+	 * Return end-of-file for a read on a event that is in
+	 * error state (i.e. because it was pinned but it couldn't be
+	 * scheduled on to the CPU at some point).
+	 */
+	if (event->state == PERF_EVENT_STATE_ERROR)
+		return 0;
+
+	if (count < perf_event_read_size(event))
+		return -ENOSPC;
+
+	WARN_ON_ONCE(event->ctx->parent_ctx);
+	mutex_lock(&event->child_mutex);
+	if (read_format & PERF_FORMAT_GROUP)
+		ret = perf_event_read_group(event, read_format, buf);
+	else
+		ret = perf_event_read_one(event, read_format, buf);
+	mutex_unlock(&event->child_mutex);
+
+	return ret;
+}
+
+static ssize_t
+perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct perf_event *event = file->private_data;
+
+	return perf_read_hw(event, buf, count);
+}
+
+static unsigned int perf_poll(struct file *file, poll_table *wait)
+{
+	struct perf_event *event = file->private_data;
+	struct perf_mmap_data *data;
+	unsigned int events = POLL_HUP;
+
+	rcu_read_lock();
+	data = rcu_dereference(event->data);
+	if (data)
+		events = atomic_xchg(&data->poll, 0);
+	rcu_read_unlock();
+
+	poll_wait(file, &event->waitq, wait);
+
+	return events;
+}
+
+static void perf_event_reset(struct perf_event *event)
+{
+	(void)perf_event_read(event);
+	atomic64_set(&event->count, 0);
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Holding the top-level event's child_mutex means that any
+ * descendant process that has inherited this event will block
+ * in sync_child_event if it goes to exit, thus satisfying the
+ * task existence requirements of perf_event_enable/disable.
+ */
+static void perf_event_for_each_child(struct perf_event *event,
+					void (*func)(struct perf_event *))
+{
+	struct perf_event *child;
+
+	WARN_ON_ONCE(event->ctx->parent_ctx);
+	mutex_lock(&event->child_mutex);
+	func(event);
+	list_for_each_entry(child, &event->child_list, child_list)
+		func(child);
+	mutex_unlock(&event->child_mutex);
+}
+
+static void perf_event_for_each(struct perf_event *event,
+				  void (*func)(struct perf_event *))
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *sibling;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	event = event->group_leader;
+
+	perf_event_for_each_child(event, func);
+	func(event);
+	list_for_each_entry(sibling, &event->sibling_list, group_entry)
+		perf_event_for_each_child(event, func);
+	mutex_unlock(&ctx->mutex);
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+	struct perf_event_context *ctx = event->ctx;
+	unsigned long size;
+	int ret = 0;
+	u64 value;
+
+	if (!event->attr.sample_period)
+		return -EINVAL;
+
+	size = copy_from_user(&value, arg, sizeof(value));
+	if (size != sizeof(value))
+		return -EFAULT;
+
+	if (!value)
+		return -EINVAL;
+
+	spin_lock_irq(&ctx->lock);
+	if (event->attr.freq) {
+		if (value > sysctl_perf_event_sample_rate) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		event->attr.sample_freq = value;
+	} else {
+		event->attr.sample_period = value;
+		event->hw.sample_period = value;
+	}
+unlock:
+	spin_unlock_irq(&ctx->lock);
+
+	return ret;
+}
+
+int perf_event_set_output(struct perf_event *event, int output_fd);
+
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct perf_event *event = file->private_data;
+	void (*func)(struct perf_event *);
+	u32 flags = arg;
+
+	switch (cmd) {
+	case PERF_EVENT_IOC_ENABLE:
+		func = perf_event_enable;
+		break;
+	case PERF_EVENT_IOC_DISABLE:
+		func = perf_event_disable;
+		break;
+	case PERF_EVENT_IOC_RESET:
+		func = perf_event_reset;
+		break;
+
+	case PERF_EVENT_IOC_REFRESH:
+		return perf_event_refresh(event, arg);
+
+	case PERF_EVENT_IOC_PERIOD:
+		return perf_event_period(event, (u64 __user *)arg);
+
+	case PERF_EVENT_IOC_SET_OUTPUT:
+		return perf_event_set_output(event, arg);
+
+	default:
+		return -ENOTTY;
+	}
+
+	if (flags & PERF_IOC_FLAG_GROUP)
+		perf_event_for_each(event, func);
+	else
+		perf_event_for_each_child(event, func);
+
+	return 0;
+}
+
+int perf_event_task_enable(void)
+{
+	struct perf_event *event;
+
+	mutex_lock(&current->perf_event_mutex);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry)
+		perf_event_for_each_child(event, perf_event_enable);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return 0;
+}
+
+int perf_event_task_disable(void)
+{
+	struct perf_event *event;
+
+	mutex_lock(&current->perf_event_mutex);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry)
+		perf_event_for_each_child(event, perf_event_disable);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return 0;
+}
+
+#ifndef PERF_EVENT_INDEX_OFFSET
+# define PERF_EVENT_INDEX_OFFSET 0
+#endif
+
+static int perf_event_index(struct perf_event *event)
+{
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
+	return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+}
+
+/*
+ * Callers need to ensure there can be no nesting of this function, otherwise
+ * the seqlock logic goes bad. We can not serialize this because the arch
+ * code calls this from NMI context.
+ */
+void perf_event_update_userpage(struct perf_event *event)
+{
+	struct perf_event_mmap_page *userpg;
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(event->data);
+	if (!data)
+		goto unlock;
+
+	userpg = data->user_page;
+
+	/*
+	 * Disable preemption so as to not let the corresponding user-space
+	 * spin too long if we get preempted.
+	 */
+	preempt_disable();
+	++userpg->lock;
+	barrier();
+	userpg->index = perf_event_index(event);
+	userpg->offset = atomic64_read(&event->count);
+	if (event->state == PERF_EVENT_STATE_ACTIVE)
+		userpg->offset -= atomic64_read(&event->hw.prev_count);
+
+	userpg->time_enabled = event->total_time_enabled +
+			atomic64_read(&event->child_total_time_enabled);
+
+	userpg->time_running = event->total_time_running +
+			atomic64_read(&event->child_total_time_running);
+
+	barrier();
+	++userpg->lock;
+	preempt_enable();
+unlock:
+	rcu_read_unlock();
+}
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct perf_event *event = vma->vm_file->private_data;
+	struct perf_mmap_data *data;
+	int ret = VM_FAULT_SIGBUS;
+
+	if (vmf->flags & FAULT_FLAG_MKWRITE) {
+		if (vmf->pgoff == 0)
+			ret = 0;
+		return ret;
+	}
+
+	rcu_read_lock();
+	data = rcu_dereference(event->data);
+	if (!data)
+		goto unlock;
+
+	if (vmf->pgoff == 0) {
+		vmf->page = virt_to_page(data->user_page);
+	} else {
+		int nr = vmf->pgoff - 1;
+
+		if ((unsigned)nr > data->nr_pages)
+			goto unlock;
+
+		if (vmf->flags & FAULT_FLAG_WRITE)
+			goto unlock;
+
+		vmf->page = virt_to_page(data->data_pages[nr]);
+	}
+
+	get_page(vmf->page);
+	vmf->page->mapping = vma->vm_file->f_mapping;
+	vmf->page->index   = vmf->pgoff;
+
+	ret = 0;
+unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+{
+	struct perf_mmap_data *data;
+	unsigned long size;
+	int i;
+
+	WARN_ON(atomic_read(&event->mmap_count));
+
+	size = sizeof(struct perf_mmap_data);
+	size += nr_pages * sizeof(void *);
+
+	data = kzalloc(size, GFP_KERNEL);
+	if (!data)
+		goto fail;
+
+	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!data->user_page)
+		goto fail_user_page;
+
+	for (i = 0; i < nr_pages; i++) {
+		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+		if (!data->data_pages[i])
+			goto fail_data_pages;
+	}
+
+	data->nr_pages = nr_pages;
+	atomic_set(&data->lock, -1);
+
+	if (event->attr.watermark) {
+		data->watermark = min_t(long, PAGE_SIZE * nr_pages,
+				      event->attr.wakeup_watermark);
+	}
+	if (!data->watermark)
+		data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
+
+	rcu_assign_pointer(event->data, data);
+
+	return 0;
+
+fail_data_pages:
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)data->data_pages[i]);
+
+	free_page((unsigned long)data->user_page);
+
+fail_user_page:
+	kfree(data);
+
+fail:
+	return -ENOMEM;
+}
+
+static void perf_mmap_free_page(unsigned long addr)
+{
+	struct page *page = virt_to_page((void *)addr);
+
+	page->mapping = NULL;
+	__free_page(page);
+}
+
+static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+{
+	struct perf_mmap_data *data;
+	int i;
+
+	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+
+	perf_mmap_free_page((unsigned long)data->user_page);
+	for (i = 0; i < data->nr_pages; i++)
+		perf_mmap_free_page((unsigned long)data->data_pages[i]);
+
+	kfree(data);
+}
+
+static void perf_mmap_data_free(struct perf_event *event)
+{
+	struct perf_mmap_data *data = event->data;
+
+	WARN_ON(atomic_read(&event->mmap_count));
+
+	rcu_assign_pointer(event->data, NULL);
+	call_rcu(&data->rcu_head, __perf_mmap_data_free);
+}
+
+static void perf_mmap_open(struct vm_area_struct *vma)
+{
+	struct perf_event *event = vma->vm_file->private_data;
+
+	atomic_inc(&event->mmap_count);
+}
+
+static void perf_mmap_close(struct vm_area_struct *vma)
+{
+	struct perf_event *event = vma->vm_file->private_data;
+
+	WARN_ON_ONCE(event->ctx->parent_ctx);
+	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm);
+		vma->vm_mm->locked_vm -= event->data->nr_locked;
+		perf_mmap_data_free(event);
+		mutex_unlock(&event->mmap_mutex);
+	}
+}
+
+static struct vm_operations_struct perf_mmap_vmops = {
+	.open		= perf_mmap_open,
+	.close		= perf_mmap_close,
+	.fault		= perf_mmap_fault,
+	.page_mkwrite	= perf_mmap_fault,
+};
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct perf_event *event = file->private_data;
+	unsigned long user_locked, user_lock_limit;
+	struct user_struct *user = current_user();
+	unsigned long locked, lock_limit;
+	unsigned long vma_size;
+	unsigned long nr_pages;
+	long user_extra, extra;
+	int ret = 0;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma_size = vma->vm_end - vma->vm_start;
+	nr_pages = (vma_size / PAGE_SIZE) - 1;
+
+	/*
+	 * If we have data pages ensure they're a power-of-two number, so we
+	 * can do bitmasks instead of modulo.
+	 */
+	if (nr_pages != 0 && !is_power_of_2(nr_pages))
+		return -EINVAL;
+
+	if (vma_size != PAGE_SIZE * (1 + nr_pages))
+		return -EINVAL;
+
+	if (vma->vm_pgoff != 0)
+		return -EINVAL;
+
+	WARN_ON_ONCE(event->ctx->parent_ctx);
+	mutex_lock(&event->mmap_mutex);
+	if (event->output) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	if (atomic_inc_not_zero(&event->mmap_count)) {
+		if (nr_pages != event->data->nr_pages)
+			ret = -EINVAL;
+		goto unlock;
+	}
+
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
+
+	/*
+	 * Increase the limit linearly with more CPUs:
+	 */
+	user_lock_limit *= num_online_cpus();
+
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
+
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
+
+	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
+
+	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
+		!capable(CAP_IPC_LOCK)) {
+		ret = -EPERM;
+		goto unlock;
+	}
+
+	WARN_ON(event->data);
+	ret = perf_mmap_data_alloc(event, nr_pages);
+	if (ret)
+		goto unlock;
+
+	atomic_set(&event->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
+	vma->vm_mm->locked_vm += extra;
+	event->data->nr_locked = extra;
+	if (vma->vm_flags & VM_WRITE)
+		event->data->writable = 1;
+
+unlock:
+	mutex_unlock(&event->mmap_mutex);
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_ops = &perf_mmap_vmops;
+
+	return ret;
+}
+
+static int perf_fasync(int fd, struct file *filp, int on)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct perf_event *event = filp->private_data;
+	int retval;
+
+	mutex_lock(&inode->i_mutex);
+	retval = fasync_helper(fd, filp, on, &event->fasync);
+	mutex_unlock(&inode->i_mutex);
+
+	if (retval < 0)
+		return retval;
+
+	return 0;
+}
+
+static const struct file_operations perf_fops = {
+	.release		= perf_release,
+	.read			= perf_read,
+	.poll			= perf_poll,
+	.unlocked_ioctl		= perf_ioctl,
+	.compat_ioctl		= perf_ioctl,
+	.mmap			= perf_mmap,
+	.fasync			= perf_fasync,
+};
+
+/*
+ * Perf event wakeup
+ *
+ * If there's data, ensure we set the poll() state and publish everything
+ * to user-space before waking everybody up.
+ */
+
+void perf_event_wakeup(struct perf_event *event)
+{
+	wake_up_all(&event->waitq);
+
+	if (event->pending_kill) {
+		kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+		event->pending_kill = 0;
+	}
+}
+
+/*
+ * Pending wakeups
+ *
+ * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
+ *
+ * The NMI bit means we cannot possibly take locks. Therefore, maintain a
+ * single linked list and use cmpxchg() to add entries lockless.
+ */
+
+static void perf_pending_event(struct perf_pending_entry *entry)
+{
+	struct perf_event *event = container_of(entry,
+			struct perf_event, pending);
+
+	if (event->pending_disable) {
+		event->pending_disable = 0;
+		__perf_event_disable(event);
+	}
+
+	if (event->pending_wakeup) {
+		event->pending_wakeup = 0;
+		perf_event_wakeup(event);
+	}
+}
+
+#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
+
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
+	PENDING_TAIL,
+};
+
+static void perf_pending_queue(struct perf_pending_entry *entry,
+			       void (*func)(struct perf_pending_entry *))
+{
+	struct perf_pending_entry **head;
+
+	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
+		return;
+
+	entry->func = func;
+
+	head = &get_cpu_var(perf_pending_head);
+
+	do {
+		entry->next = *head;
+	} while (cmpxchg(head, entry->next, entry) != entry->next);
+
+	set_perf_event_pending();
+
+	put_cpu_var(perf_pending_head);
+}
+
+static int __perf_pending_run(void)
+{
+	struct perf_pending_entry *list;
+	int nr = 0;
+
+	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+	while (list != PENDING_TAIL) {
+		void (*func)(struct perf_pending_entry *);
+		struct perf_pending_entry *entry = list;
+
+		list = list->next;
+
+		func = entry->func;
+		entry->next = NULL;
+		/*
+		 * Ensure we observe the unqueue before we issue the wakeup,
+		 * so that we won't be waiting forever.
+		 * -- see perf_not_pending().
+		 */
+		smp_wmb();
+
+		func(entry);
+		nr++;
+	}
+
+	return nr;
+}
+
+static inline int perf_not_pending(struct perf_event *event)
+{
+	/*
+	 * If we flush on whatever cpu we run, there is a chance we don't
+	 * need to wait.
+	 */
+	get_cpu();
+	__perf_pending_run();
+	put_cpu();
+
+	/*
+	 * Ensure we see the proper queue state before going to sleep
+	 * so that we do not miss the wakeup. -- see perf_pending_handle()
+	 */
+	smp_rmb();
+	return event->pending.next == NULL;
+}
+
+static void perf_pending_sync(struct perf_event *event)
+{
+	wait_event(event->waitq, perf_not_pending(event));
+}
+
+void perf_event_do_pending(void)
+{
+	__perf_pending_run();
+}
+
+/*
+ * Callchain support -- arch specific
+ */
+
+__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	return NULL;
+}
+
+/*
+ * Output
+ */
+static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+			      unsigned long offset, unsigned long head)
+{
+	unsigned long mask;
+
+	if (!data->writable)
+		return true;
+
+	mask = (data->nr_pages << PAGE_SHIFT) - 1;
+
+	offset = (offset - tail) & mask;
+	head   = (head   - tail) & mask;
+
+	if ((int)(head - offset) < 0)
+		return false;
+
+	return true;
+}
+
+static void perf_output_wakeup(struct perf_output_handle *handle)
+{
+	atomic_set(&handle->data->poll, POLL_IN);
+
+	if (handle->nmi) {
+		handle->event->pending_wakeup = 1;
+		perf_pending_queue(&handle->event->pending,
+				   perf_pending_event);
+	} else
+		perf_event_wakeup(handle->event);
+}
+
+/*
+ * Curious locking construct.
+ *
+ * We need to ensure a later event_id doesn't publish a head when a former
+ * event_id isn't done writing. However since we need to deal with NMIs we
+ * cannot fully serialize things.
+ *
+ * What we do is serialize between CPUs so we only have to deal with NMI
+ * nesting on a single CPU.
+ *
+ * We only publish the head (and generate a wakeup) when the outer-most
+ * event_id completes.
+ */
+static void perf_output_lock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	int cpu;
+
+	handle->locked = 0;
+
+	local_irq_save(handle->flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi() && atomic_read(&data->lock) == cpu)
+		return;
+
+	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
+		cpu_relax();
+
+	handle->locked = 1;
+}
+
+static void perf_output_unlock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	unsigned long head;
+	int cpu;
+
+	data->done_head = data->head;
+
+	if (!handle->locked)
+		goto out;
+
+again:
+	/*
+	 * The xchg implies a full barrier that ensures all writes are done
+	 * before we publish the new head, matched by a rmb() in userspace when
+	 * reading this position.
+	 */
+	while ((head = atomic_long_xchg(&data->done_head, 0)))
+		data->user_page->data_head = head;
+
+	/*
+	 * NMI can happen here, which means we can miss a done_head update.
+	 */
+
+	cpu = atomic_xchg(&data->lock, -1);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
+	/*
+	 * Therefore we have to validate we did not indeed do so.
+	 */
+	if (unlikely(atomic_long_read(&data->done_head))) {
+		/*
+		 * Since we had it locked, we can lock it again.
+		 */
+		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
+			cpu_relax();
+
+		goto again;
+	}
+
+	if (atomic_xchg(&data->wakeup, 0))
+		perf_output_wakeup(handle);
+out:
+	local_irq_restore(handle->flags);
+}
+
+void perf_output_copy(struct perf_output_handle *handle,
+		      const void *buf, unsigned int len)
+{
+	unsigned int pages_mask;
+	unsigned int offset;
+	unsigned int size;
+	void **pages;
+
+	offset		= handle->offset;
+	pages_mask	= handle->data->nr_pages - 1;
+	pages		= handle->data->data_pages;
+
+	do {
+		unsigned int page_offset;
+		int nr;
+
+		nr	    = (offset >> PAGE_SHIFT) & pages_mask;
+		page_offset = offset & (PAGE_SIZE - 1);
+		size	    = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+
+		memcpy(pages[nr] + page_offset, buf, size);
+
+		len	    -= size;
+		buf	    += size;
+		offset	    += size;
+	} while (len);
+
+	handle->offset = offset;
+
+	/*
+	 * Check we didn't copy past our reservation window, taking the
+	 * possible unsigned int wrap into account.
+	 */
+	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
+}
+
+int perf_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event, unsigned int size,
+		      int nmi, int sample)
+{
+	struct perf_event *output_event;
+	struct perf_mmap_data *data;
+	unsigned long tail, offset, head;
+	int have_lost;
+	struct {
+		struct perf_event_header header;
+		u64			 id;
+		u64			 lost;
+	} lost_event;
+
+	rcu_read_lock();
+	/*
+	 * For inherited events we send all the output towards the parent.
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	output_event = rcu_dereference(event->output);
+	if (output_event)
+		event = output_event;
+
+	data = rcu_dereference(event->data);
+	if (!data)
+		goto out;
+
+	handle->data	= data;
+	handle->event	= event;
+	handle->nmi	= nmi;
+	handle->sample	= sample;
+
+	if (!data->nr_pages)
+		goto fail;
+
+	have_lost = atomic_read(&data->lost);
+	if (have_lost)
+		size += sizeof(lost_event);
+
+	perf_output_lock(handle);
+
+	do {
+		/*
+		 * Userspace could choose to issue a mb() before updating the
+		 * tail pointer. So that all reads will be completed before the
+		 * write is issued.
+		 */
+		tail = ACCESS_ONCE(data->user_page->data_tail);
+		smp_rmb();
+		offset = head = atomic_long_read(&data->head);
+		head += size;
+		if (unlikely(!perf_output_space(data, tail, offset, head)))
+			goto fail;
+	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
+
+	handle->offset	= offset;
+	handle->head	= head;
+
+	if (head - tail > data->watermark)
+		atomic_set(&data->wakeup, 1);
+
+	if (have_lost) {
+		lost_event.header.type = PERF_RECORD_LOST;
+		lost_event.header.misc = 0;
+		lost_event.header.size = sizeof(lost_event);
+		lost_event.id          = event->id;
+		lost_event.lost        = atomic_xchg(&data->lost, 0);
+
+		perf_output_put(handle, lost_event);
+	}
+
+	return 0;
+
+fail:
+	atomic_inc(&data->lost);
+	perf_output_unlock(handle);
+out:
+	rcu_read_unlock();
+
+	return -ENOSPC;
+}
+
+void perf_output_end(struct perf_output_handle *handle)
+{
+	struct perf_event *event = handle->event;
+	struct perf_mmap_data *data = handle->data;
+
+	int wakeup_events = event->attr.wakeup_events;
+
+	if (handle->sample && wakeup_events) {
+		int events = atomic_inc_return(&data->events);
+		if (events >= wakeup_events) {
+			atomic_sub(wakeup_events, &data->events);
+			atomic_set(&data->wakeup, 1);
+		}
+	}
+
+	perf_output_unlock(handle);
+	rcu_read_unlock();
+}
+
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_pid_nr_ns(p, event->ns);
+}
+
+static void perf_output_read_one(struct perf_output_handle *handle,
+				 struct perf_event *event)
+{
+	u64 read_format = event->attr.read_format;
+	u64 values[4];
+	int n = 0;
+
+	values[n++] = atomic64_read(&event->count);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = event->total_time_enabled +
+			atomic64_read(&event->child_total_time_enabled);
+	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = event->total_time_running +
+			atomic64_read(&event->child_total_time_running);
+	}
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(event);
+
+	perf_output_copy(handle, values, n * sizeof(u64));
+}
+
+/*
+ * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
+ */
+static void perf_output_read_group(struct perf_output_handle *handle,
+			    struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader, *sub;
+	u64 read_format = event->attr.read_format;
+	u64 values[5];
+	int n = 0;
+
+	values[n++] = 1 + leader->nr_siblings;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = leader->total_time_enabled;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = leader->total_time_running;
+
+	if (leader != event)
+		leader->pmu->read(leader);
+
+	values[n++] = atomic64_read(&leader->count);
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(leader);
+
+	perf_output_copy(handle, values, n * sizeof(u64));
+
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		n = 0;
+
+		if (sub != event)
+			sub->pmu->read(sub);
+
+		values[n++] = atomic64_read(&sub->count);
+		if (read_format & PERF_FORMAT_ID)
+			values[n++] = primary_event_id(sub);
+
+		perf_output_copy(handle, values, n * sizeof(u64));
+	}
+}
+
+static void perf_output_read(struct perf_output_handle *handle,
+			     struct perf_event *event)
+{
+	if (event->attr.read_format & PERF_FORMAT_GROUP)
+		perf_output_read_group(handle, event);
+	else
+		perf_output_read_one(handle, event);
+}
+
+void perf_output_sample(struct perf_output_handle *handle,
+			struct perf_event_header *header,
+			struct perf_sample_data *data,
+			struct perf_event *event)
+{
+	u64 sample_type = data->type;
+
+	perf_output_put(handle, *header);
+
+	if (sample_type & PERF_SAMPLE_IP)
+		perf_output_put(handle, data->ip);
+
+	if (sample_type & PERF_SAMPLE_TID)
+		perf_output_put(handle, data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		perf_output_put(handle, data->time);
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		perf_output_put(handle, data->addr);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		perf_output_put(handle, data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		perf_output_put(handle, data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		perf_output_put(handle, data->cpu_entry);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		perf_output_put(handle, data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		perf_output_read(handle, event);
+
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+		if (data->callchain) {
+			int size = 1;
+
+			if (data->callchain)
+				size += data->callchain->nr;
+
+			size *= sizeof(u64);
+
+			perf_output_copy(handle, data->callchain, size);
+		} else {
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
+	}
+
+	if (sample_type & PERF_SAMPLE_RAW) {
+		if (data->raw) {
+			perf_output_put(handle, data->raw->size);
+			perf_output_copy(handle, data->raw->data,
+					 data->raw->size);
+		} else {
+			struct {
+				u32	size;
+				u32	data;
+			} raw = {
+				.size = sizeof(u32),
+				.data = 0,
+			};
+			perf_output_put(handle, raw);
+		}
+	}
+}
+
+void perf_prepare_sample(struct perf_event_header *header,
+			 struct perf_sample_data *data,
+			 struct perf_event *event,
+			 struct pt_regs *regs)
+{
+	u64 sample_type = event->attr.sample_type;
+
+	data->type = sample_type;
+
+	header->type = PERF_RECORD_SAMPLE;
+	header->size = sizeof(*header);
+
+	header->misc = 0;
+	header->misc |= perf_misc_flags(regs);
+
+	if (sample_type & PERF_SAMPLE_IP) {
+		data->ip = perf_instruction_pointer(regs);
+
+		header->size += sizeof(data->ip);
+	}
+
+	if (sample_type & PERF_SAMPLE_TID) {
+		/* namespace issues */
+		data->tid_entry.pid = perf_event_pid(event, current);
+		data->tid_entry.tid = perf_event_tid(event, current);
+
+		header->size += sizeof(data->tid_entry);
+	}
+
+	if (sample_type & PERF_SAMPLE_TIME) {
+		data->time = perf_clock();
+
+		header->size += sizeof(data->time);
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		header->size += sizeof(data->addr);
+
+	if (sample_type & PERF_SAMPLE_ID) {
+		data->id = primary_event_id(event);
+
+		header->size += sizeof(data->id);
+	}
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID) {
+		data->stream_id = event->id;
+
+		header->size += sizeof(data->stream_id);
+	}
+
+	if (sample_type & PERF_SAMPLE_CPU) {
+		data->cpu_entry.cpu		= raw_smp_processor_id();
+		data->cpu_entry.reserved	= 0;
+
+		header->size += sizeof(data->cpu_entry);
+	}
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		header->size += sizeof(data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		header->size += perf_event_read_size(event);
+
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+		int size = 1;
+
+		data->callchain = perf_callchain(regs);
+
+		if (data->callchain)
+			size += data->callchain->nr;
+
+		header->size += size * sizeof(u64);
+	}
+
+	if (sample_type & PERF_SAMPLE_RAW) {
+		int size = sizeof(u32);
+
+		if (data->raw)
+			size += data->raw->size;
+		else
+			size += sizeof(u32);
+
+		WARN_ON_ONCE(size & (sizeof(u64)-1));
+		header->size += size;
+	}
+}
+
+static void perf_event_output(struct perf_event *event, int nmi,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
+{
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+
+	perf_prepare_sample(&header, data, event, regs);
+
+	if (perf_output_begin(&handle, event, header.size, nmi, 1))
+		return;
+
+	perf_output_sample(&handle, &header, data, event);
+
+	perf_output_end(&handle);
+}
+
+/*
+ * read event_id
+ */
+
+struct perf_read_event {
+	struct perf_event_header	header;
+
+	u32				pid;
+	u32				tid;
+};
+
+static void
+perf_event_read_event(struct perf_event *event,
+			struct task_struct *task)
+{
+	struct perf_output_handle handle;
+	struct perf_read_event read_event = {
+		.header = {
+			.type = PERF_RECORD_READ,
+			.misc = 0,
+			.size = sizeof(read_event) + perf_event_read_size(event),
+		},
+		.pid = perf_event_pid(event, task),
+		.tid = perf_event_tid(event, task),
+	};
+	int ret;
+
+	ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, read_event);
+	perf_output_read(&handle, event);
+
+	perf_output_end(&handle);
+}
+
+/*
+ * task tracking -- fork/exit
+ *
+ * enabled by: attr.comm | attr.mmap | attr.task
+ */
+
+struct perf_task_event {
+	struct task_struct		*task;
+	struct perf_event_context	*task_ctx;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				ppid;
+		u32				tid;
+		u32				ptid;
+		u64				time;
+	} event_id;
+};
+
+static void perf_event_task_output(struct perf_event *event,
+				     struct perf_task_event *task_event)
+{
+	struct perf_output_handle handle;
+	int size;
+	struct task_struct *task = task_event->task;
+	int ret;
+
+	size  = task_event->event_id.header.size;
+	ret = perf_output_begin(&handle, event, size, 0, 0);
+
+	if (ret)
+		return;
+
+	task_event->event_id.pid = perf_event_pid(event, task);
+	task_event->event_id.ppid = perf_event_pid(event, current);
+
+	task_event->event_id.tid = perf_event_tid(event, task);
+	task_event->event_id.ptid = perf_event_tid(event, current);
+
+	task_event->event_id.time = perf_clock();
+
+	perf_output_put(&handle, task_event->event_id);
+
+	perf_output_end(&handle);
+}
+
+static int perf_event_task_match(struct perf_event *event)
+{
+	if (event->attr.comm || event->attr.mmap || event->attr.task)
+		return 1;
+
+	return 0;
+}
+
+static void perf_event_task_ctx(struct perf_event_context *ctx,
+				  struct perf_task_event *task_event)
+{
+	struct perf_event *event;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+		if (perf_event_task_match(event))
+			perf_event_task_output(event, task_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_event_task_event(struct perf_task_event *task_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx = task_event->task_ctx;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_event_task_ctx(&cpuctx->ctx, task_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	if (!ctx)
+		ctx = rcu_dereference(task_event->task->perf_event_ctxp);
+	if (ctx)
+		perf_event_task_ctx(ctx, task_event);
+	rcu_read_unlock();
+}
+
+static void perf_event_task(struct task_struct *task,
+			      struct perf_event_context *task_ctx,
+			      int new)
+{
+	struct perf_task_event task_event;
+
+	if (!atomic_read(&nr_comm_events) &&
+	    !atomic_read(&nr_mmap_events) &&
+	    !atomic_read(&nr_task_events))
+		return;
+
+	task_event = (struct perf_task_event){
+		.task	  = task,
+		.task_ctx = task_ctx,
+		.event_id    = {
+			.header = {
+				.type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT,
+				.misc = 0,
+				.size = sizeof(task_event.event_id),
+			},
+			/* .pid  */
+			/* .ppid */
+			/* .tid  */
+			/* .ptid */
+		},
+	};
+
+	perf_event_task_event(&task_event);
+}
+
+void perf_event_fork(struct task_struct *task)
+{
+	perf_event_task(task, NULL, 1);
+}
+
+/*
+ * comm tracking
+ */
+
+struct perf_comm_event {
+	struct task_struct	*task;
+	char			*comm;
+	int			comm_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+	} event_id;
+};
+
+static void perf_event_comm_output(struct perf_event *event,
+				     struct perf_comm_event *comm_event)
+{
+	struct perf_output_handle handle;
+	int size = comm_event->event_id.header.size;
+	int ret = perf_output_begin(&handle, event, size, 0, 0);
+
+	if (ret)
+		return;
+
+	comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
+	comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
+
+	perf_output_put(&handle, comm_event->event_id);
+	perf_output_copy(&handle, comm_event->comm,
+				   comm_event->comm_size);
+	perf_output_end(&handle);
+}
+
+static int perf_event_comm_match(struct perf_event *event)
+{
+	if (event->attr.comm)
+		return 1;
+
+	return 0;
+}
+
+static void perf_event_comm_ctx(struct perf_event_context *ctx,
+				  struct perf_comm_event *comm_event)
+{
+	struct perf_event *event;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+		if (perf_event_comm_match(event))
+			perf_event_comm_output(event, comm_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_event_comm_event(struct perf_comm_event *comm_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+	unsigned int size;
+	char comm[TASK_COMM_LEN];
+
+	memset(comm, 0, sizeof(comm));
+	strncpy(comm, comm_event->task->comm, sizeof(comm));
+	size = ALIGN(strlen(comm)+1, sizeof(u64));
+
+	comm_event->comm = comm;
+	comm_event->comm_size = size;
+
+	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_event_comm_ctx(&cpuctx->ctx, comm_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_event_ctxp);
+	if (ctx)
+		perf_event_comm_ctx(ctx, comm_event);
+	rcu_read_unlock();
+}
+
+void perf_event_comm(struct task_struct *task)
+{
+	struct perf_comm_event comm_event;
+
+	if (task->perf_event_ctxp)
+		perf_event_enable_on_exec(task);
+
+	if (!atomic_read(&nr_comm_events))
+		return;
+
+	comm_event = (struct perf_comm_event){
+		.task	= task,
+		/* .comm      */
+		/* .comm_size */
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_COMM,
+				.misc = 0,
+				/* .size */
+			},
+			/* .pid */
+			/* .tid */
+		},
+	};
+
+	perf_event_comm_event(&comm_event);
+}
+
+/*
+ * mmap tracking
+ */
+
+struct perf_mmap_event {
+	struct vm_area_struct	*vma;
+
+	const char		*file_name;
+	int			file_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+		u64				start;
+		u64				len;
+		u64				pgoff;
+	} event_id;
+};
+
+static void perf_event_mmap_output(struct perf_event *event,
+				     struct perf_mmap_event *mmap_event)
+{
+	struct perf_output_handle handle;
+	int size = mmap_event->event_id.header.size;
+	int ret = perf_output_begin(&handle, event, size, 0, 0);
+
+	if (ret)
+		return;
+
+	mmap_event->event_id.pid = perf_event_pid(event, current);
+	mmap_event->event_id.tid = perf_event_tid(event, current);
+
+	perf_output_put(&handle, mmap_event->event_id);
+	perf_output_copy(&handle, mmap_event->file_name,
+				   mmap_event->file_size);
+	perf_output_end(&handle);
+}
+
+static int perf_event_mmap_match(struct perf_event *event,
+				   struct perf_mmap_event *mmap_event)
+{
+	if (event->attr.mmap)
+		return 1;
+
+	return 0;
+}
+
+static void perf_event_mmap_ctx(struct perf_event_context *ctx,
+				  struct perf_mmap_event *mmap_event)
+{
+	struct perf_event *event;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+		if (perf_event_mmap_match(event, mmap_event))
+			perf_event_mmap_output(event, mmap_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+	struct vm_area_struct *vma = mmap_event->vma;
+	struct file *file = vma->vm_file;
+	unsigned int size;
+	char tmp[16];
+	char *buf = NULL;
+	const char *name;
+
+	memset(tmp, 0, sizeof(tmp));
+
+	if (file) {
+		/*
+		 * d_path works from the end of the buffer backwards, so we
+		 * need to add enough zero bytes after the string to handle
+		 * the 64bit alignment we do later.
+		 */
+		buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
+		if (!buf) {
+			name = strncpy(tmp, "//enomem", sizeof(tmp));
+			goto got_name;
+		}
+		name = d_path(&file->f_path, buf, PATH_MAX);
+		if (IS_ERR(name)) {
+			name = strncpy(tmp, "//toolong", sizeof(tmp));
+			goto got_name;
+		}
+	} else {
+		if (arch_vma_name(mmap_event->vma)) {
+			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
+				       sizeof(tmp));
+			goto got_name;
+		}
+
+		if (!vma->vm_mm) {
+			name = strncpy(tmp, "[vdso]", sizeof(tmp));
+			goto got_name;
+		}
+
+		name = strncpy(tmp, "//anon", sizeof(tmp));
+		goto got_name;
+	}
+
+got_name:
+	size = ALIGN(strlen(name)+1, sizeof(u64));
+
+	mmap_event->file_name = name;
+	mmap_event->file_size = size;
+
+	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_event_ctxp);
+	if (ctx)
+		perf_event_mmap_ctx(ctx, mmap_event);
+	rcu_read_unlock();
+
+	kfree(buf);
+}
+
+void __perf_event_mmap(struct vm_area_struct *vma)
+{
+	struct perf_mmap_event mmap_event;
+
+	if (!atomic_read(&nr_mmap_events))
+		return;
+
+	mmap_event = (struct perf_mmap_event){
+		.vma	= vma,
+		/* .file_name */
+		/* .file_size */
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_MMAP,
+				.misc = 0,
+				/* .size */
+			},
+			/* .pid */
+			/* .tid */
+			.start  = vma->vm_start,
+			.len    = vma->vm_end - vma->vm_start,
+			.pgoff  = vma->vm_pgoff,
+		},
+	};
+
+	perf_event_mmap_event(&mmap_event);
+}
+
+/*
+ * IRQ throttle logging
+ */
+
+static void perf_log_throttle(struct perf_event *event, int enable)
+{
+	struct perf_output_handle handle;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				time;
+		u64				id;
+		u64				stream_id;
+	} throttle_event = {
+		.header = {
+			.type = PERF_RECORD_THROTTLE,
+			.misc = 0,
+			.size = sizeof(throttle_event),
+		},
+		.time		= perf_clock(),
+		.id		= primary_event_id(event),
+		.stream_id	= event->id,
+	};
+
+	if (enable)
+		throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
+
+	ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, throttle_event);
+	perf_output_end(&handle);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event, int nmi,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = 0;
+
+	throttle = (throttle && event->pmu->unthrottle != NULL);
+
+	if (!throttle) {
+		hwc->interrupts++;
+	} else {
+		if (hwc->interrupts != MAX_INTERRUPTS) {
+			hwc->interrupts++;
+			if (HZ * hwc->interrupts >
+					(u64)sysctl_perf_event_sample_rate) {
+				hwc->interrupts = MAX_INTERRUPTS;
+				perf_log_throttle(event, 0);
+				ret = 1;
+			}
+		} else {
+			/*
+			 * Keep re-disabling events even though on the previous
+			 * pass we disabled it - just in case we raced with a
+			 * sched-in and the event got enabled again:
+			 */
+			ret = 1;
+		}
+	}
+
+	if (event->attr.freq) {
+		u64 now = perf_clock();
+		s64 delta = now - hwc->freq_stamp;
+
+		hwc->freq_stamp = now;
+
+		if (delta > 0 && delta < TICK_NSEC)
+			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+	}
+
+	/*
+	 * XXX event_limit might not quite work as expected on inherited
+	 * events
+	 */
+
+	event->pending_kill = POLL_IN;
+	if (events && atomic_dec_and_test(&event->event_limit)) {
+		ret = 1;
+		event->pending_kill = POLL_HUP;
+		if (nmi) {
+			event->pending_disable = 1;
+			perf_pending_queue(&event->pending,
+					   perf_pending_event);
+		} else
+			perf_event_disable(event);
+	}
+
+	perf_event_output(event, nmi, data, regs);
+	return ret;
+}
+
+int perf_event_overflow(struct perf_event *event, int nmi,
+			  struct perf_sample_data *data,
+			  struct pt_regs *regs)
+{
+	return __perf_event_overflow(event, nmi, 1, data, regs);
+}
+
+/*
+ * Generic software event infrastructure
+ */
+
+/*
+ * We directly increment event->count and keep a second value in
+ * event->hw.period_left to count intervals. This period event
+ * is kept in the range [-sample_period, 0] so that we can use the
+ * sign as trigger.
+ */
+
+static u64 perf_swevent_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 period = hwc->last_period;
+	u64 nr, offset;
+	s64 old, val;
+
+	hwc->last_period = hwc->sample_period;
+
+again:
+	old = val = atomic64_read(&hwc->period_left);
+	if (val < 0)
+		return 0;
+
+	nr = div64_u64(period + val, period);
+	offset = nr * period;
+	val -= offset;
+	if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+		goto again;
+
+	return nr;
+}
+
+static void perf_swevent_overflow(struct perf_event *event,
+				    int nmi, struct perf_sample_data *data,
+				    struct pt_regs *regs)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int throttle = 0;
+	u64 overflow;
+
+	data->period = event->hw.last_period;
+	overflow = perf_swevent_set_period(event);
+
+	if (hwc->interrupts == MAX_INTERRUPTS)
+		return;
+
+	for (; overflow; overflow--) {
+		if (__perf_event_overflow(event, nmi, throttle,
+					    data, regs)) {
+			/*
+			 * We inhibit the overflow from happening when
+			 * hwc->interrupts == MAX_INTERRUPTS.
+			 */
+			break;
+		}
+		throttle = 1;
+	}
+}
+
+static void perf_swevent_unthrottle(struct perf_event *event)
+{
+	/*
+	 * Nothing to do, we already reset hwc->interrupts.
+	 */
+}
+
+static void perf_swevent_add(struct perf_event *event, u64 nr,
+			       int nmi, struct perf_sample_data *data,
+			       struct pt_regs *regs)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	atomic64_add(nr, &event->count);
+
+	if (!hwc->sample_period)
+		return;
+
+	if (!regs)
+		return;
+
+	if (!atomic64_add_negative(nr, &hwc->period_left))
+		perf_swevent_overflow(event, nmi, data, regs);
+}
+
+static int perf_swevent_is_counting(struct perf_event *event)
+{
+	/*
+	 * The event is active, we're good!
+	 */
+	if (event->state == PERF_EVENT_STATE_ACTIVE)
+		return 1;
+
+	/*
+	 * The event is off/error, not counting.
+	 */
+	if (event->state != PERF_EVENT_STATE_INACTIVE)
+		return 0;
+
+	/*
+	 * The event is inactive, if the context is active
+	 * we're part of a group that didn't make it on the 'pmu',
+	 * not counting.
+	 */
+	if (event->ctx->is_active)
+		return 0;
+
+	/*
+	 * We're inactive and the context is too, this means the
+	 * task is scheduled out, we're counting events that happen
+	 * to us, like migration events.
+	 */
+	return 1;
+}
+
+static int perf_swevent_match(struct perf_event *event,
+				enum perf_type_id type,
+				u32 event_id, struct pt_regs *regs)
+{
+	if (!perf_swevent_is_counting(event))
+		return 0;
+
+	if (event->attr.type != type)
+		return 0;
+	if (event->attr.config != event_id)
+		return 0;
+
+	if (regs) {
+		if (event->attr.exclude_user && user_mode(regs))
+			return 0;
+
+		if (event->attr.exclude_kernel && !user_mode(regs))
+			return 0;
+	}
+
+	return 1;
+}
+
+static void perf_swevent_ctx_event(struct perf_event_context *ctx,
+				     enum perf_type_id type,
+				     u32 event_id, u64 nr, int nmi,
+				     struct perf_sample_data *data,
+				     struct pt_regs *regs)
+{
+	struct perf_event *event;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+		if (perf_swevent_match(event, type, event_id, regs))
+			perf_swevent_add(event, nr, nmi, data, regs);
+	}
+	rcu_read_unlock();
+}
+
+static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx)
+{
+	if (in_nmi())
+		return &cpuctx->recursion[3];
+
+	if (in_irq())
+		return &cpuctx->recursion[2];
+
+	if (in_softirq())
+		return &cpuctx->recursion[1];
+
+	return &cpuctx->recursion[0];
+}
+
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+				    u64 nr, int nmi,
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
+{
+	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	int *recursion = perf_swevent_recursion_context(cpuctx);
+	struct perf_event_context *ctx;
+
+	if (*recursion)
+		goto out;
+
+	(*recursion)++;
+	barrier();
+
+	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
+				 nr, nmi, data, regs);
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_event_ctxp);
+	if (ctx)
+		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
+	rcu_read_unlock();
+
+	barrier();
+	(*recursion)--;
+
+out:
+	put_cpu_var(perf_cpu_context);
+}
+
+void __perf_sw_event(u32 event_id, u64 nr, int nmi,
+			    struct pt_regs *regs, u64 addr)
+{
+	struct perf_sample_data data = {
+		.addr = addr,
+	};
+
+	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi,
+				&data, regs);
+}
+
+static void perf_swevent_read(struct perf_event *event)
+{
+}
+
+static int perf_swevent_enable(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->sample_period) {
+		hwc->last_period = hwc->sample_period;
+		perf_swevent_set_period(event);
+	}
+	return 0;
+}
+
+static void perf_swevent_disable(struct perf_event *event)
+{
+}
+
+static const struct pmu perf_ops_generic = {
+	.enable		= perf_swevent_enable,
+	.disable	= perf_swevent_disable,
+	.read		= perf_swevent_read,
+	.unthrottle	= perf_swevent_unthrottle,
+};
+
+/*
+ * hrtimer based swevent callback
+ */
+
+static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
+{
+	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct perf_sample_data data;
+	struct pt_regs *regs;
+	struct perf_event *event;
+	u64 period;
+
+	event	= container_of(hrtimer, struct perf_event, hw.hrtimer);
+	event->pmu->read(event);
+
+	data.addr = 0;
+	regs = get_irq_regs();
+	/*
+	 * In case we exclude kernel IPs or are somehow not in interrupt
+	 * context, provide the next best thing, the user IP.
+	 */
+	if ((event->attr.exclude_kernel || !regs) &&
+			!event->attr.exclude_user)
+		regs = task_pt_regs(current);
+
+	if (regs) {
+		if (perf_event_overflow(event, 0, &data, regs))
+			ret = HRTIMER_NORESTART;
+	}
+
+	period = max_t(u64, 10000, event->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+	return ret;
+}
+
+/*
+ * Software event: cpu wall time clock
+ */
+
+static void cpu_clock_perf_event_update(struct perf_event *event)
+{
+	int cpu = raw_smp_processor_id();
+	s64 prev;
+	u64 now;
+
+	now = cpu_clock(cpu);
+	prev = atomic64_read(&event->hw.prev_count);
+	atomic64_set(&event->hw.prev_count, now);
+	atomic64_add(now - prev, &event->count);
+}
+
+static int cpu_clock_perf_event_enable(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int cpu = raw_smp_processor_id();
+
+	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swevent_hrtimer;
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
+
+	return 0;
+}
+
+static void cpu_clock_perf_event_disable(struct perf_event *event)
+{
+	if (event->hw.sample_period)
+		hrtimer_cancel(&event->hw.hrtimer);
+	cpu_clock_perf_event_update(event);
+}
+
+static void cpu_clock_perf_event_read(struct perf_event *event)
+{
+	cpu_clock_perf_event_update(event);
+}
+
+static const struct pmu perf_ops_cpu_clock = {
+	.enable		= cpu_clock_perf_event_enable,
+	.disable	= cpu_clock_perf_event_disable,
+	.read		= cpu_clock_perf_event_read,
+};
+
+/*
+ * Software event: task time clock
+ */
+
+static void task_clock_perf_event_update(struct perf_event *event, u64 now)
+{
+	u64 prev;
+	s64 delta;
+
+	prev = atomic64_xchg(&event->hw.prev_count, now);
+	delta = now - prev;
+	atomic64_add(delta, &event->count);
+}
+
+static int task_clock_perf_event_enable(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 now;
+
+	now = event->ctx->time;
+
+	atomic64_set(&hwc->prev_count, now);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swevent_hrtimer;
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
+
+	return 0;
+}
+
+static void task_clock_perf_event_disable(struct perf_event *event)
+{
+	if (event->hw.sample_period)
+		hrtimer_cancel(&event->hw.hrtimer);
+	task_clock_perf_event_update(event, event->ctx->time);
+
+}
+
+static void task_clock_perf_event_read(struct perf_event *event)
+{
+	u64 time;
+
+	if (!in_nmi()) {
+		update_context_time(event->ctx);
+		time = event->ctx->time;
+	} else {
+		u64 now = perf_clock();
+		u64 delta = now - event->ctx->timestamp;
+		time = event->ctx->time + delta;
+	}
+
+	task_clock_perf_event_update(event, time);
+}
+
+static const struct pmu perf_ops_task_clock = {
+	.enable		= task_clock_perf_event_enable,
+	.disable	= task_clock_perf_event_disable,
+	.read		= task_clock_perf_event_read,
+};
+
+#ifdef CONFIG_EVENT_PROFILE
+void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
+			  int entry_size)
+{
+	struct perf_raw_record raw = {
+		.size = entry_size,
+		.data = record,
+	};
+
+	struct perf_sample_data data = {
+		.addr = addr,
+		.raw = &raw,
+	};
+
+	struct pt_regs *regs = get_irq_regs();
+
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
+				&data, regs);
+}
+EXPORT_SYMBOL_GPL(perf_tp_event);
+
+extern int ftrace_profile_enable(int);
+extern void ftrace_profile_disable(int);
+
+static void tp_perf_event_destroy(struct perf_event *event)
+{
+	ftrace_profile_disable(event->attr.config);
+}
+
+static const struct pmu *tp_perf_event_init(struct perf_event *event)
+{
+	/*
+	 * Raw tracepoint data is a severe data leak, only allow root to
+	 * have these.
+	 */
+	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
+			perf_paranoid_tracepoint_raw() &&
+			!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (ftrace_profile_enable(event->attr.config))
+		return NULL;
+
+	event->destroy = tp_perf_event_destroy;
+
+	return &perf_ops_generic;
+}
+#else
+static const struct pmu *tp_perf_event_init(struct perf_event *event)
+{
+	return NULL;
+}
+#endif
+
+atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+
+static void sw_perf_event_destroy(struct perf_event *event)
+{
+	u64 event_id = event->attr.config;
+
+	WARN_ON(event->parent);
+
+	atomic_dec(&perf_swevent_enabled[event_id]);
+}
+
+static const struct pmu *sw_perf_event_init(struct perf_event *event)
+{
+	const struct pmu *pmu = NULL;
+	u64 event_id = event->attr.config;
+
+	/*
+	 * Software events (currently) can't in general distinguish
+	 * between user, kernel and hypervisor events.
+	 * However, context switches and cpu migrations are considered
+	 * to be kernel events, and page faults are never hypervisor
+	 * events.
+	 */
+	switch (event_id) {
+	case PERF_COUNT_SW_CPU_CLOCK:
+		pmu = &perf_ops_cpu_clock;
+
+		break;
+	case PERF_COUNT_SW_TASK_CLOCK:
+		/*
+		 * If the user instantiates this as a per-cpu event,
+		 * use the cpu_clock event instead.
+		 */
+		if (event->ctx->task)
+			pmu = &perf_ops_task_clock;
+		else
+			pmu = &perf_ops_cpu_clock;
+
+		break;
+	case PERF_COUNT_SW_PAGE_FAULTS:
+	case PERF_COUNT_SW_PAGE_FAULTS_MIN:
+	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
+	case PERF_COUNT_SW_CONTEXT_SWITCHES:
+	case PERF_COUNT_SW_CPU_MIGRATIONS:
+		if (!event->parent) {
+			atomic_inc(&perf_swevent_enabled[event_id]);
+			event->destroy = sw_perf_event_destroy;
+		}
+		pmu = &perf_ops_generic;
+		break;
+	}
+
+	return pmu;
+}
+
+/*
+ * Allocate and initialize a event structure
+ */
+static struct perf_event *
+perf_event_alloc(struct perf_event_attr *attr,
+		   int cpu,
+		   struct perf_event_context *ctx,
+		   struct perf_event *group_leader,
+		   struct perf_event *parent_event,
+		   gfp_t gfpflags)
+{
+	const struct pmu *pmu;
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	long err;
+
+	event = kzalloc(sizeof(*event), gfpflags);
+	if (!event)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Single events are their own group leaders, with an
+	 * empty sibling list:
+	 */
+	if (!group_leader)
+		group_leader = event;
+
+	mutex_init(&event->child_mutex);
+	INIT_LIST_HEAD(&event->child_list);
+
+	INIT_LIST_HEAD(&event->group_entry);
+	INIT_LIST_HEAD(&event->event_entry);
+	INIT_LIST_HEAD(&event->sibling_list);
+	init_waitqueue_head(&event->waitq);
+
+	mutex_init(&event->mmap_mutex);
+
+	event->cpu		= cpu;
+	event->attr		= *attr;
+	event->group_leader	= group_leader;
+	event->pmu		= NULL;
+	event->ctx		= ctx;
+	event->oncpu		= -1;
+
+	event->parent		= parent_event;
+
+	event->ns		= get_pid_ns(current->nsproxy->pid_ns);
+	event->id		= atomic64_inc_return(&perf_event_id);
+
+	event->state		= PERF_EVENT_STATE_INACTIVE;
+
+	if (attr->disabled)
+		event->state = PERF_EVENT_STATE_OFF;
+
+	pmu = NULL;
+
+	hwc = &event->hw;
+	hwc->sample_period = attr->sample_period;
+	if (attr->freq && attr->sample_freq)
+		hwc->sample_period = 1;
+	hwc->last_period = hwc->sample_period;
+
+	atomic64_set(&hwc->period_left, hwc->sample_period);
+
+	/*
+	 * we currently do not support PERF_FORMAT_GROUP on inherited events
+	 */
+	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
+		goto done;
+
+	switch (attr->type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		pmu = hw_perf_event_init(event);
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		pmu = sw_perf_event_init(event);
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		pmu = tp_perf_event_init(event);
+		break;
+
+	default:
+		break;
+	}
+done:
+	err = 0;
+	if (!pmu)
+		err = -EINVAL;
+	else if (IS_ERR(pmu))
+		err = PTR_ERR(pmu);
+
+	if (err) {
+		if (event->ns)
+			put_pid_ns(event->ns);
+		kfree(event);
+		return ERR_PTR(err);
+	}
+
+	event->pmu = pmu;
+
+	if (!event->parent) {
+		atomic_inc(&nr_events);
+		if (event->attr.mmap)
+			atomic_inc(&nr_mmap_events);
+		if (event->attr.comm)
+			atomic_inc(&nr_comm_events);
+		if (event->attr.task)
+			atomic_inc(&nr_task_events);
+	}
+
+	return event;
+}
+
+static int perf_copy_attr(struct perf_event_attr __user *uattr,
+			  struct perf_event_attr *attr)
+{
+	u32 size;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/*
+	 * zero the full structure, so that a short copy will be nice.
+	 */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		goto err_size;
+
+	if (!size)		/* abi compat */
+		size = PERF_ATTR_SIZE_VER0;
+
+	if (size < PERF_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. new
+	 * user-space does not rely on any kernel feature
+	 * extensions we dont know about yet.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned char __user *addr;
+		unsigned char __user *end;
+		unsigned char val;
+
+		addr = (void __user *)uattr + sizeof(*attr);
+		end  = (void __user *)uattr + size;
+
+		for (; addr < end; addr++) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+		size = sizeof(*attr);
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * If the type exists, the corresponding creation will verify
+	 * the attr->config.
+	 */
+	if (attr->type >= PERF_TYPE_MAX)
+		return -EINVAL;
+
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+		return -EINVAL;
+
+	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
+		return -EINVAL;
+
+	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
+		return -EINVAL;
+
+out:
+	return ret;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	ret = -E2BIG;
+	goto out;
+}
+
+int perf_event_set_output(struct perf_event *event, int output_fd)
+{
+	struct perf_event *output_event = NULL;
+	struct file *output_file = NULL;
+	struct perf_event *old_output;
+	int fput_needed = 0;
+	int ret = -EINVAL;
+
+	if (!output_fd)
+		goto set;
+
+	output_file = fget_light(output_fd, &fput_needed);
+	if (!output_file)
+		return -EBADF;
+
+	if (output_file->f_op != &perf_fops)
+		goto out;
+
+	output_event = output_file->private_data;
+
+	/* Don't chain output fds */
+	if (output_event->output)
+		goto out;
+
+	/* Don't set an output fd when we already have an output channel */
+	if (event->data)
+		goto out;
+
+	atomic_long_inc(&output_file->f_count);
+
+set:
+	mutex_lock(&event->mmap_mutex);
+	old_output = event->output;
+	rcu_assign_pointer(event->output, output_event);
+	mutex_unlock(&event->mmap_mutex);
+
+	if (old_output) {
+		/*
+		 * we need to make sure no existing perf_output_*()
+		 * is still referencing this event.
+		 */
+		synchronize_rcu();
+		fput(old_output->filp);
+	}
+
+	ret = 0;
+out:
+	fput_light(output_file, fput_needed);
+	return ret;
+}
+
+/**
+ * sys_perf_event_open - open a performance event, associate it to a task/cpu
+ *
+ * @attr_uptr:	event_id type attributes for monitoring/sampling
+ * @pid:		target pid
+ * @cpu:		target cpu
+ * @group_fd:		group leader event fd
+ */
+SYSCALL_DEFINE5(perf_event_open,
+		struct perf_event_attr __user *, attr_uptr,
+		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
+{
+	struct perf_event *event, *group_leader;
+	struct perf_event_attr attr;
+	struct perf_event_context *ctx;
+	struct file *event_file = NULL;
+	struct file *group_file = NULL;
+	int fput_needed = 0;
+	int fput_needed2 = 0;
+	int err;
+
+	/* for future expandability... */
+	if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
+		return -EINVAL;
+
+	err = perf_copy_attr(attr_uptr, &attr);
+	if (err)
+		return err;
+
+	if (!attr.exclude_kernel) {
+		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
+	if (attr.freq) {
+		if (attr.sample_freq > sysctl_perf_event_sample_rate)
+			return -EINVAL;
+	}
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	/*
+	 * Look up the group leader (we will attach this event to it):
+	 */
+	group_leader = NULL;
+	if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
+		err = -EINVAL;
+		group_file = fget_light(group_fd, &fput_needed);
+		if (!group_file)
+			goto err_put_context;
+		if (group_file->f_op != &perf_fops)
+			goto err_put_context;
+
+		group_leader = group_file->private_data;
+		/*
+		 * Do not allow a recursive hierarchy (this new sibling
+		 * becoming part of another group-sibling):
+		 */
+		if (group_leader->group_leader != group_leader)
+			goto err_put_context;
+		/*
+		 * Do not allow to attach to a group in a different
+		 * task or CPU context:
+		 */
+		if (group_leader->ctx != ctx)
+			goto err_put_context;
+		/*
+		 * Only a group leader can be exclusive or pinned
+		 */
+		if (attr.exclusive || attr.pinned)
+			goto err_put_context;
+	}
+
+	event = perf_event_alloc(&attr, cpu, ctx, group_leader,
+				     NULL, GFP_KERNEL);
+	err = PTR_ERR(event);
+	if (IS_ERR(event))
+		goto err_put_context;
+
+	err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0);
+	if (err < 0)
+		goto err_free_put_context;
+
+	event_file = fget_light(err, &fput_needed2);
+	if (!event_file)
+		goto err_free_put_context;
+
+	if (flags & PERF_FLAG_FD_OUTPUT) {
+		err = perf_event_set_output(event, group_fd);
+		if (err)
+			goto err_fput_free_put_context;
+	}
+
+	event->filp = event_file;
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
+	mutex_unlock(&ctx->mutex);
+
+	event->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_event_mutex);
+	list_add_tail(&event->owner_entry, &current->perf_event_list);
+	mutex_unlock(&current->perf_event_mutex);
+
+err_fput_free_put_context:
+	fput_light(event_file, fput_needed2);
+
+err_free_put_context:
+	if (err < 0)
+		kfree(event);
+
+err_put_context:
+	if (err < 0)
+		put_ctx(ctx);
+
+	fput_light(group_file, fput_needed);
+
+	return err;
+}
+
+/*
+ * inherit a event from parent task to child task:
+ */
+static struct perf_event *
+inherit_event(struct perf_event *parent_event,
+	      struct task_struct *parent,
+	      struct perf_event_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_event *group_leader,
+	      struct perf_event_context *child_ctx)
+{
+	struct perf_event *child_event;
+
+	/*
+	 * Instead of creating recursive hierarchies of events,
+	 * we link inherited events back to the original parent,
+	 * which has a filp for sure, which we use as the reference
+	 * count:
+	 */
+	if (parent_event->parent)
+		parent_event = parent_event->parent;
+
+	child_event = perf_event_alloc(&parent_event->attr,
+					   parent_event->cpu, child_ctx,
+					   group_leader, parent_event,
+					   GFP_KERNEL);
+	if (IS_ERR(child_event))
+		return child_event;
+	get_ctx(child_ctx);
+
+	/*
+	 * Make the child state follow the state of the parent event,
+	 * not its attr.disabled bit.  We hold the parent's mutex,
+	 * so we won't race with perf_event_{en, dis}able_family.
+	 */
+	if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
+		child_event->state = PERF_EVENT_STATE_INACTIVE;
+	else
+		child_event->state = PERF_EVENT_STATE_OFF;
+
+	if (parent_event->attr.freq)
+		child_event->hw.sample_period = parent_event->hw.sample_period;
+
+	/*
+	 * Link it up in the child's context:
+	 */
+	add_event_to_ctx(child_event, child_ctx);
+
+	/*
+	 * Get a reference to the parent filp - we will fput it
+	 * when the child event exits. This is safe to do because
+	 * we are in the parent and we know that the filp still
+	 * exists and has a nonzero count:
+	 */
+	atomic_long_inc(&parent_event->filp->f_count);
+
+	/*
+	 * Link this into the parent event's child list
+	 */
+	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+	mutex_lock(&parent_event->child_mutex);
+	list_add_tail(&child_event->child_list, &parent_event->child_list);
+	mutex_unlock(&parent_event->child_mutex);
+
+	return child_event;
+}
+
+static int inherit_group(struct perf_event *parent_event,
+	      struct task_struct *parent,
+	      struct perf_event_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_event_context *child_ctx)
+{
+	struct perf_event *leader;
+	struct perf_event *sub;
+	struct perf_event *child_ctr;
+
+	leader = inherit_event(parent_event, parent, parent_ctx,
+				 child, NULL, child_ctx);
+	if (IS_ERR(leader))
+		return PTR_ERR(leader);
+	list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
+		child_ctr = inherit_event(sub, parent, parent_ctx,
+					    child, leader, child_ctx);
+		if (IS_ERR(child_ctr))
+			return PTR_ERR(child_ctr);
+	}
+	return 0;
+}
+
+static void sync_child_event(struct perf_event *child_event,
+			       struct task_struct *child)
+{
+	struct perf_event *parent_event = child_event->parent;
+	u64 child_val;
+
+	if (child_event->attr.inherit_stat)
+		perf_event_read_event(child_event, child);
+
+	child_val = atomic64_read(&child_event->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_event->count);
+	atomic64_add(child_event->total_time_enabled,
+		     &parent_event->child_total_time_enabled);
+	atomic64_add(child_event->total_time_running,
+		     &parent_event->child_total_time_running);
+
+	/*
+	 * Remove this event from the parent's list
+	 */
+	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+	mutex_lock(&parent_event->child_mutex);
+	list_del_init(&child_event->child_list);
+	mutex_unlock(&parent_event->child_mutex);
+
+	/*
+	 * Release the parent event, if this was the last
+	 * reference to it.
+	 */
+	fput(parent_event->filp);
+}
+
+static void
+__perf_event_exit_task(struct perf_event *child_event,
+			 struct perf_event_context *child_ctx,
+			 struct task_struct *child)
+{
+	struct perf_event *parent_event;
+
+	update_event_times(child_event);
+	perf_event_remove_from_context(child_event);
+
+	parent_event = child_event->parent;
+	/*
+	 * It can happen that parent exits first, and has events
+	 * that are still around due to the child reference. These
+	 * events need to be zapped - but otherwise linger.
+	 */
+	if (parent_event) {
+		sync_child_event(child_event, child);
+		free_event(child_event);
+	}
+}
+
+/*
+ * When a child task exits, feed back event values to parent events.
+ */
+void perf_event_exit_task(struct task_struct *child)
+{
+	struct perf_event *child_event, *tmp;
+	struct perf_event_context *child_ctx;
+	unsigned long flags;
+
+	if (likely(!child->perf_event_ctxp)) {
+		perf_event_task(child, NULL, 0);
+		return;
+	}
+
+	local_irq_save(flags);
+	/*
+	 * We can't reschedule here because interrupts are disabled,
+	 * and either child is current or it is a task that can't be
+	 * scheduled, so we are now safe from rescheduling changing
+	 * our context.
+	 */
+	child_ctx = child->perf_event_ctxp;
+	__perf_event_task_sched_out(child_ctx);
+
+	/*
+	 * Take the context lock here so that if find_get_context is
+	 * reading child->perf_event_ctxp, we wait until it has
+	 * incremented the context's refcount before we do put_ctx below.
+	 */
+	spin_lock(&child_ctx->lock);
+	child->perf_event_ctxp = NULL;
+	/*
+	 * If this context is a clone; unclone it so it can't get
+	 * swapped to another process while we're removing all
+	 * the events from it.
+	 */
+	unclone_ctx(child_ctx);
+	spin_unlock_irqrestore(&child_ctx->lock, flags);
+
+	/*
+	 * Report the task dead after unscheduling the events so that we
+	 * won't get any samples after PERF_RECORD_EXIT. We can however still
+	 * get a few PERF_RECORD_READ events.
+	 */
+	perf_event_task(child, child_ctx, 0);
+
+	/*
+	 * We can recurse on the same lock type through:
+	 *
+	 *   __perf_event_exit_task()
+	 *     sync_child_event()
+	 *       fput(parent_event->filp)
+	 *         perf_release()
+	 *           mutex_lock(&ctx->mutex)
+	 *
+	 * But since its the parent context it won't be the same instance.
+	 */
+	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
+
+again:
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	/*
+	 * If the last event was a group event, it will have appended all
+	 * its siblings to the list, but we obtained 'tmp' before that which
+	 * will still point to the list head terminating the iteration.
+	 */
+	if (!list_empty(&child_ctx->group_list))
+		goto again;
+
+	mutex_unlock(&child_ctx->mutex);
+
+	put_ctx(child_ctx);
+}
+
+/*
+ * free an unexposed, unused context as created by inheritance by
+ * init_task below, used by fork() in case of fail.
+ */
+void perf_event_free_task(struct task_struct *task)
+{
+	struct perf_event_context *ctx = task->perf_event_ctxp;
+	struct perf_event *event, *tmp;
+
+	if (!ctx)
+		return;
+
+	mutex_lock(&ctx->mutex);
+again:
+	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
+		struct perf_event *parent = event->parent;
+
+		if (WARN_ON_ONCE(!parent))
+			continue;
+
+		mutex_lock(&parent->child_mutex);
+		list_del_init(&event->child_list);
+		mutex_unlock(&parent->child_mutex);
+
+		fput(parent->filp);
+
+		list_del_event(event, ctx);
+		free_event(event);
+	}
+
+	if (!list_empty(&ctx->group_list))
+		goto again;
+
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
+/*
+ * Initialize the perf_event context in task_struct
+ */
+int perf_event_init_task(struct task_struct *child)
+{
+	struct perf_event_context *child_ctx, *parent_ctx;
+	struct perf_event_context *cloned_ctx;
+	struct perf_event *event;
+	struct task_struct *parent = current;
+	int inherited_all = 1;
+	int ret = 0;
+
+	child->perf_event_ctxp = NULL;
+
+	mutex_init(&child->perf_event_mutex);
+	INIT_LIST_HEAD(&child->perf_event_list);
+
+	if (likely(!parent->perf_event_ctxp))
+		return 0;
+
+	/*
+	 * This is executed from the parent task context, so inherit
+	 * events that have been marked for cloning.
+	 * First allocate and initialize a context for the child.
+	 */
+
+	child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+	if (!child_ctx)
+		return -ENOMEM;
+
+	__perf_event_init_context(child_ctx, child);
+	child->perf_event_ctxp = child_ctx;
+	get_task_struct(child);
+
+	/*
+	 * If the parent's context is a clone, pin it so it won't get
+	 * swapped under us.
+	 */
+	parent_ctx = perf_pin_task_context(parent);
+
+	/*
+	 * No need to check if parent_ctx != NULL here; since we saw
+	 * it non-NULL earlier, the only reason for it to become NULL
+	 * is if we exit, and since we're currently in the middle of
+	 * a fork we can't be exiting at the same time.
+	 */
+
+	/*
+	 * Lock the parent list. No need to lock the child - not PID
+	 * hashed yet and not running, so nobody can access it.
+	 */
+	mutex_lock(&parent_ctx->mutex);
+
+	/*
+	 * We dont have to disable NMIs - we are only looking at
+	 * the list, not manipulating it:
+	 */
+	list_for_each_entry_rcu(event, &parent_ctx->event_list, event_entry) {
+		if (event != event->group_leader)
+			continue;
+
+		if (!event->attr.inherit) {
+			inherited_all = 0;
+			continue;
+		}
+
+		ret = inherit_group(event, parent, parent_ctx,
+					     child, child_ctx);
+		if (ret) {
+			inherited_all = 0;
+			break;
+		}
+	}
+
+	if (inherited_all) {
+		/*
+		 * Mark the child context as a clone of the parent
+		 * context, or of whatever the parent is a clone of.
+		 * Note that if the parent is a clone, it could get
+		 * uncloned at any point, but that doesn't matter
+		 * because the list of events and the generation
+		 * count can't have changed since we took the mutex.
+		 */
+		cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
+		if (cloned_ctx) {
+			child_ctx->parent_ctx = cloned_ctx;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
+		} else {
+			child_ctx->parent_ctx = parent_ctx;
+			child_ctx->parent_gen = parent_ctx->generation;
+		}
+		get_ctx(child_ctx->parent_ctx);
+	}
+
+	mutex_unlock(&parent_ctx->mutex);
+
+	perf_unpin_context(parent_ctx);
+
+	return ret;
+}
+
+static void __cpuinit perf_event_init_cpu(int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+
+	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	__perf_event_init_context(&cpuctx->ctx, NULL);
+
+	spin_lock(&perf_resource_lock);
+	cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
+	spin_unlock(&perf_resource_lock);
+
+	hw_perf_event_setup(cpu);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __perf_event_exit_cpu(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = &cpuctx->ctx;
+	struct perf_event *event, *tmp;
+
+	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+		__perf_event_remove_from_context(event);
+}
+static void perf_event_exit_cpu(int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_event_context *ctx = &cpuctx->ctx;
+
+	mutex_lock(&ctx->mutex);
+	smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
+	mutex_unlock(&ctx->mutex);
+}
+#else
+static inline void perf_event_exit_cpu(int cpu) { }
+#endif
+
+static int __cpuinit
+perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action) {
+
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		perf_event_init_cpu(cpu);
+		break;
+
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		hw_perf_event_setup_online(cpu);
+		break;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		perf_event_exit_cpu(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * This has to have a higher priority than migration_notifier in sched.c.
+ */
+static struct notifier_block __cpuinitdata perf_cpu_nb = {
+	.notifier_call		= perf_cpu_notify,
+	.priority		= 20,
+};
+
+void __init perf_event_init(void)
+{
+	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
+			(void *)(long)smp_processor_id());
+	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
+			(void *)(long)smp_processor_id());
+	register_cpu_notifier(&perf_cpu_nb);
+}
+
+static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_reserved_percpu);
+}
+
+static ssize_t
+perf_set_reserve_percpu(struct sysdev_class *class,
+			const char *buf,
+			size_t count)
+{
+	struct perf_cpu_context *cpuctx;
+	unsigned long val;
+	int err, cpu, mpt;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > perf_max_events)
+		return -EINVAL;
+
+	spin_lock(&perf_resource_lock);
+	perf_reserved_percpu = val;
+	for_each_online_cpu(cpu) {
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		spin_lock_irq(&cpuctx->ctx.lock);
+		mpt = min(perf_max_events - cpuctx->ctx.nr_events,
+			  perf_max_events - perf_reserved_percpu);
+		cpuctx->max_pertask = mpt;
+		spin_unlock_irq(&cpuctx->ctx.lock);
+	}
+	spin_unlock(&perf_resource_lock);
+
+	return count;
+}
+
+static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_overcommit);
+}
+
+static ssize_t
+perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
+{
+	unsigned long val;
+	int err;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > 1)
+		return -EINVAL;
+
+	spin_lock(&perf_resource_lock);
+	perf_overcommit = val;
+	spin_unlock(&perf_resource_lock);
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(
+				reserve_percpu,
+				0644,
+				perf_show_reserve_percpu,
+				perf_set_reserve_percpu
+			);
+
+static SYSDEV_CLASS_ATTR(
+				overcommit,
+				0644,
+				perf_show_overcommit,
+				perf_set_overcommit
+			);
+
+static struct attribute *perfclass_attrs[] = {
+	&attr_reserve_percpu.attr,
+	&attr_overcommit.attr,
+	NULL
+};
+
+static struct attribute_group perfclass_attr_group = {
+	.attrs			= perfclass_attrs,
+	.name			= "perf_events",
+};
+
+static int __init perf_event_sysfs_init(void)
+{
+	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
+				  &perfclass_attr_group);
+}
+device_initcall(perf_event_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index faf4d46..291c8d2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,7 +39,7 @@
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
 #include <linux/debug_locks.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -2059,7 +2059,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
 #endif
-		perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				     1, 1, NULL, 0);
 	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
@@ -2724,7 +2724,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
-	perf_counter_task_sched_in(current, cpu_of(rq));
+	perf_event_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
 
 	fire_sched_in_preempt_notifiers(current);
@@ -5199,7 +5199,7 @@ void scheduler_tick(void)
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
 
-	perf_counter_task_tick(curr, cpu);
+	perf_event_task_tick(curr, cpu);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
@@ -5415,7 +5415,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_counter_task_sched_out(prev, next, cpu);
+		perf_event_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
@@ -7692,7 +7692,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 /*
  * Register at high priority so that task migration (migrate_all_tasks)
  * happens before everything else.  This has to be lower priority than
- * the notifier in the perf_counter subsystem, though.
+ * the notifier in the perf_event subsystem, though.
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
 	.notifier_call = migration_call,
@@ -9549,7 +9549,7 @@ void __init sched_init(void)
 	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-	perf_counter_init();
+	perf_event_init();
 
 	scheduler_running = 1;
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index b3f1097..ea5c3bc 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -14,7 +14,7 @@
 #include <linux/prctl.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
 #include <linux/kexec.h>
@@ -1511,11 +1511,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		case PR_SET_TSC:
 			error = SET_TSC_CTL(arg2);
 			break;
-		case PR_TASK_PERF_COUNTERS_DISABLE:
-			error = perf_counter_task_disable();
+		case PR_TASK_PERF_EVENTS_DISABLE:
+			error = perf_event_task_disable();
 			break;
-		case PR_TASK_PERF_COUNTERS_ENABLE:
-			error = perf_counter_task_enable();
+		case PR_TASK_PERF_EVENTS_ENABLE:
+			error = perf_event_task_enable();
 			break;
 		case PR_GET_TIMERSLACK:
 			error = current->timer_slack_ns;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 68320f6..515bc23 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -177,4 +177,4 @@ cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
 
 /* performance counters: */
-cond_syscall(sys_perf_counter_open);
+cond_syscall(sys_perf_event_open);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a631ba..6ba49c7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -50,7 +50,7 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/slow-work.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -964,28 +964,28 @@ static struct ctl_table kern_table[] = {
 		.child		= slow_work_sysctls,
 	},
 #endif
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_paranoid",
-		.data		= &sysctl_perf_counter_paranoid,
-		.maxlen		= sizeof(sysctl_perf_counter_paranoid),
+		.procname	= "perf_event_paranoid",
+		.data		= &sysctl_perf_event_paranoid,
+		.maxlen		= sizeof(sysctl_perf_event_paranoid),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_mlock_kb",
-		.data		= &sysctl_perf_counter_mlock,
-		.maxlen		= sizeof(sysctl_perf_counter_mlock),
+		.procname	= "perf_event_mlock_kb",
+		.data		= &sysctl_perf_event_mlock,
+		.maxlen		= sizeof(sysctl_perf_event_mlock),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_max_sample_rate",
-		.data		= &sysctl_perf_counter_sample_rate,
-		.maxlen		= sizeof(sysctl_perf_counter_sample_rate),
+		.procname	= "perf_event_max_sample_rate",
+		.data		= &sysctl_perf_event_sample_rate,
+		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
diff --git a/kernel/timer.c b/kernel/timer.c
index bbb5107..811e5c3 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,7 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/sched.h>
 
 #include <asm/uaccess.h>
@@ -1187,7 +1187,7 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
-	perf_counter_do_pending();
+	perf_event_do_pending();
 
 	hrtimer_run_pending();
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3..233f348 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -2,7 +2,7 @@
 #include <trace/events/syscalls.h>
 #include <linux/kernel.h>
 #include <linux/ftrace.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <asm/syscall.h>
 
 #include "trace_output.h"
@@ -414,7 +414,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 		rec->nr = syscall_nr;
 		syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 				       (unsigned long *)&rec->args);
-		perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
+		perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
 	} while(0);
 }
 
@@ -476,7 +476,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	rec.nr = syscall_nr;
 	rec.ret = syscall_get_return_value(current, regs);
 
-	perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+	perf_tp_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
 }
 
 int reg_prof_syscall_exit(char *name)
diff --git a/mm/mmap.c b/mm/mmap.c
index 26892e3..376492e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -28,7 +28,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -1220,7 +1220,7 @@ munmap_back:
 	if (correct_wcount)
 		atomic_inc(&inode->i_writecount);
 out:
-	perf_counter_mmap(vma);
+	perf_event_mmap(vma);
 
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
@@ -2308,7 +2308,7 @@ int install_special_mapping(struct mm_struct *mm,
 
 	mm->total_vm += len >> PAGE_SHIFT;
 
-	perf_counter_mmap(vma);
+	perf_event_mmap(vma);
 
 	return 0;
 }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index d80311b..8bc969d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -23,7 +23,7 @@
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -300,7 +300,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
 		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
 		if (error)
 			goto out;
-		perf_counter_mmap(vma);
+		perf_event_mmap(vma);
 		nstart = tmp;
 
 		if (nstart < prev->vm_end)
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0aba8b6..b5f1953 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -318,7 +318,7 @@ export PERL_PATH
 
 LIB_FILE=libperf.a
 
-LIB_H += ../../include/linux/perf_counter.h
+LIB_H += ../../include/linux/perf_event.h
 LIB_H += ../../include/linux/rbtree.h
 LIB_H += ../../include/linux/list.h
 LIB_H += util/include/linux/list.h
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 043d85b..1ec7416 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -505,7 +505,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		return -1;
 	}
 
-	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+	if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
 		show = SHOW_KERNEL;
 		level = 'k';
 
@@ -513,7 +513,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 
 		dump_printf(" ...... dso: %s\n", dso->name);
 
-	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+	} else if (event->header.misc & PERF_RECORD_MISC_USER) {
 
 		show = SHOW_USER;
 		level = '.';
@@ -565,7 +565,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 
 	thread = threads__findnew(event->mmap.pid, &threads, &last_match);
 
-	dump_printf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+	dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->mmap.pid,
@@ -575,7 +575,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 		event->mmap.filename);
 
 	if (thread == NULL || map == NULL) {
-		dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
 		return 0;
 	}
 
@@ -591,14 +591,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 	struct thread *thread;
 
 	thread = threads__findnew(event->comm.pid, &threads, &last_match);
-	dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+	dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->comm.comm, event->comm.pid);
 
 	if (thread == NULL ||
 	    thread__set_comm(thread, event->comm.comm)) {
-		dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
 		return -1;
 	}
 	total_comm++;
@@ -614,7 +614,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head)
 
 	thread = threads__findnew(event->fork.pid, &threads, &last_match);
 	parent = threads__findnew(event->fork.ppid, &threads, &last_match);
-	dump_printf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+	dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->fork.pid, event->fork.ppid);
@@ -627,7 +627,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head)
 		return 0;
 
 	if (!thread || !parent || thread__fork(thread, parent)) {
-		dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		return -1;
 	}
 	total_fork++;
@@ -639,23 +639,23 @@ static int
 process_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	switch (event->header.type) {
-	case PERF_EVENT_SAMPLE:
+	case PERF_RECORD_SAMPLE:
 		return process_sample_event(event, offset, head);
 
-	case PERF_EVENT_MMAP:
+	case PERF_RECORD_MMAP:
 		return process_mmap_event(event, offset, head);
 
-	case PERF_EVENT_COMM:
+	case PERF_RECORD_COMM:
 		return process_comm_event(event, offset, head);
 
-	case PERF_EVENT_FORK:
+	case PERF_RECORD_FORK:
 		return process_fork_event(event, offset, head);
 	/*
 	 * We dont process them right now but they are fine:
 	 */
 
-	case PERF_EVENT_THROTTLE:
-	case PERF_EVENT_UNTHROTTLE:
+	case PERF_RECORD_THROTTLE:
+	case PERF_RECORD_UNTHROTTLE:
 		return 0;
 
 	default:
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2459e5a..a5a050a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -77,7 +77,7 @@ static struct mmap_data		mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
 static unsigned long mmap_read_head(struct mmap_data *md)
 {
-	struct perf_counter_mmap_page *pc = md->base;
+	struct perf_event_mmap_page *pc = md->base;
 	long head;
 
 	head = pc->data_head;
@@ -88,7 +88,7 @@ static unsigned long mmap_read_head(struct mmap_data *md)
 
 static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
 {
-	struct perf_counter_mmap_page *pc = md->base;
+	struct perf_event_mmap_page *pc = md->base;
 
 	/*
 	 * ensure all reads are done before we write the tail out.
@@ -233,7 +233,7 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full)
 		}
 	}
 
-	comm_ev.header.type = PERF_EVENT_COMM;
+	comm_ev.header.type = PERF_RECORD_COMM;
 	size = ALIGN(size, sizeof(u64));
 	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
 
@@ -288,7 +288,7 @@ static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
 	while (1) {
 		char bf[BUFSIZ], *pbf = bf;
 		struct mmap_event mmap_ev = {
-			.header = { .type = PERF_EVENT_MMAP },
+			.header = { .type = PERF_RECORD_MMAP },
 		};
 		int n;
 		size_t size;
@@ -355,7 +355,7 @@ static void synthesize_all(void)
 
 static int group_fd;
 
-static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr)
+static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
 {
 	struct perf_header_attr *h_attr;
 
@@ -371,7 +371,7 @@ static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int
 
 static void create_counter(int counter, int cpu, pid_t pid)
 {
-	struct perf_counter_attr *attr = attrs + counter;
+	struct perf_event_attr *attr = attrs + counter;
 	struct perf_header_attr *h_attr;
 	int track = !counter; /* only the first counter needs these */
 	struct {
@@ -417,7 +417,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	attr->disabled		= 1;
 
 try_again:
-	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
+	fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0);
 
 	if (fd[nr_cpu][counter] < 0) {
 		int err = errno;
@@ -444,7 +444,7 @@ try_again:
 		printf("\n");
 		error("perfcounter syscall returned with %d (%s)\n",
 			fd[nr_cpu][counter], strerror(err));
-		die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
+		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 		exit(-1);
 	}
 
@@ -478,7 +478,7 @@ try_again:
 	if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
 		int ret;
 
-		ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd);
+		ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd);
 		assert(ret != -1);
 	} else {
 		event_array[nr_poll].fd = fd[nr_cpu][counter];
@@ -496,7 +496,7 @@ try_again:
 		}
 	}
 
-	ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
+	ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE);
 }
 
 static void open_counters(int cpu, pid_t pid)
@@ -642,7 +642,7 @@ static int __cmd_record(int argc, const char **argv)
 		if (done) {
 			for (i = 0; i < nr_cpu; i++) {
 				for (counter = 0; counter < nr_counters; counter++)
-					ioctl(fd[i][counter], PERF_COUNTER_IOC_DISABLE);
+					ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE);
 			}
 		}
 	}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index cdf9a8d..19669c2 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1121,7 +1121,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		more_data += sizeof(u64);
 	}
 
-	dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
+	dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->header.misc,
@@ -1158,9 +1158,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 	if (comm_list && !strlist__has_entry(comm_list, thread->comm))
 		return 0;
 
-	cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;
+	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	if (cpumode == PERF_EVENT_MISC_KERNEL) {
+	if (cpumode == PERF_RECORD_MISC_KERNEL) {
 		show = SHOW_KERNEL;
 		level = 'k';
 
@@ -1168,7 +1168,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 
 		dump_printf(" ...... dso: %s\n", dso->name);
 
-	} else if (cpumode == PERF_EVENT_MISC_USER) {
+	} else if (cpumode == PERF_RECORD_MISC_USER) {
 
 		show = SHOW_USER;
 		level = '.';
@@ -1210,7 +1210,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 
 	thread = threads__findnew(event->mmap.pid, &threads, &last_match);
 
-	dump_printf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
+	dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->mmap.pid,
@@ -1221,7 +1221,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 		event->mmap.filename);
 
 	if (thread == NULL || map == NULL) {
-		dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
 		return 0;
 	}
 
@@ -1238,14 +1238,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 
 	thread = threads__findnew(event->comm.pid, &threads, &last_match);
 
-	dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+	dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->comm.comm, event->comm.pid);
 
 	if (thread == NULL ||
 	    thread__set_comm_adjust(thread, event->comm.comm)) {
-		dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
 		return -1;
 	}
 	total_comm++;
@@ -1262,10 +1262,10 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head)
 	thread = threads__findnew(event->fork.pid, &threads, &last_match);
 	parent = threads__findnew(event->fork.ppid, &threads, &last_match);
 
-	dump_printf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n",
+	dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
-		event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT",
+		event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT",
 		event->fork.pid, event->fork.tid,
 		event->fork.ppid, event->fork.ptid);
 
@@ -1276,11 +1276,11 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head)
 	if (thread == parent)
 		return 0;
 
-	if (event->header.type == PERF_EVENT_EXIT)
+	if (event->header.type == PERF_RECORD_EXIT)
 		return 0;
 
 	if (!thread || !parent || thread__fork(thread, parent)) {
-		dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		return -1;
 	}
 	total_fork++;
@@ -1291,7 +1291,7 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_lost_event(event_t *event, unsigned long offset, unsigned long head)
 {
-	dump_printf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n",
+	dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->lost.id,
@@ -1305,7 +1305,7 @@ process_lost_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_read_event(event_t *event, unsigned long offset, unsigned long head)
 {
-	struct perf_counter_attr *attr;
+	struct perf_event_attr *attr;
 
 	attr = perf_header__find_attr(event->read.id, header);
 
@@ -1319,7 +1319,7 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head)
 					   event->read.value);
 	}
 
-	dump_printf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
+	dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n",
 			(void *)(offset + head),
 			(void *)(long)(event->header.size),
 			event->read.pid,
@@ -1337,31 +1337,31 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	trace_event(event);
 
 	switch (event->header.type) {
-	case PERF_EVENT_SAMPLE:
+	case PERF_RECORD_SAMPLE:
 		return process_sample_event(event, offset, head);
 
-	case PERF_EVENT_MMAP:
+	case PERF_RECORD_MMAP:
 		return process_mmap_event(event, offset, head);
 
-	case PERF_EVENT_COMM:
+	case PERF_RECORD_COMM:
 		return process_comm_event(event, offset, head);
 
-	case PERF_EVENT_FORK:
-	case PERF_EVENT_EXIT:
+	case PERF_RECORD_FORK:
+	case PERF_RECORD_EXIT:
 		return process_task_event(event, offset, head);
 
-	case PERF_EVENT_LOST:
+	case PERF_RECORD_LOST:
 		return process_lost_event(event, offset, head);
 
-	case PERF_EVENT_READ:
+	case PERF_RECORD_READ:
 		return process_read_event(event, offset, head);
 
 	/*
 	 * We dont process them right now but they are fine:
 	 */
 
-	case PERF_EVENT_THROTTLE:
-	case PERF_EVENT_UNTHROTTLE:
+	case PERF_RECORD_THROTTLE:
+	case PERF_RECORD_UNTHROTTLE:
 		return 0;
 
 	default:
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 275d79c..ea9c15c 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1573,7 +1573,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		more_data += sizeof(u64);
 	}
 
-	dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
+	dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->header.misc,
@@ -1589,9 +1589,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		return -1;
 	}
 
-	cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;
+	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	if (cpumode == PERF_EVENT_MISC_KERNEL) {
+	if (cpumode == PERF_RECORD_MISC_KERNEL) {
 		show = SHOW_KERNEL;
 		level = 'k';
 
@@ -1599,7 +1599,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 
 		dump_printf(" ...... dso: %s\n", dso->name);
 
-	} else if (cpumode == PERF_EVENT_MISC_USER) {
+	} else if (cpumode == PERF_RECORD_MISC_USER) {
 
 		show = SHOW_USER;
 		level = '.';
@@ -1626,23 +1626,23 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 
 	nr_events++;
 	switch (event->header.type) {
-	case PERF_EVENT_MMAP:
+	case PERF_RECORD_MMAP:
 		return 0;
-	case PERF_EVENT_LOST:
+	case PERF_RECORD_LOST:
 		nr_lost_chunks++;
 		nr_lost_events += event->lost.lost;
 		return 0;
 
-	case PERF_EVENT_COMM:
+	case PERF_RECORD_COMM:
 		return process_comm_event(event, offset, head);
 
-	case PERF_EVENT_EXIT ... PERF_EVENT_READ:
+	case PERF_RECORD_EXIT ... PERF_RECORD_READ:
 		return 0;
 
-	case PERF_EVENT_SAMPLE:
+	case PERF_RECORD_SAMPLE:
 		return process_sample_event(event, offset, head);
 
-	case PERF_EVENT_MAX:
+	case PERF_RECORD_MAX:
 	default:
 		return -1;
 	}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 61b8282..16af2d8 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -48,7 +48,7 @@
 #include <sys/prctl.h>
 #include <math.h>
 
-static struct perf_counter_attr default_attrs[] = {
+static struct perf_event_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
@@ -130,11 +130,11 @@ struct stats			runtime_cycles_stats;
 	 attrs[counter].config == PERF_COUNT_##c)
 
 #define ERR_PERF_OPEN \
-"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
+"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
 
 static void create_perf_stat_counter(int counter, int pid)
 {
-	struct perf_counter_attr *attr = attrs + counter;
+	struct perf_event_attr *attr = attrs + counter;
 
 	if (scale)
 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -144,7 +144,7 @@ static void create_perf_stat_counter(int counter, int pid)
 		unsigned int cpu;
 
 		for (cpu = 0; cpu < nr_cpus; cpu++) {
-			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
+			fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0);
 			if (fd[cpu][counter] < 0 && verbose)
 				fprintf(stderr, ERR_PERF_OPEN, counter,
 					fd[cpu][counter], strerror(errno));
@@ -154,7 +154,7 @@ static void create_perf_stat_counter(int counter, int pid)
 		attr->disabled	     = 1;
 		attr->enable_on_exec = 1;
 
-		fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0);
+		fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0);
 		if (fd[0][counter] < 0 && verbose)
 			fprintf(stderr, ERR_PERF_OPEN, counter,
 				fd[0][counter], strerror(errno));
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 6004063..4405681 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -937,21 +937,21 @@ process_event(event_t *event)
 
 	switch (event->header.type) {
 
-	case PERF_EVENT_COMM:
+	case PERF_RECORD_COMM:
 		return process_comm_event(event);
-	case PERF_EVENT_FORK:
+	case PERF_RECORD_FORK:
 		return process_fork_event(event);
-	case PERF_EVENT_EXIT:
+	case PERF_RECORD_EXIT:
 		return process_exit_event(event);
-	case PERF_EVENT_SAMPLE:
+	case PERF_RECORD_SAMPLE:
 		return queue_sample_event(event);
 
 	/*
 	 * We dont process them right now but they are fine:
 	 */
-	case PERF_EVENT_MMAP:
-	case PERF_EVENT_THROTTLE:
-	case PERF_EVENT_UNTHROTTLE:
+	case PERF_RECORD_MMAP:
+	case PERF_RECORD_THROTTLE:
+	case PERF_RECORD_UNTHROTTLE:
 		return 0;
 
 	default:
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 4002ccb..1ca8889 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -901,7 +901,7 @@ struct mmap_data {
 
 static unsigned int mmap_read_head(struct mmap_data *md)
 {
-	struct perf_counter_mmap_page *pc = md->base;
+	struct perf_event_mmap_page *pc = md->base;
 	int head;
 
 	head = pc->data_head;
@@ -977,9 +977,9 @@ static void mmap_read_counter(struct mmap_data *md)
 
 		old += size;
 
-		if (event->header.type == PERF_EVENT_SAMPLE) {
+		if (event->header.type == PERF_RECORD_SAMPLE) {
 			int user =
-	(event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER;
+	(event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER;
 			process_event(event->ip.ip, md->counter, user);
 		}
 	}
@@ -1005,7 +1005,7 @@ int group_fd;
 
 static void start_counter(int i, int counter)
 {
-	struct perf_counter_attr *attr;
+	struct perf_event_attr *attr;
 	int cpu;
 
 	cpu = profile_cpu;
@@ -1019,7 +1019,7 @@ static void start_counter(int i, int counter)
 	attr->inherit		= (cpu < 0) && inherit;
 
 try_again:
-	fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
+	fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0);
 
 	if (fd[i][counter] < 0) {
 		int err = errno;
@@ -1044,7 +1044,7 @@ try_again:
 		printf("\n");
 		error("perfcounter syscall returned with %d (%s)\n",
 			fd[i][counter], strerror(err));
-		die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
+		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 		exit(-1);
 	}
 	assert(fd[i][counter] >= 0);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 914ab36..e9d256e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -35,14 +35,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 
 	thread = threads__findnew(event->comm.pid, &threads, &last_match);
 
-	dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+	dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->comm.comm, event->comm.pid);
 
 	if (thread == NULL ||
 	    thread__set_comm(thread, event->comm.comm)) {
-		dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
 		return -1;
 	}
 	total_comm++;
@@ -82,7 +82,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		more_data += sizeof(u64);
 	}
 
-	dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
+	dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->header.misc,
@@ -98,9 +98,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		return -1;
 	}
 
-	cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;
+	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	if (cpumode == PERF_EVENT_MISC_KERNEL) {
+	if (cpumode == PERF_RECORD_MISC_KERNEL) {
 		show = SHOW_KERNEL;
 		level = 'k';
 
@@ -108,7 +108,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 
 		dump_printf(" ...... dso: %s\n", dso->name);
 
-	} else if (cpumode == PERF_EVENT_MISC_USER) {
+	} else if (cpumode == PERF_RECORD_MISC_USER) {
 
 		show = SHOW_USER;
 		level = '.';
@@ -146,19 +146,19 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	trace_event(event);
 
 	switch (event->header.type) {
-	case PERF_EVENT_MMAP ... PERF_EVENT_LOST:
+	case PERF_RECORD_MMAP ... PERF_RECORD_LOST:
 		return 0;
 
-	case PERF_EVENT_COMM:
+	case PERF_RECORD_COMM:
 		return process_comm_event(event, offset, head);
 
-	case PERF_EVENT_EXIT ... PERF_EVENT_READ:
+	case PERF_RECORD_EXIT ... PERF_RECORD_READ:
 		return 0;
 
-	case PERF_EVENT_SAMPLE:
+	case PERF_RECORD_SAMPLE:
 		return process_sample_event(event, offset, head);
 
-	case PERF_EVENT_MAX:
+	case PERF_RECORD_MAX:
 	default:
 		return -1;
 	}
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index f71e0d2..f1946d1 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -18,10 +18,10 @@ underlying hardware counters.
 Performance counters are accessed via special file descriptors.
 There's one file descriptor per virtual counter used.
 
-The special file descriptor is opened via the perf_counter_open()
+The special file descriptor is opened via the perf_event_open()
 system call:
 
-   int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+   int sys_perf_event_open(struct perf_event_hw_event *hw_event_uptr,
 			     pid_t pid, int cpu, int group_fd,
 			     unsigned long flags);
 
@@ -32,9 +32,9 @@ can be used to set the blocking mode, etc.
 Multiple counters can be kept open at a time, and the counters
 can be poll()ed.
 
-When creating a new counter fd, 'perf_counter_hw_event' is:
+When creating a new counter fd, 'perf_event_hw_event' is:
 
-struct perf_counter_hw_event {
+struct perf_event_hw_event {
         /*
          * The MSB of the config word signifies if the rest contains cpu
          * specific (raw) counter configuration data, if unset, the next
@@ -93,7 +93,7 @@ specified by 'event_id':
 
 /*
  * Generalized performance counter event types, used by the hw_event.event_id
- * parameter of the sys_perf_counter_open() syscall:
+ * parameter of the sys_perf_event_open() syscall:
  */
 enum hw_event_ids {
 	/*
@@ -159,7 +159,7 @@ in size.
  * reads on the counter should return the indicated quantities,
  * in increasing order of bit value, after the counter value.
  */
-enum perf_counter_read_format {
+enum perf_event_read_format {
         PERF_FORMAT_TOTAL_TIME_ENABLED  =  1,
         PERF_FORMAT_TOTAL_TIME_RUNNING  =  2,
 };
@@ -178,7 +178,7 @@ interrupt:
  * Bits that can be set in hw_event.record_type to request information
  * in the overflow packets.
  */
-enum perf_counter_record_format {
+enum perf_event_record_format {
         PERF_RECORD_IP          = 1U << 0,
         PERF_RECORD_TID         = 1U << 1,
         PERF_RECORD_TIME        = 1U << 2,
@@ -228,7 +228,7 @@ these events are recorded in the ring-buffer (see below).
 The 'comm' bit allows tracking of process comm data on process creation.
 This too is recorded in the ring-buffer (see below).
 
-The 'pid' parameter to the perf_counter_open() system call allows the
+The 'pid' parameter to the perf_event_open() system call allows the
 counter to be specific to a task:
 
  pid == 0: if the pid parameter is zero, the counter is attached to the
@@ -258,7 +258,7 @@ The 'flags' parameter is currently unused and must be zero.
 
 The 'group_fd' parameter allows counter "groups" to be set up.  A
 counter group has one counter which is the group "leader".  The leader
-is created first, with group_fd = -1 in the perf_counter_open call
+is created first, with group_fd = -1 in the perf_event_open call
 that creates it.  The rest of the group members are created
 subsequently, with group_fd giving the fd of the group leader.
 (A single counter on its own is created with group_fd = -1 and is
@@ -277,13 +277,13 @@ tracking are logged into a ring-buffer. This ring-buffer is created and
 accessed through mmap().
 
 The mmap size should be 1+2^n pages, where the first page is a meta-data page
-(struct perf_counter_mmap_page) that contains various bits of information such
+(struct perf_event_mmap_page) that contains various bits of information such
 as where the ring-buffer head is.
 
 /*
  * Structure of the page that can be mapped via mmap
  */
-struct perf_counter_mmap_page {
+struct perf_event_mmap_page {
         __u32   version;                /* version number of this structure */
         __u32   compat_version;         /* lowest version this is compat with */
 
@@ -317,7 +317,7 @@ struct perf_counter_mmap_page {
          * Control data for the mmap() data buffer.
          *
          * User-space reading this value should issue an rmb(), on SMP capable
-         * platforms, after reading this value -- see perf_counter_wakeup().
+         * platforms, after reading this value -- see perf_event_wakeup().
          */
         __u32   data_head;              /* head in the data section */
 };
@@ -327,9 +327,9 @@ NOTE: the hw-counter userspace bits are arch specific and are currently only
 
 The following 2^n pages are the ring-buffer which contains events of the form:
 
-#define PERF_EVENT_MISC_KERNEL          (1 << 0)
-#define PERF_EVENT_MISC_USER            (1 << 1)
-#define PERF_EVENT_MISC_OVERFLOW        (1 << 2)
+#define PERF_RECORD_MISC_KERNEL          (1 << 0)
+#define PERF_RECORD_MISC_USER            (1 << 1)
+#define PERF_RECORD_MISC_OVERFLOW        (1 << 2)
 
 struct perf_event_header {
         __u32   type;
@@ -353,8 +353,8 @@ enum perf_event_type {
          *      char                            filename[];
          * };
          */
-        PERF_EVENT_MMAP                 = 1,
-        PERF_EVENT_MUNMAP               = 2,
+        PERF_RECORD_MMAP                 = 1,
+        PERF_RECORD_MUNMAP               = 2,
 
         /*
          * struct {
@@ -364,10 +364,10 @@ enum perf_event_type {
          *      char                            comm[];
          * };
          */
-        PERF_EVENT_COMM                 = 3,
+        PERF_RECORD_COMM                 = 3,
 
         /*
-         * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
+         * When header.misc & PERF_RECORD_MISC_OVERFLOW the event_type field
          * will be PERF_RECORD_*
          *
          * struct {
@@ -397,7 +397,7 @@ Notification of new events is possible through poll()/select()/epoll() and
 fcntl() managing signals.
 
 Normally a notification is generated for every page filled, however one can
-additionally set perf_counter_hw_event.wakeup_events to generate one every
+additionally set perf_event_hw_event.wakeup_events to generate one every
 so many counter overflow events.
 
 Future work will include a splice() interface to the ring-buffer.
@@ -409,11 +409,11 @@ events but does continue to exist and maintain its count value.
 
 An individual counter or counter group can be enabled with
 
-	ioctl(fd, PERF_COUNTER_IOC_ENABLE);
+	ioctl(fd, PERF_EVENT_IOC_ENABLE);
 
 or disabled with
 
-	ioctl(fd, PERF_COUNTER_IOC_DISABLE);
+	ioctl(fd, PERF_EVENT_IOC_DISABLE);
 
 Enabling or disabling the leader of a group enables or disables the
 whole group; that is, while the group leader is disabled, none of the
@@ -424,16 +424,16 @@ other counter.
 
 Additionally, non-inherited overflow counters can use
 
-	ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr);
+	ioctl(fd, PERF_EVENT_IOC_REFRESH, nr);
 
 to enable a counter for 'nr' events, after which it gets disabled again.
 
 A process can enable or disable all the counter groups that are
 attached to it, using prctl:
 
-	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
 
-	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
 
 This applies to all counters on the current process, whether created
 by this process or by another, and doesn't affect any counters that
@@ -447,11 +447,11 @@ Arch requirements
 If your architecture does not have hardware performance metrics, you can
 still use the generic software counters based on hrtimers for sampling.
 
-So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you
+So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you
 will need at least this:
-	- asm/perf_counter.h - a basic stub will suffice at first
+	- asm/perf_event.h - a basic stub will suffice at first
 	- support for atomic64 types (and associated helper functions)
-	- set_perf_counter_pending() implemented
+	- set_perf_event_pending() implemented
 
 If your architecture does have hardware capabilities, you can override the
-weak stub hw_perf_counter_init() to register hardware counters.
+weak stub hw_perf_event_init() to register hardware counters.
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2abeb20..8cc4623 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -52,15 +52,15 @@
 #include <sys/types.h>
 #include <sys/syscall.h>
 
-#include "../../include/linux/perf_counter.h"
+#include "../../include/linux/perf_event.h"
 #include "util/types.h"
 
 /*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
  * counters in the current task.
  */
-#define PR_TASK_PERF_COUNTERS_DISABLE   31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
+#define PR_TASK_PERF_EVENTS_DISABLE   31
+#define PR_TASK_PERF_EVENTS_ENABLE    32
 
 #ifndef NSEC_PER_SEC
 # define NSEC_PER_SEC			1000000000ULL
@@ -90,12 +90,12 @@ static inline unsigned long long rdclock(void)
 	_min1 < _min2 ? _min1 : _min2; })
 
 static inline int
-sys_perf_counter_open(struct perf_counter_attr *attr,
+sys_perf_event_open(struct perf_event_attr *attr,
 		      pid_t pid, int cpu, int group_fd,
 		      unsigned long flags)
 {
 	attr->size = sizeof(*attr);
-	return syscall(__NR_perf_counter_open, attr, pid, cpu,
+	return syscall(__NR_perf_event_open, attr, pid, cpu,
 		       group_fd, flags);
 }
 
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 018d414..2c9c26d6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -1,5 +1,5 @@
-#ifndef __PERF_EVENT_H
-#define __PERF_EVENT_H
+#ifndef __PERF_RECORD_H
+#define __PERF_RECORD_H
 #include "../perf.h"
 #include "util.h"
 #include <linux/list.h>
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index bb4fca3..e306857 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -9,7 +9,7 @@
 /*
  * Create new perf.data header attribute:
  */
-struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr)
+struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr)
 {
 	struct perf_header_attr *self = malloc(sizeof(*self));
 
@@ -134,7 +134,7 @@ struct perf_file_section {
 };
 
 struct perf_file_attr {
-	struct perf_counter_attr	attr;
+	struct perf_event_attr	attr;
 	struct perf_file_section	ids;
 };
 
@@ -320,7 +320,7 @@ u64 perf_header__sample_type(struct perf_header *header)
 	return type;
 }
 
-struct perf_counter_attr *
+struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header)
 {
 	int i;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 7b0e84a..a0761bc 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -1,12 +1,12 @@
 #ifndef _PERF_HEADER_H
 #define _PERF_HEADER_H
 
-#include "../../../include/linux/perf_counter.h"
+#include "../../../include/linux/perf_event.h"
 #include <sys/types.h>
 #include "types.h"
 
 struct perf_header_attr {
-	struct perf_counter_attr attr;
+	struct perf_event_attr attr;
 	int ids, size;
 	u64 *id;
 	off_t id_offset;
@@ -34,11 +34,11 @@ char *perf_header__find_event(u64 id);
 
 
 struct perf_header_attr *
-perf_header_attr__new(struct perf_counter_attr *attr);
+perf_header_attr__new(struct perf_event_attr *attr);
 void perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
 
 u64 perf_header__sample_type(struct perf_header *header);
-struct perf_counter_attr *
+struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header);
 
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 89172fd..13ab4b8 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -10,7 +10,7 @@
 
 int					nr_counters;
 
-struct perf_counter_attr		attrs[MAX_COUNTERS];
+struct perf_event_attr		attrs[MAX_COUNTERS];
 
 struct event_symbol {
 	u8		type;
@@ -48,13 +48,13 @@ static struct event_symbol event_symbols[] = {
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
 };
 
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+#define __PERF_EVENT_FIELD(config, name) \
+	((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
 
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+#define PERF_EVENT_RAW(config)	__PERF_EVENT_FIELD(config, RAW)
+#define PERF_EVENT_CONFIG(config)	__PERF_EVENT_FIELD(config, CONFIG)
+#define PERF_EVENT_TYPE(config)	__PERF_EVENT_FIELD(config, TYPE)
+#define PERF_EVENT_ID(config)		__PERF_EVENT_FIELD(config, EVENT)
 
 static const char *hw_event_names[] = {
 	"cycles",
@@ -352,7 +352,7 @@ static int parse_aliases(const char **str, const char *names[][MAX_ALIASES], int
 }
 
 static enum event_result
-parse_generic_hw_event(const char **str, struct perf_counter_attr *attr)
+parse_generic_hw_event(const char **str, struct perf_event_attr *attr)
 {
 	const char *s = *str;
 	int cache_type = -1, cache_op = -1, cache_result = -1;
@@ -417,7 +417,7 @@ parse_single_tracepoint_event(char *sys_name,
 			      const char *evt_name,
 			      unsigned int evt_length,
 			      char *flags,
-			      struct perf_counter_attr *attr,
+			      struct perf_event_attr *attr,
 			      const char **strp)
 {
 	char evt_path[MAXPATHLEN];
@@ -505,7 +505,7 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
 
 
 static enum event_result parse_tracepoint_event(const char **strp,
-				    struct perf_counter_attr *attr)
+				    struct perf_event_attr *attr)
 {
 	const char *evt_name;
 	char *flags;
@@ -563,7 +563,7 @@ static int check_events(const char *str, unsigned int i)
 }
 
 static enum event_result
-parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
+parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
 	unsigned int i;
@@ -582,7 +582,7 @@ parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
 }
 
 static enum event_result
-parse_raw_event(const char **strp, struct perf_counter_attr *attr)
+parse_raw_event(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
 	u64 config;
@@ -601,7 +601,7 @@ parse_raw_event(const char **strp, struct perf_counter_attr *attr)
 }
 
 static enum event_result
-parse_numeric_event(const char **strp, struct perf_counter_attr *attr)
+parse_numeric_event(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
 	char *endp;
@@ -623,7 +623,7 @@ parse_numeric_event(const char **strp, struct perf_counter_attr *attr)
 }
 
 static enum event_result
-parse_event_modifier(const char **strp, struct perf_counter_attr *attr)
+parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
 	int eu = 1, ek = 1, eh = 1;
@@ -656,7 +656,7 @@ parse_event_modifier(const char **strp, struct perf_counter_attr *attr)
  * Symbolic names are (almost) exactly matched.
  */
 static enum event_result
-parse_event_symbols(const char **str, struct perf_counter_attr *attr)
+parse_event_symbols(const char **str, struct perf_event_attr *attr)
 {
 	enum event_result ret;
 
@@ -711,7 +711,7 @@ static void store_event_type(const char *orgname)
 
 int parse_events(const struct option *opt __used, const char *str, int unset __used)
 {
-	struct perf_counter_attr attr;
+	struct perf_event_attr attr;
 	enum event_result ret;
 
 	if (strchr(str, ':'))
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 60704c1..30c6081 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -16,7 +16,7 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
 
 extern int			nr_counters;
 
-extern struct perf_counter_attr attrs[MAX_COUNTERS];
+extern struct perf_event_attr attrs[MAX_COUNTERS];
 
 extern const char *event_name(int ctr);
 extern const char *__event_name(int type, u64 config);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 1fd824c..af4b057 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -480,12 +480,12 @@ out:
 }
 
 static struct tracepoint_path *
-get_tracepoints_path(struct perf_counter_attr *pattrs, int nb_counters)
+get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
 {
 	struct tracepoint_path path, *ppath = &path;
 	int i;
 
-	for (i = 0; i < nb_counters; i++) {
+	for (i = 0; i < nb_events; i++) {
 		if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
 			continue;
 		ppath->next = tracepoint_id_to_path(pattrs[i].config);
@@ -496,7 +496,7 @@ get_tracepoints_path(struct perf_counter_attr *pattrs, int nb_counters)
 
 	return path.next;
 }
-void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters)
+void read_tracing_data(struct perf_event_attr *pattrs, int nb_events)
 {
 	char buf[BUFSIZ];
 	struct tracepoint_path *tps;
@@ -530,7 +530,7 @@ void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters)
 	page_size = getpagesize();
 	write_or_die(&page_size, 4);
 
-	tps = get_tracepoints_path(pattrs, nb_counters);
+	tps = get_tracepoints_path(pattrs, nb_events);
 
 	read_header_files();
 	read_ftrace_files(tps);
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index d35ebf1..693f815 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -240,6 +240,6 @@ unsigned long long
 raw_field_value(struct event *event, const char *name, void *data);
 void *raw_field_ptr(struct event *event, const char *name, void *data);
 
-void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters);
+void read_tracing_data(struct perf_event_attr *pattrs, int nb_events);
 
 #endif /* _TRACE_EVENTS_H */
-- 
cgit v0.10.2


From 57c0c15b5244320065374ad2c54f4fbec77a6428 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 21 Sep 2009 12:20:38 +0200
Subject: perf: Tidy up after the big rename

 - provide compatibility Kconfig entry for existing PERF_COUNTERS .config's

 - provide courtesy copy of old perf_counter.h, for user-space projects

 - small indentation fixups

 - fix up MAINTAINERS

 - fix small x86 printout fallout

 - fix up small PowerPC comment fallout (use 'counter' as in register)

Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/MAINTAINERS b/MAINTAINERS
index 43761a0..751a307 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4000,7 +4000,7 @@ S:	Maintained
 F:	include/linux/delayacct.h
 F:	kernel/delayacct.c
 
-PERFORMANCE COUNTER SUBSYSTEM
+PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@elte.hu>
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 154f405..7d8514c 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -122,7 +122,7 @@ struct paca_struct {
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
-	u8 perf_event_pending;	/* PM interrupt while soft-disabled */
+	u8 perf_event_pending;		/* PM interrupt while soft-disabled */
 
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index c98321f..197b7d9 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -41,7 +41,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 struct power_pmu *ppmu;
 
 /*
- * Normally, to ignore kernel events we set the FCS (freeze events
+ * Normally, to ignore kernel events we set the FCS (freeze counters
  * in supervisor mode) bit in MMCR0, but if the kernel runs with the
  * hypervisor bit set in the MSR, or if we are running on a processor
  * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
@@ -159,7 +159,7 @@ void perf_event_print_debug(void)
 }
 
 /*
- * Read one performance monitor event (PMC).
+ * Read one performance monitor counter (PMC).
  */
 static unsigned long read_pmc(int idx)
 {
@@ -409,7 +409,7 @@ static void power_pmu_read(struct perf_event *event)
 		val = read_pmc(event->hw.idx);
 	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
-	/* The events are only 32 bits wide */
+	/* The counters are only 32 bits wide */
 	delta = (val - prev) & 0xfffffffful;
 	atomic64_add(delta, &event->count);
 	atomic64_sub(delta, &event->hw.period_left);
@@ -543,7 +543,7 @@ void hw_perf_disable(void)
 		}
 
 		/*
-		 * Set the 'freeze events' bit.
+		 * Set the 'freeze counters' bit.
 		 * The barrier is to make sure the mtspr has been
 		 * executed and the PMU has frozen the events
 		 * before we return.
@@ -1124,7 +1124,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 }
 
 /*
- * A event has overflowed; update its count and record
+ * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
  * here so there is no possibility of being interrupted.
  */
@@ -1271,7 +1271,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 
 	/*
 	 * Reset MMCR0 to its normal value.  This will set PMXE and
-	 * clear FC (freeze events) and PMAO (perf mon alert occurred)
+	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
 	 * and thus allow interrupts to occur again.
 	 * XXX might want to use MSR.PM to keep the events frozen until
 	 * we get back out of this interrupt.
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0d03629..a3c7adb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2081,13 +2081,13 @@ void __init init_hw_perf_events(void)
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
-	pr_info("... version:                 %d\n",     x86_pmu.version);
-	pr_info("... bit width:               %d\n",     x86_pmu.event_bits);
-	pr_info("... generic events:        %d\n",     x86_pmu.num_events);
-	pr_info("... value mask:              %016Lx\n", x86_pmu.event_mask);
-	pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events:  %d\n",     x86_pmu.num_events_fixed);
-	pr_info("... event mask:            %016Lx\n", perf_event_mask);
+	pr_info("... version:                %d\n",     x86_pmu.version);
+	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
+	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
+	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
+	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
+	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
+	pr_info("... event mask:             %016Lx\n", perf_event_mask);
 }
 
 static inline void x86_pmu_read(struct perf_event *event)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
new file mode 100644
index 0000000..368bd70
--- /dev/null
+++ b/include/linux/perf_counter.h
@@ -0,0 +1,441 @@
+/*
+ *  NOTE: this file will be removed in a future kernel release, it is
+ *  provided as a courtesy copy of user-space code that relies on the
+ *  old (pre-rename) symbols and constants.
+ *
+ *  Performance events:
+ *
+ *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
+ *
+ *  Data type definitions, declarations, prototypes.
+ *
+ *    Started by: Thomas Gleixner and Ingo Molnar
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_COUNTER_H
+#define _LINUX_PERF_COUNTER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+	PERF_TYPE_HARDWARE			= 0,
+	PERF_TYPE_SOFTWARE			= 1,
+	PERF_TYPE_TRACEPOINT			= 2,
+	PERF_TYPE_HW_CACHE			= 3,
+	PERF_TYPE_RAW				= 4,
+
+	PERF_TYPE_MAX,				/* non-ABI */
+};
+
+/*
+ * Generalized performance counter event types, used by the
+ * attr.event_id parameter of the sys_perf_counter_open()
+ * syscall:
+ */
+enum perf_hw_id {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+
+	PERF_COUNT_HW_MAX,			/* non-ABI */
+};
+
+/*
+ * Generalized hardware cache counters:
+ *
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ *       { read, write, prefetch } x
+ *       { accesses, misses }
+ */
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D			= 0,
+	PERF_COUNT_HW_CACHE_L1I			= 1,
+	PERF_COUNT_HW_CACHE_LL			= 2,
+	PERF_COUNT_HW_CACHE_DTLB		= 3,
+	PERF_COUNT_HW_CACHE_ITLB		= 4,
+	PERF_COUNT_HW_CACHE_BPU			= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ		= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
+
+	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
+	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
+
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
+};
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+	PERF_COUNT_SW_CPU_CLOCK			= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+
+	PERF_COUNT_SW_MAX,			/* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_sample_format {
+	PERF_SAMPLE_IP				= 1U << 0,
+	PERF_SAMPLE_TID				= 1U << 1,
+	PERF_SAMPLE_TIME			= 1U << 2,
+	PERF_SAMPLE_ADDR			= 1U << 3,
+	PERF_SAMPLE_READ			= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
+	PERF_SAMPLE_ID				= 1U << 6,
+	PERF_SAMPLE_CPU				= 1U << 7,
+	PERF_SAMPLE_PERIOD			= 1U << 8,
+	PERF_SAMPLE_STREAM_ID			= 1U << 9,
+	PERF_SAMPLE_RAW				= 1U << 10,
+
+	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
+};
+
+/*
+ * The format of the data returned by read() on a perf counter fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ *	{ u64		value;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		id;           } && PERF_FORMAT_ID
+ *	} && !PERF_FORMAT_GROUP
+ *
+ *	{ u64		nr;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		value;
+ *	    { u64	id;           } && PERF_FORMAT_ID
+ *	  }		cntr[nr];
+ *	} && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_counter_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
+	PERF_FORMAT_ID				= 1U << 2,
+	PERF_FORMAT_GROUP			= 1U << 3,
+
+	PERF_FORMAT_MAX = 1U << 4, 		/* non-ABI */
+};
+
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_attr {
+
+	/*
+	 * Major type: hardware/software/tracepoint/etc.
+	 */
+	__u32			type;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32			size;
+
+	/*
+	 * Type specific configuration information.
+	 */
+	__u64			config;
+
+	union {
+		__u64		sample_period;
+		__u64		sample_freq;
+	};
+
+	__u64			sample_type;
+	__u64			read_format;
+
+	__u64			disabled       :  1, /* off by default        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
+				mmap           :  1, /* include mmap data     */
+				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
+				inherit_stat   :  1, /* per task counts       */
+				enable_on_exec :  1, /* next exec enables     */
+				task           :  1, /* trace fork/exit       */
+				watermark      :  1, /* wakeup_watermark      */
+
+				__reserved_1   : 49;
+
+	union {
+		__u32		wakeup_events;	  /* wakeup every n events */
+		__u32		wakeup_watermark; /* bytes before wakeup   */
+	};
+	__u32			__reserved_2;
+
+	__u64			__reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
+#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
+#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
+
+enum perf_counter_ioc_flags {
+	PERF_IOC_FLAG_GROUP		= 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+	__u32	version;		/* version number of this structure */
+	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw counters in user-space.
+	 *
+	 *   u32 seq;
+	 *   s64 count;
+	 *
+	 *   do {
+	 *     seq = pc->lock;
+	 *
+	 *     barrier()
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
+	 *
+	 *     barrier();
+	 *   } while (pc->lock != seq);
+	 *
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
+	 */
+	__u32	lock;			/* seqlock for synchronization */
+	__u32	index;			/* hardware counter identifier */
+	__s64	offset;			/* add to hardware counter value */
+	__u64	time_enabled;		/* time counter active */
+	__u64	time_running;		/* time counter on cpu */
+
+		/*
+		 * Hole for extension of the self monitor capabilities
+		 */
+
+	__u64	__reserved[123];	/* align to 1k */
+
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading the @data_head value should issue an rmb(), on
+	 * SMP capable platforms, after reading this value -- see
+	 * perf_counter_wakeup().
+	 *
+	 * When the mapping is PROT_WRITE the @data_tail value should be
+	 * written by userspace to reflect the last read data. In this case
+	 * the kernel will not over-write unread data.
+	 */
+	__u64   data_head;		/* head in the data section */
+	__u64	data_tail;		/* user-space written tail */
+};
+
+#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_EVENT_MISC_KERNEL			(1 << 0)
+#define PERF_EVENT_MISC_USER			(2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
+
+struct perf_event_header {
+	__u32	type;
+	__u16	misc;
+	__u16	size;
+};
+
+enum perf_event_type {
+
+	/*
+	 * The MMAP events record the PROT_EXEC mappings so that we can
+	 * correlate userspace IPs to code. They have the following structure:
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	char				filename[];
+	 * };
+	 */
+	PERF_EVENT_MMAP			= 1,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				id;
+	 *	u64				lost;
+	 * };
+	 */
+	PERF_EVENT_LOST			= 2,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	char				comm[];
+	 * };
+	 */
+	PERF_EVENT_COMM			= 3,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	u64				time;
+	 * };
+	 */
+	PERF_EVENT_EXIT			= 4,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				id;
+	 *	u64				stream_id;
+	 * };
+	 */
+	PERF_EVENT_THROTTLE		= 5,
+	PERF_EVENT_UNTHROTTLE		= 6,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	{ u64				time;     } && PERF_SAMPLE_TIME
+	 * };
+	 */
+	PERF_EVENT_FORK			= 7,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, tid;
+	 *
+	 *	struct read_format		values;
+	 * };
+	 */
+	PERF_EVENT_READ			= 8,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
+	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
+	 *	{ u64			time;     } && PERF_SAMPLE_TIME
+	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
+	 *	{ u64			id;	  } && PERF_SAMPLE_ID
+	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
+	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
+	 *
+	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
+	 *
+	 *	{ u64			nr,
+	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+	 *
+	 *	#
+	 *	# The RAW record below is opaque data wrt the ABI
+	 *	#
+	 *	# That is, the ABI doesn't make any promises wrt to
+	 *	# the stability of its content, it may vary depending
+	 *	# on event, hardware, kernel version and phase of
+	 *	# the moon.
+	 *	#
+	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
+	 *	#
+	 *
+	 *	{ u32			size;
+	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
+	 * };
+	 */
+	PERF_EVENT_SAMPLE		= 9,
+
+	PERF_EVENT_MAX,			/* non-ABI */
+};
+
+enum perf_callchain_context {
+	PERF_CONTEXT_HV			= (__u64)-32,
+	PERF_CONTEXT_KERNEL		= (__u64)-128,
+	PERF_CONTEXT_USER		= (__u64)-512,
+
+	PERF_CONTEXT_GUEST		= (__u64)-2048,
+	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
+	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
+
+	PERF_CONTEXT_MAX		= (__u64)-4095,
+};
+
+#define PERF_FLAG_FD_NO_GROUP		(1U << 0)
+#define PERF_FLAG_FD_OUTPUT		(1U << 1)
+
+/*
+ * In case some app still references the old symbols:
+ */
+
+#define __NR_perf_counter_open		__NR_perf_event_open
+
+#define PR_TASK_PERF_COUNTERS_DISABLE	PR_TASK_PERF_EVENTS_DISABLE
+#define PR_TASK_PERF_COUNTERS_ENABLE	PR_TASK_PERF_EVENTS_ENABLE
+
+#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ae9d9ed..acefaf7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1,15 +1,15 @@
 /*
- *  Performance events:
+ * Performance events:
  *
  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
  *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
  *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
  *
- *  Data type definitions, declarations, prototypes.
+ * Data type definitions, declarations, prototypes.
  *
  *    Started by: Thomas Gleixner and Ingo Molnar
  *
- *  For licencing details see kernel-base/COPYING
+ * For licencing details see kernel-base/COPYING
  */
 #ifndef _LINUX_PERF_EVENT_H
 #define _LINUX_PERF_EVENT_H
@@ -131,19 +131,19 @@ enum perf_event_sample_format {
  * as specified by attr.read_format:
  *
  * struct read_format {
- * 	{ u64		value;
- * 	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- * 	  { u64		time_running; } && PERF_FORMAT_RUNNING
- * 	  { u64		id;           } && PERF_FORMAT_ID
- * 	} && !PERF_FORMAT_GROUP
+ *	{ u64		value;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		id;           } && PERF_FORMAT_ID
+ *	} && !PERF_FORMAT_GROUP
  *
- * 	{ u64		nr;
- * 	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- * 	  { u64		time_running; } && PERF_FORMAT_RUNNING
- * 	  { u64		value;
- * 	    { u64	id;           } && PERF_FORMAT_ID
- * 	  }		cntr[nr];
- * 	} && PERF_FORMAT_GROUP
+ *	{ u64		nr;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		value;
+ *	    { u64	id;           } && PERF_FORMAT_ID
+ *	  }		cntr[nr];
+ *	} && PERF_FORMAT_GROUP
  * };
  */
 enum perf_event_read_format {
@@ -152,7 +152,7 @@ enum perf_event_read_format {
 	PERF_FORMAT_ID				= 1U << 2,
 	PERF_FORMAT_GROUP			= 1U << 3,
 
-	PERF_FORMAT_MAX = 1U << 4, 		/* non-ABI */
+	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
 };
 
 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
@@ -216,8 +216,8 @@ struct perf_event_attr {
  * Ioctls that can be done on a perf event fd:
  */
 #define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE	_IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH	_IO ('$', 2)
+#define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
 #define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
@@ -314,9 +314,9 @@ enum perf_event_type {
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u64				id;
-	 * 	u64				lost;
+	 *	struct perf_event_header	header;
+	 *	u64				id;
+	 *	u64				lost;
 	 * };
 	 */
 	PERF_RECORD_LOST			= 2,
@@ -383,23 +383,23 @@ enum perf_event_type {
 	 *	{ u64			id;	  } && PERF_SAMPLE_ID
 	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
 	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
-	 * 	{ u64			period;   } && PERF_SAMPLE_PERIOD
+	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
 	 *
 	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
 	 *
 	 *	{ u64			nr,
 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 	 *
-	 * 	#
-	 * 	# The RAW record below is opaque data wrt the ABI
-	 * 	#
-	 * 	# That is, the ABI doesn't make any promises wrt to
-	 * 	# the stability of its content, it may vary depending
-	 * 	# on event_id, hardware, kernel version and phase of
-	 * 	# the moon.
-	 * 	#
-	 * 	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
-	 * 	#
+	 *	#
+	 *	# The RAW record below is opaque data wrt the ABI
+	 *	#
+	 *	# That is, the ABI doesn't make any promises wrt to
+	 *	# the stability of its content, it may vary depending
+	 *	# on event, hardware, kernel version and phase of
+	 *	# the moon.
+	 *	#
+	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
+	 *	#
 	 *
 	 *	{ u32			size;
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
@@ -503,10 +503,10 @@ struct pmu {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
-	PERF_EVENT_STATE_ERROR	= -2,
+	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
-	PERF_EVENT_STATE_ACTIVE	=  1,
+	PERF_EVENT_STATE_ACTIVE		=  1,
 };
 
 struct file;
@@ -529,7 +529,7 @@ struct perf_mmap_data {
 
 	long				watermark;	/* wakeup watermark  */
 
-	struct perf_event_mmap_page   *user_page;
+	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
 };
 
@@ -694,14 +694,14 @@ struct perf_cpu_context {
 };
 
 struct perf_output_handle {
-	struct perf_event	*event;
-	struct perf_mmap_data	*data;
-	unsigned long		head;
-	unsigned long		offset;
-	int			nmi;
-	int			sample;
-	int			locked;
-	unsigned long		flags;
+	struct perf_event		*event;
+	struct perf_mmap_data		*data;
+	unsigned long			head;
+	unsigned long			offset;
+	int				nmi;
+	int				sample;
+	int				locked;
+	unsigned long			flags;
 };
 
 #ifdef CONFIG_PERF_EVENTS
@@ -829,22 +829,22 @@ static inline void
 perf_event_task_sched_out(struct task_struct *task,
 			    struct task_struct *next, int cpu)		{ }
 static inline void
-perf_event_task_tick(struct task_struct *task, int cpu)		{ }
+perf_event_task_tick(struct task_struct *task, int cpu)			{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
-static inline void perf_event_do_pending(void)			{ }
-static inline void perf_event_print_debug(void)			{ }
+static inline void perf_event_do_pending(void)				{ }
+static inline void perf_event_print_debug(void)				{ }
 static inline void perf_disable(void)					{ }
 static inline void perf_enable(void)					{ }
-static inline int perf_event_task_disable(void)	{ return -EINVAL; }
-static inline int perf_event_task_enable(void)	{ return -EINVAL; }
+static inline int perf_event_task_disable(void)				{ return -EINVAL; }
+static inline int perf_event_task_enable(void)				{ return -EINVAL; }
 
 static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
 
-static inline void perf_event_mmap(struct vm_area_struct *vma)	{ }
+static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
diff --git a/init/Kconfig b/init/Kconfig
index cfdf5c3..706728b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -920,26 +920,31 @@ config HAVE_PERF_EVENTS
 	help
 	  See tools/perf/design.txt for details.
 
-menu "Performance Counters"
+menu "Kernel Performance Events And Counters"
 
 config PERF_EVENTS
-	bool "Kernel Performance Counters"
-	default y if PROFILING
+	bool "Kernel performance events and counters"
+	default y if (PROFILING || PERF_COUNTERS)
 	depends on HAVE_PERF_EVENTS
 	select ANON_INODES
 	help
-	  Enable kernel support for performance counter hardware.
+	  Enable kernel support for various performance events provided
+	  by software and hardware.
 
-	  Performance counters are special hardware registers available
-	  on most modern CPUs. These registers count the number of certain
+	  Software events are supported either build-in or via the
+	  use of generic tracepoints.
+
+	  Most modern CPUs support performance events via performance
+	  counter registers. These registers count the number of certain
 	  types of hw events: such as instructions executed, cachemisses
 	  suffered, or branches mis-predicted - without slowing down the
 	  kernel or applications. These registers can also trigger interrupts
 	  when a threshold number of events have passed - and can thus be
 	  used to profile the code that runs on that CPU.
 
-	  The Linux Performance Counter subsystem provides an abstraction of
-	  these hardware capabilities, available via a system call. It
+	  The Linux Performance Event subsystem provides an abstraction of
+	  these software and hardware cevent apabilities, available via a
+	  system call and used by the "perf" utility in tools/perf/. It
 	  provides per task and per CPU counters, and it provides event
 	  capabilities on top of those.
 
@@ -950,14 +955,26 @@ config EVENT_PROFILE
 	depends on PERF_EVENTS && EVENT_TRACING
 	default y
 	help
-	 Allow the use of tracepoints as software performance counters.
+	 Allow the use of tracepoints as software performance events.
 
-	 When this is enabled, you can create perf counters based on
+	 When this is enabled, you can create perf events based on
 	 tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
 	 found in debugfs://tracing/events/*/*/id. (The -e/--events
 	 option to the perf tool can parse and interpret symbolic
 	 tracepoints, in the subsystem:tracepoint_name format.)
 
+config PERF_COUNTERS
+	bool "Kernel performance counters (old config option)"
+	depends on HAVE_PERF_EVENTS
+	help
+	  This config has been obsoleted by the PERF_EVENTS
+	  config option - please see that one for details.
+
+	  It has no effect on the kernel whether you enable
+	  it or not, it is a compatibility placeholder.
+
+	  Say N if unsure.
+
 endmenu
 
 config VM_EVENT_COUNTERS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6e8b99a..76ac4db 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1,12 +1,12 @@
 /*
- * Performance event core code
+ * Performance events core code:
  *
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *  Copyright  �  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
- *  For licensing details see kernel-base/COPYING
+ * For licensing details see kernel-base/COPYING
  */
 
 #include <linux/fs.h>
-- 
cgit v0.10.2


From d3a7dcfeeb6856215412065bd0f774de83f43414 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 17 Sep 2009 18:46:26 +0200
Subject: ALSA: pcm - Simplify snd_pcm_drain() implementation

Simplify snd_pcm_drain() implementation and avoid unneeded array-
allocation for waitqueues.  Instead, one waitqueue is used for the
first draining stream, and wait until all streams finished.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 59e5fbe..561d6d9 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1387,11 +1387,6 @@ static struct action_ops snd_pcm_action_drain_init = {
 	.post_action = snd_pcm_post_drain_init
 };
 
-struct drain_rec {
-	struct snd_pcm_substream *substream;
-	wait_queue_t wait;
-};
-
 static int snd_pcm_drop(struct snd_pcm_substream *substream);
 
 /*
@@ -1407,10 +1402,9 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 	struct snd_card *card;
 	struct snd_pcm_runtime *runtime;
 	struct snd_pcm_substream *s;
+	wait_queue_t wait;
 	int result = 0;
-	int i, num_drecs;
 	int nonblock = 0;
-	struct drain_rec *drec, drec_tmp, *d;
 
 	card = substream->pcm->card;
 	runtime = substream->runtime;
@@ -1433,38 +1427,10 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 	} else if (substream->f_flags & O_NONBLOCK)
 		nonblock = 1;
 
-	if (nonblock)
-		goto lock; /* no need to allocate waitqueues */
-
-	/* allocate temporary record for drain sync */
 	down_read(&snd_pcm_link_rwsem);
-	if (snd_pcm_stream_linked(substream)) {
-		drec = kmalloc(substream->group->count * sizeof(*drec), GFP_KERNEL);
-		if (! drec) {
-			up_read(&snd_pcm_link_rwsem);
-			snd_power_unlock(card);
-			return -ENOMEM;
-		}
-	} else
-		drec = &drec_tmp;
-
-	/* count only playback streams */
-	num_drecs = 0;
-	snd_pcm_group_for_each_entry(s, substream) {
-		runtime = s->runtime;
-		if (s->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-			d = &drec[num_drecs++];
-			d->substream = s;
-			init_waitqueue_entry(&d->wait, current);
-			add_wait_queue(&runtime->sleep, &d->wait);
-		}
-	}
-	up_read(&snd_pcm_link_rwsem);
-
- lock:
 	snd_pcm_stream_lock_irq(substream);
 	/* resume pause */
-	if (substream->runtime->status->state == SNDRV_PCM_STATE_PAUSED)
+	if (runtime->status->state == SNDRV_PCM_STATE_PAUSED)
 		snd_pcm_pause(substream, 0);
 
 	/* pre-start/stop - all running streams are changed to DRAINING state */
@@ -1479,25 +1445,35 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 
 	for (;;) {
 		long tout;
+		struct snd_pcm_runtime *to_check;
 		if (signal_pending(current)) {
 			result = -ERESTARTSYS;
 			break;
 		}
-		/* all finished? */
-		for (i = 0; i < num_drecs; i++) {
-			runtime = drec[i].substream->runtime;
-			if (runtime->status->state == SNDRV_PCM_STATE_DRAINING)
+		/* find a substream to drain */
+		to_check = NULL;
+		snd_pcm_group_for_each_entry(s, substream) {
+			if (s->stream != SNDRV_PCM_STREAM_PLAYBACK)
+				continue;
+			runtime = s->runtime;
+			if (runtime->status->state == SNDRV_PCM_STATE_DRAINING) {
+				to_check = runtime;
 				break;
+			}
 		}
-		if (i == num_drecs)
-			break; /* yes, all drained */
-
+		if (!to_check)
+			break; /* all drained */
+		init_waitqueue_entry(&wait, current);
+		add_wait_queue(&to_check->sleep, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
 		snd_pcm_stream_unlock_irq(substream);
+		up_read(&snd_pcm_link_rwsem);
 		snd_power_unlock(card);
 		tout = schedule_timeout(10 * HZ);
 		snd_power_lock(card);
+		down_read(&snd_pcm_link_rwsem);
 		snd_pcm_stream_lock_irq(substream);
+		remove_wait_queue(&to_check->sleep, &wait);
 		if (tout == 0) {
 			if (substream->runtime->status->state == SNDRV_PCM_STATE_SUSPENDED)
 				result = -ESTRPIPE;
@@ -1512,16 +1488,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 
  unlock:
 	snd_pcm_stream_unlock_irq(substream);
-
-	if (!nonblock) {
-		for (i = 0; i < num_drecs; i++) {
-			d = &drec[i];
-			runtime = d->substream->runtime;
-			remove_wait_queue(&runtime->sleep, &d->wait);
-		}
-		if (drec != &drec_tmp)
-			kfree(drec);
-	}
+	up_read(&snd_pcm_link_rwsem);
 	snd_power_unlock(card);
 
 	return result;
-- 
cgit v0.10.2


From 95eff499c96ac728307fd34cbf1a28ea20a5b615 Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Mon, 21 Sep 2009 15:00:22 +0200
Subject: ALSA: lx6464es - cleanup of rmh message bus function

the rmh bus is not used asynchronously, so it is safe to remove the
specific code pieces.

Signed-off-by: Tim Blechmann <tim@klingt.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/lx6464es/lx6464es.h b/sound/pci/lx6464es/lx6464es.h
index 012c010..16442da 100644
--- a/sound/pci/lx6464es/lx6464es.h
+++ b/sound/pci/lx6464es/lx6464es.h
@@ -86,7 +86,6 @@ struct lx6464es {
 
 	/* messaging */
 	spinlock_t		msg_lock;          /* message spinlock */
-	atomic_t	        send_message_locked;
 	struct lx_rmh           rmh;
 
 	/* configuration */
diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
index 5812780..3086b75 100644
--- a/sound/pci/lx6464es/lx_core.c
+++ b/sound/pci/lx6464es/lx_core.c
@@ -314,98 +314,6 @@ static inline void lx_message_dump(struct lx_rmh *rmh)
 #define XILINX_POLL_NO_SLEEP    100
 #define XILINX_POLL_ITERATIONS  150
 
-#if 0 /* not used now */
-static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh)
-{
-	u32 reg = ED_DSP_TIMED_OUT;
-	int dwloop;
-	int answer_received;
-
-	if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) {
-		snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg);
-		return -EBUSY;
-	}
-
-	/* write command */
-	lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len);
-
-	snd_BUG_ON(atomic_read(&chip->send_message_locked) != 0);
-	atomic_set(&chip->send_message_locked, 1);
-
-	/* MicoBlaze gogogo */
-	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
-
-	/* wait for interrupt to answer */
-	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS; ++dwloop) {
-		answer_received = atomic_read(&chip->send_message_locked);
-		if (answer_received == 0)
-			break;
-		msleep(1);
-	}
-
-	if (answer_received == 0) {
-		/* in Debug mode verify Reg_CSM_MR */
-		snd_BUG_ON(!(lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR));
-
-		/* command finished, read status */
-		if (rmh->dsp_stat == 0)
-			reg = lx_dsp_reg_read(chip, eReg_CRM1);
-		else
-			reg = 0;
-	} else {
-		int i;
-		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
-			   "Interrupts disabled?\n");
-
-		/* attente bit Reg_CSM_MR */
-		for (i = 0; i != XILINX_POLL_ITERATIONS; i++) {
-			if ((lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)) {
-				if (rmh->dsp_stat == 0)
-					reg = lx_dsp_reg_read(chip, eReg_CRM1);
-				else
-					reg = 0;
-				goto polling_successful;
-			}
-
-			if (i > XILINX_POLL_NO_SLEEP)
-				msleep(1);
-		}
-		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
-			   "polling failed\n");
-
-polling_successful:
-		atomic_set(&chip->send_message_locked, 0);
-	}
-
-	if ((reg & ERROR_VALUE) == 0) {
-		/* read response */
-		if (rmh->stat_len) {
-			snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1));
-
-			lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat,
-					   rmh->stat_len);
-		}
-	} else
-		snd_printk(KERN_WARNING LXP "lx_message_send: error_value %x\n",
-			   reg);
-
-	/* clear Reg_CSM_MR */
-	lx_dsp_reg_write(chip, eReg_CSM, 0);
-
-	switch (reg) {
-	case ED_DSP_TIMED_OUT:
-		snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n");
-		return -ETIMEDOUT;
-
-	case ED_DSP_CRASHED:
-		snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n");
-		return -EAGAIN;
-	}
-
-	lx_message_dump(rmh);
-	return 0;
-}
-#endif /* not used now */
 
 static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh)
 {
@@ -423,7 +331,7 @@ static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh)
 	/* MicoBlaze gogogo */
 	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
 
-	/* wait for interrupt to answer */
+	/* wait for device to answer */
 	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS * 1000; ++dwloop) {
 		if (lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR) {
 			if (rmh->dsp_stat == 0)
@@ -1175,10 +1083,6 @@ static int lx_interrupt_ack(struct lx6464es *chip, u32 *r_irqsrc,
 		*r_async_escmd = 1;
 	}
 
-	if (irqsrc & MASK_SYS_STATUS_CMD_DONE)
-		/* xilinx command notification */
-		atomic_set(&chip->send_message_locked, 0);
-
 	if (irq_async) {
 		/* snd_printd("interrupt: async event pending\n"); */
 		*r_async_pending = 1;
-- 
cgit v0.10.2


From 8fdc9e870c07054bc34a62a4655cdb078953255d Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Mon, 21 Sep 2009 15:00:57 +0200
Subject: ALSA: lx6464es - remove unused struct member

we cannot set the sampling rate of the device, but can only read it
from the board, so we don't need the member for it.

Signed-off-by: Tim Blechmann <tim@klingt.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/lx6464es/lx6464es.h b/sound/pci/lx6464es/lx6464es.h
index 16442da..51afc04 100644
--- a/sound/pci/lx6464es/lx6464es.h
+++ b/sound/pci/lx6464es/lx6464es.h
@@ -94,7 +94,6 @@ struct lx6464es {
 	uint                    hardware_running[2];
 	u32                     board_sample_rate; /* sample rate read from
 						    * board */
-	u32                     sample_rate;	   /* our sample rate */
 	u16                     pcm_granularity;   /* board blocksize */
 
 	/* dma */
-- 
cgit v0.10.2


From bcf56442429a15bdd6e1d81a9d4c89f93a44fdf7 Mon Sep 17 00:00:00 2001
From: GeunSik Lim <leemgs1@gmail.com>
Date: Tue, 16 Jun 2009 10:26:25 +0200
Subject: trivial: change address of the libcap source.

This is patch to change ftp site of the libcap source.
"ftp://linux.kernel.org" address does not exist.

Signed-off-by: GeunSik Lim <geunsik.lim@samsung.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c302110..c8f2a5f7 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -7,7 +7,7 @@
  *
  * See here for the libcap library ("POSIX draft" compliance):
  *
- * ftp://linux.kernel.org/pub/linux/libs/security/linux-privs/kernel-2.6/
+ * ftp://www.kernel.org/pub/linux/libs/security/linux-privs/kernel-2.6/
  */
 
 #ifndef _LINUX_CAPABILITY_H
-- 
cgit v0.10.2


From 47a0dfaad9eb82b16193477cf99c2462af03c329 Mon Sep 17 00:00:00 2001
From: Ori Avtalion <ori@avtalion.name>
Date: Tue, 16 Jun 2009 12:17:53 +0300
Subject: trivial: fix typo in namei.h comment

Signed-off-by: Ori Avtalion <ori@avtalion.name>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/include/linux/namei.h b/include/linux/namei.h
index d870ae2..ec0f607 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -40,7 +40,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
  *  - follow links at the end
  *  - require a directory
  *  - ending slashes ok even for nonexistent files
- *  - internal "there are more path compnents" flag
+ *  - internal "there are more path components" flag
  *  - locked when lookup done with dcache_lock held
  *  - dentry cache is untrusted; force a real lookup
  */
-- 
cgit v0.10.2


From 4b26d50b33637b5b0e6dd84a4e0f38036999a192 Mon Sep 17 00:00:00 2001
From: Anand Gadiyar <gadiyar@ti.com>
Date: Mon, 22 Jun 2009 15:55:45 +0530
Subject: trivial: OHCI: Fix typo in a comment

trivial: OHCI: Fix typo in a comment

Signed-off-by: Anand Gadiyar <gadiyar@ti.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
index c2d80f8..16fecb8 100644
--- a/drivers/usb/host/ohci-q.c
+++ b/drivers/usb/host/ohci-q.c
@@ -418,7 +418,7 @@ static struct ed *ed_get (
 		is_out = !(ep->desc.bEndpointAddress & USB_DIR_IN);
 
 		/* FIXME usbcore changes dev->devnum before SET_ADDRESS
-		 * suceeds ... otherwise we wouldn't need "pipe".
+		 * succeeds ... otherwise we wouldn't need "pipe".
 		 */
 		info = usb_pipedevice (pipe);
 		ed->type = usb_pipetype(pipe);
-- 
cgit v0.10.2


From 3c36543aeadae32ed61251fa364881645456eb30 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Fri, 12 Jun 2009 13:07:29 +0200
Subject: trivial: add __init/__exit macros to DAC960.c

Trivial patch which adds the __init and __exit macros to the module_init /
module_exit functions from drivers/block/DAC960.c

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 1e6b7c1..b839af1 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -7210,7 +7210,7 @@ static struct pci_driver DAC960_pci_driver = {
 	.remove		= DAC960_Remove,
 };
 
-static int DAC960_init_module(void)
+static int __init DAC960_init_module(void)
 {
 	int ret;
 
@@ -7222,7 +7222,7 @@ static int DAC960_init_module(void)
 	return ret;
 }
 
-static void DAC960_cleanup_module(void)
+static void __exit DAC960_cleanup_module(void)
 {
 	int i;
 
-- 
cgit v0.10.2


From 7d3392e54653171bd13467bf37f1182e83fadd08 Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Fri, 12 Jun 2009 01:02:35 +0200
Subject: trivial: remove references to non-existent include/linux/config.h

Ignore drivers/staging/ since it is very likely that new drivers
introduce it again.

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/arch/h8300/kernel/timer/tpu.c b/arch/h8300/kernel/timer/tpu.c
index e7c6e61..2193a2e 100644
--- a/arch/h8300/kernel/timer/tpu.c
+++ b/arch/h8300/kernel/timer/tpu.c
@@ -7,7 +7,6 @@
  *
  */
 
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 72c1520..f9c1e54 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -16,8 +16,7 @@
  * tells make when to remake a file.
  *
  * To use this list as-is however has the drawback that virtually
- * every file in the kernel includes <linux/config.h> which then again
- * includes <linux/autoconf.h>
+ * every file in the kernel includes <linux/autoconf.h>.
  *
  * If the user re-runs make *config, linux/autoconf.h will be
  * regenerated.  make notices that and will rebuild every file which
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index aadc522..ecf9c7d 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -334,8 +334,6 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
 		deps_drivers/net/dummy.o := \
 		  drivers/net/dummy.c \
 		    $(wildcard include/config/net/fastroute.h) \
-		  include/linux/config.h \
-		    $(wildcard include/config/h.h) \
 		  include/linux/module.h \
 
 	   Sum all files in the same dir or subdirs.
-- 
cgit v0.10.2


From cf83011d8faec99de51d6ba8879c0c61cf31e836 Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Fri, 12 Jun 2009 01:00:28 +0200
Subject: trivial: update the Kernel Janitors' web-page URL

The former address cannot be resolved.

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/MAINTAINERS b/MAINTAINERS
index 43761a0..64d4177 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2955,7 +2955,7 @@ F:	scripts/Makefile.*
 KERNEL JANITORS
 L:	kernel-janitors@vger.kernel.org
 W:	http://www.kerneljanitors.org/
-S:	Odd fixes
+S:	Maintained
 
 KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
 M:	"J. Bruce Fields" <bfields@fieldses.org>
-- 
cgit v0.10.2


From b519c15d4aacb3706bfff86ba316f9ed81b5032a Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Mon, 22 Jun 2009 11:08:36 +0200
Subject: trivial: cleanup hpfall example code (checkpatch)

This patch makes hpfall.c conform to kernel coding style.

I have not fixed the C99 // comments on two lines as they
help indicate that those are not actually comments but
incomplete code.

Before:
 total: 10 errors, 6 warnings, 101 lines checked
After:
 total: 2 errors, 0 warnings, 99 lines checked

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/hwmon/hpfall.c b/Documentation/hwmon/hpfall.c
index bbea1cc..d2f6711 100644
--- a/Documentation/hwmon/hpfall.c
+++ b/Documentation/hwmon/hpfall.c
@@ -57,45 +57,43 @@ void ignore_me(void)
 {
 	protect(0);
 	set_led(0);
-
 }
 
-int main(int argc, char* argv[])
+int main(int argc, char *argv[])
 {
-       int fd, ret;
+	int fd, ret;
 
-       fd = open("/dev/freefall", O_RDONLY);
-       if (fd < 0) {
-               perror("open");
-               return EXIT_FAILURE;
-       }
+	fd = open("/dev/freefall", O_RDONLY);
+	if (fd < 0) {
+		perror("open");
+		return EXIT_FAILURE;
+	}
 
 	signal(SIGALRM, ignore_me);
 
-       for (;;) {
-	       unsigned char count;
-
-               ret = read(fd, &count, sizeof(count));
-	       alarm(0);
-	       if ((ret == -1) && (errno == EINTR)) {
-		       /* Alarm expired, time to unpark the heads */
-		       continue;
-	       }
-
-               if (ret != sizeof(count)) {
-                       perror("read");
-                       break;
-               }
-
-	       protect(21);
-	       set_led(1);
-	       if (1 || on_ac() || lid_open()) {
-		       alarm(2);
-	       } else {
-		       alarm(20);
-	       }
-       }
+	for (;;) {
+		unsigned char count;
+
+		ret = read(fd, &count, sizeof(count));
+		alarm(0);
+		if ((ret == -1) && (errno == EINTR)) {
+			/* Alarm expired, time to unpark the heads */
+			continue;
+		}
+
+		if (ret != sizeof(count)) {
+			perror("read");
+			break;
+		}
+
+		protect(21);
+		set_led(1);
+		if (1 || on_ac() || lid_open())
+			alarm(2);
+		else
+			alarm(20);
+	}
 
-       close(fd);
-       return EXIT_SUCCESS;
+	close(fd);
+	return EXIT_SUCCESS;
 }
-- 
cgit v0.10.2


From b9049df5a0e7f35456c06b949b08b898b9c2e7bc Mon Sep 17 00:00:00 2001
From: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
Date: Tue, 23 Jun 2009 12:09:29 +0200
Subject: Change "useing" -> "using".

Signed-off-by: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 6273fa9..7ef0c7b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -94,7 +94,7 @@ struct execute_work {
 /*
  * initialize all of a work item in one go
  *
- * NOTE! No point in using "atomic_long_set()": useing a direct
+ * NOTE! No point in using "atomic_long_set()": using a direct
  * assignment of the work data initializer allows the compiler
  * to generate better code.
  */
-- 
cgit v0.10.2


From fe002a419755f991e1219249c8ffe7dc0b798232 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Sun, 28 Jun 2009 21:10:07 -0400
Subject: trivial: Correct print_tainted routine name in comment

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/kernel/panic.c b/kernel/panic.c
index 512ab73..bcdef26 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -177,7 +177,7 @@ static const struct tnt tnts[] = {
  *  'W' - Taint on warning.
  *  'C' - modules from drivers/staging are loaded.
  *
- *	The string is overwritten by the next call to print_taint().
+ *	The string is overwritten by the next call to print_tainted().
  */
 const char *print_tainted(void)
 {
-- 
cgit v0.10.2


From 627df23c61ce28043a0715a941605ab42dfeb05e Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Thu, 11 Jun 2009 02:23:33 +0200
Subject: trivial: mtd: add __init/__exit macros to init/exitfunctions

Trivial patch which adds the __init and __exit macros to the module_init /
module_exit functions to the following modules from drivers/mtd/
 devices/m25p80.c
 devices/slram.c
 linux version 2.6.30
 ftl.c
 nand/cafe_nand.c
 nand/cmx270_nand.c

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 10ed195..eb495d8 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -776,13 +776,13 @@ static struct spi_driver m25p80_driver = {
 };
 
 
-static int m25p80_init(void)
+static int __init m25p80_init(void)
 {
 	return spi_register_driver(&m25p80_driver);
 }
 
 
-static void m25p80_exit(void)
+static void __exit m25p80_exit(void)
 {
 	spi_unregister_driver(&m25p80_driver);
 }
diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c
index 00248e8..7d846e9 100644
--- a/drivers/mtd/devices/slram.c
+++ b/drivers/mtd/devices/slram.c
@@ -303,7 +303,7 @@ __setup("slram=", mtd_slram_setup);
 
 #endif
 
-static int init_slram(void)
+static int __init init_slram(void)
 {
 	char *devname;
 	int i;
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index a790c06..e56d6b4 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1099,7 +1099,7 @@ static struct mtd_blktrans_ops ftl_tr = {
 	.owner		= THIS_MODULE,
 };
 
-static int init_ftl(void)
+static int __init init_ftl(void)
 {
 	return register_mtd_blktrans(&ftl_tr);
 }
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 29acd06..1b4690b 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -903,12 +903,12 @@ static struct pci_driver cafe_nand_pci_driver = {
 	.resume = cafe_nand_resume,
 };
 
-static int cafe_nand_init(void)
+static int __init cafe_nand_init(void)
 {
 	return pci_register_driver(&cafe_nand_pci_driver);
 }
 
-static void cafe_nand_exit(void)
+static void __exit cafe_nand_exit(void)
 {
 	pci_unregister_driver(&cafe_nand_pci_driver);
 }
diff --git a/drivers/mtd/nand/cmx270_nand.c b/drivers/mtd/nand/cmx270_nand.c
index 10081e6..826cacf 100644
--- a/drivers/mtd/nand/cmx270_nand.c
+++ b/drivers/mtd/nand/cmx270_nand.c
@@ -147,7 +147,7 @@ static int cmx270_device_ready(struct mtd_info *mtd)
 /*
  * Main initialization routine
  */
-static int cmx270_init(void)
+static int __init cmx270_init(void)
 {
 	struct nand_chip *this;
 	const char *part_type;
@@ -261,7 +261,7 @@ module_init(cmx270_init);
 /*
  * Clean up routine
  */
-static void cmx270_cleanup(void)
+static void __exit cmx270_cleanup(void)
 {
 	/* Release resources, unregister device */
 	nand_release(cmx270_nand_mtd);
-- 
cgit v0.10.2


From b7f5ab6fbbb9459a91c0acae15097a495f800206 Mon Sep 17 00:00:00 2001
From: Horst Schirmeier <horst@schirmeier.com>
Date: Fri, 3 Jul 2009 14:20:17 +0200
Subject: trivial: doc: document missing value 2 for randomize-va-space

The documentation for /proc/sys/kernel/* does not mention the possible
value 2 for randomize-va-space yet.  While being there, doing some
reformatting, fixing grammar problems and clarifying the correlations
between randomize-va-space, kernel parameter "norandmaps" and the
CONFIG_COMPAT_BRK option.

Signed-off-by: Horst Schirmeier <horst@schirmeier.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 2dbff53..3e5b63e 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -319,25 +319,29 @@ This option can be used to select the type of process address
 space randomization that is used in the system, for architectures
 that support this feature.
 
-0 - Turn the process address space randomization off by default.
+0 - Turn the process address space randomization off.  This is the
+    default for architectures that do not support this feature anyways,
+    and kernels that are booted with the "norandmaps" parameter.
 
 1 - Make the addresses of mmap base, stack and VDSO page randomized.
     This, among other things, implies that shared libraries will be
-    loaded to random addresses. Also for PIE-linked binaries, the location
-    of code start is randomized.
+    loaded to random addresses.  Also for PIE-linked binaries, the
+    location of code start is randomized.  This is the default if the
+    CONFIG_COMPAT_BRK option is enabled.
 
-    With heap randomization, the situation is a little bit more
-    complicated.
-    There a few legacy applications out there (such as some ancient
+2 - Additionally enable heap randomization.  This is the default if
+    CONFIG_COMPAT_BRK is disabled.
+
+    There are a few legacy applications out there (such as some ancient
     versions of libc.so.5 from 1996) that assume that brk area starts
-    just after the end of the code+bss. These applications break when
-    start of the brk area is randomized. There are however no known
+    just after the end of the code+bss.  These applications break when
+    start of the brk area is randomized.  There are however no known
     non-legacy applications that would be broken this way, so for most
-    systems it is safe to choose full randomization. However there is
-    a CONFIG_COMPAT_BRK option for systems with ancient and/or broken
-    binaries, that makes heap non-randomized, but keeps all other
-    parts of process address space randomized if randomize_va_space
-    sysctl is turned on.
+    systems it is safe to choose full randomization.
+
+    Systems with ancient and/or broken binaries should be configured
+    with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
+    address space randomization.
 
 ==============================================================
 
-- 
cgit v0.10.2


From 786d8ca341a30296264bc6cebac52d37b0851647 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Sat, 4 Jul 2009 20:44:04 -0400
Subject: trivial: Remove commented out usage of dead MODULE_PARM() in
 swarm_cs4297a

Get rid of that commented usage of the now defunct MODULE_PARM macro.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c
index 1edab7b..3136c88 100644
--- a/sound/oss/swarm_cs4297a.c
+++ b/sound/oss/swarm_cs4297a.c
@@ -110,9 +110,6 @@ static void start_adc(struct cs4297a_state *s);
 // rather than 64k as some of the games work more responsively.
 // log base 2( buff sz = 32k).
 
-//static unsigned long defaultorder = 3;
-//MODULE_PARM(defaultorder, "i");
-
 //
 // Turn on/off debugging compilation by commenting out "#define CSDEBUG"
 //
-- 
cgit v0.10.2


From 411c94038594b2a3fd123d09bdec3fe2500e383d Mon Sep 17 00:00:00 2001
From: Anand Gadiyar <gadiyar@ti.com>
Date: Tue, 7 Jul 2009 15:24:23 +0530
Subject: trivial: fix typo "for for" in multiple files

trivial: fix typo "for for" in multiple files

Signed-off-by: Anand Gadiyar <gadiyar@ti.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfsroot.txt
index 68baddf..3ba0b94 100644
--- a/Documentation/filesystems/nfsroot.txt
+++ b/Documentation/filesystems/nfsroot.txt
@@ -105,7 +105,7 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
 		the client address and this parameter is NOT empty only
 		replies from the specified server are accepted.
 
-		Only required for for NFS root. That is autoconfiguration
+		Only required for NFS root. That is autoconfiguration
 		will not be triggered if it is missing and NFS root is not
 		in operation.
 
diff --git a/Documentation/powerpc/dts-bindings/marvell.txt b/Documentation/powerpc/dts-bindings/marvell.txt
index 3708a2f..f1533d9 100644
--- a/Documentation/powerpc/dts-bindings/marvell.txt
+++ b/Documentation/powerpc/dts-bindings/marvell.txt
@@ -32,7 +32,7 @@ prefixed with the string "marvell,", for Marvell Technology Group Ltd.
       devices.  This field represents the number of cells needed to
       represent the address of the memory-mapped registers of devices
       within the system controller chip.
-    - #size-cells : Size representation for for the memory-mapped
+    - #size-cells : Size representation for the memory-mapped
       registers within the system controller chip.
     - #interrupt-cells : Defines the width of cells used to represent
       interrupts.
diff --git a/arch/blackfin/mach-bf538/include/mach/defBF539.h b/arch/blackfin/mach-bf538/include/mach/defBF539.h
index bdc330c..1c58914 100644
--- a/arch/blackfin/mach-bf538/include/mach/defBF539.h
+++ b/arch/blackfin/mach-bf538/include/mach/defBF539.h
@@ -2325,7 +2325,7 @@
 #define	AMBEN_B0_B1	0x0004	/* Enable Asynchronous Memory Banks 0 &	1 only */
 #define	AMBEN_B0_B1_B2	0x0006	/* Enable Asynchronous Memory Banks 0, 1, and 2 */
 #define	AMBEN_ALL	0x0008	/* Enable Asynchronous Memory Banks (all) 0, 1,	2, and 3 */
-#define	CDPRIO		0x0100	/* DMA has priority over core for for external accesses */
+#define	CDPRIO		0x0100	/* DMA has priority over core for external accesses */
 
 /* EBIU_AMGCTL Bit Positions */
 #define	AMCKEN_P		0x0000	/* Enable CLKOUT */
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index acb74a1..b4b167b 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -1,5 +1,5 @@
 /*
- * udbg for for NS16550 compatable serial ports
+ * udbg for NS16550 compatable serial ports
  *
  * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
  *
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 572771f..9490157 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -1,5 +1,5 @@
 /*
- * udbg for for zilog scc ports as found on Apple PowerMacs
+ * udbg for zilog scc ports as found on Apple PowerMacs
  *
  * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
  *
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 871c13b..12f355c 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -286,7 +286,7 @@ enum scrub_type {
  *			is irrespective of the memory devices being mounted
  *			on both sides of the memory stick.
  *
- * Socket set:		All of the memory sticks that are required for for
+ * Socket set:		All of the memory sticks that are required for
  *			a single memory access or all of the memory sticks
  *			spanned by a chip-select row.  A single socket set
  *			has two chip-select rows and if double-sided sticks
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 02831ad..4bd39c8 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -809,7 +809,7 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd)
  * errors.  We only bother to do this at load time, because it's OK if
  * it happened before we were loaded (first time after boot/reset),
  * but any time after that, it's fatal anyway.  Also need to not check
- * for for upper byte errors if we are in 8 bit mode, so figure out
+ * for upper byte errors if we are in 8 bit mode, so figure out
  * our width.  For now, at least, also complain if it's 8 bit.
  */
 static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 7188c59..adb1e8c 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -761,7 +761,7 @@ isdn_getnum(char **p)
  * Be aware that this is not an atomic operation when sleep != 0, even though 
  * interrupts are turned off! Well, like that we are currently only called
  * on behalf of a read system call on raw device files (which are documented
- * to be dangerous and for for debugging purpose only). The inode semaphore
+ * to be dangerous and for debugging purpose only). The inode semaphore
  * takes care that this is not called for the same minor device number while
  * we are sleeping, but access is not serialized against simultaneous read()
  * from the corresponding ttyI device. Can other ugly events, like changes
@@ -873,7 +873,7 @@ isdn_readbchan(int di, int channel, u_char * buf, u_char * fp, int len, wait_que
  * Be aware that this is not an atomic operation when sleep != 0, even though
  * interrupts are turned off! Well, like that we are currently only called
  * on behalf of a read system call on raw device files (which are documented
- * to be dangerous and for for debugging purpose only). The inode semaphore
+ * to be dangerous and for debugging purpose only). The inode semaphore
  * takes care that this is not called for the same minor device number while
  * we are sleeping, but access is not serialized against simultaneous read()
  * from the corresponding ttyI device. Can other ugly events, like changes
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f8fc188..f55d2ff 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -201,7 +201,7 @@ struct mddev_s
 	 * INTR:     resync needs to be aborted for some reason
 	 * DONE:     thread is done and is waiting to be reaped
 	 * REQUEST:  user-space has requested a sync (used with SYNC)
-	 * CHECK:    user-space request for for check-only, no repair
+	 * CHECK:    user-space request for check-only, no repair
 	 * RESHAPE:  A reshape is happening
 	 *
 	 * If neither SYNC or RESHAPE are set, then it is a recovery.
diff --git a/drivers/net/bnx2x_reg.h b/drivers/net/bnx2x_reg.h
index 0695be1..aa76cba 100644
--- a/drivers/net/bnx2x_reg.h
+++ b/drivers/net/bnx2x_reg.h
@@ -3122,7 +3122,7 @@
    The fields are:[4:0] - tail pointer; [10:5] - Link List size; 15:11] -
    header pointer. */
 #define TCM_REG_XX_TABLE					 0x50240
-/* [RW 4] Load value for for cfc ac credit cnt. */
+/* [RW 4] Load value for cfc ac credit cnt. */
 #define TM_REG_CFC_AC_CRDCNT_VAL				 0x164208
 /* [RW 4] Load value for cfc cld credit cnt. */
 #define TM_REG_CFC_CLD_CRDCNT_VAL				 0x164210
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index bc98e7f..ede937e 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -72,7 +72,7 @@ static int rionet_check = 0;
 static int rionet_capable = 1;
 
 /*
- * This is a fast lookup table for for translating TX
+ * This is a fast lookup table for translating TX
  * Ethernet packets into a destination RIO device. It
  * could be made into a hash table to save memory depending
  * on system trade-offs.
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index c2f1b7d..b5b83c4 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -242,7 +242,7 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
 	 * System suspend currently expects to be able to suspend the entire
 	 * device tree, device-at-a-time.  If we failed selective suspend
 	 * reports, system suspend would fail; so the root hub code must claim
-	 * success.  That's lying to usbcore, and it matters for for runtime
+	 * success.  That's lying to usbcore, and it matters for runtime
 	 * PM scenarios with selective suspend and remote wakeup...
 	 */
 	if (ehci->no_selective_suspend && device_can_wakeup(&pdev->dev))
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 2bfff30..48b9e88 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -37,7 +37,7 @@ typedef __u16 __bitwise __hc16;
 #define __hc16	__le16
 #endif
 
-/* statistics can be kept for for tuning/monitoring */
+/* statistics can be kept for tuning/monitoring */
 struct ehci_stats {
 	/* irq usage */
 	unsigned long		normal;
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c4ea51b..f52ac27 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -117,7 +117,7 @@ struct getbmapx {
 #define BMV_IF_VALID	\
 	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
 
-/*	bmv_oflags values - returned for for each non-header segment */
+/*	bmv_oflags values - returned for each non-header segment */
 #define BMV_OF_PREALLOC		0x1	/* segment = unwritten pre-allocation */
 #define BMV_OF_DELALLOC		0x2	/* segment = delayed allocation */
 #define BMV_OF_LAST		0x4	/* segment is the last in the file */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index a8fe05f..19fabc4 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1071,7 +1071,7 @@ typedef void (*usb_complete_t)(struct urb *);
  * @start_frame: Returns the initial frame for isochronous transfers.
  * @number_of_packets: Lists the number of ISO transfer buffers.
  * @interval: Specifies the polling interval for interrupt or isochronous
- *	transfers.  The units are frames (milliseconds) for for full and low
+ *	transfers.  The units are frames (milliseconds) for full and low
  *	speed devices, and microframes (1/8 millisecond) for highspeed ones.
  * @error_count: Returns the number of ISO transfers that reported errors.
  * @context: For use in completion functions.  This normally points to
-- 
cgit v0.10.2


From 3f6db50f1ebc4fa797445dd89af7211cafc52a92 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Fri, 3 Jul 2009 14:39:40 +0200
Subject: trivial: media/omap: adding __init/__exit macros to lcd_drivers

Trivial patch which adds the __init and __exit macros to the module_init /
module_exit functions to several files in drivers/video/omap/

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/video/omap/lcd_h3.c b/drivers/video/omap/lcd_h3.c
index 2486237..417ae5e 100644
--- a/drivers/video/omap/lcd_h3.c
+++ b/drivers/video/omap/lcd_h3.c
@@ -124,12 +124,12 @@ struct platform_driver h3_panel_driver = {
 	},
 };
 
-static int h3_panel_drv_init(void)
+static int __init h3_panel_drv_init(void)
 {
 	return platform_driver_register(&h3_panel_driver);
 }
 
-static void h3_panel_drv_cleanup(void)
+static void __exit h3_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&h3_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_h4.c b/drivers/video/omap/lcd_h4.c
index 6ff5643..0c398bd 100644
--- a/drivers/video/omap/lcd_h4.c
+++ b/drivers/video/omap/lcd_h4.c
@@ -102,12 +102,12 @@ static struct platform_driver h4_panel_driver = {
 	},
 };
 
-static int h4_panel_drv_init(void)
+static int __init h4_panel_drv_init(void)
 {
 	return platform_driver_register(&h4_panel_driver);
 }
 
-static void h4_panel_drv_cleanup(void)
+static void __exit h4_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&h4_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_inn1510.c b/drivers/video/omap/lcd_inn1510.c
index 6953ed4..cdbd8bb 100644
--- a/drivers/video/omap/lcd_inn1510.c
+++ b/drivers/video/omap/lcd_inn1510.c
@@ -109,12 +109,12 @@ struct platform_driver innovator1510_panel_driver = {
 	},
 };
 
-static int innovator1510_panel_drv_init(void)
+static int __init innovator1510_panel_drv_init(void)
 {
 	return platform_driver_register(&innovator1510_panel_driver);
 }
 
-static void innovator1510_panel_drv_cleanup(void)
+static void __exit innovator1510_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&innovator1510_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_inn1610.c b/drivers/video/omap/lcd_inn1610.c
index 4c4f7ee..268f7f8 100644
--- a/drivers/video/omap/lcd_inn1610.c
+++ b/drivers/video/omap/lcd_inn1610.c
@@ -133,12 +133,12 @@ struct platform_driver innovator1610_panel_driver = {
 	},
 };
 
-static int innovator1610_panel_drv_init(void)
+static int __init innovator1610_panel_drv_init(void)
 {
 	return platform_driver_register(&innovator1610_panel_driver);
 }
 
-static void innovator1610_panel_drv_cleanup(void)
+static void __exit innovator1610_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&innovator1610_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_osk.c b/drivers/video/omap/lcd_osk.c
index 379c96d..b3fa88b 100644
--- a/drivers/video/omap/lcd_osk.c
+++ b/drivers/video/omap/lcd_osk.c
@@ -127,12 +127,12 @@ struct platform_driver osk_panel_driver = {
 	},
 };
 
-static int osk_panel_drv_init(void)
+static int __init osk_panel_drv_init(void)
 {
 	return platform_driver_register(&osk_panel_driver);
 }
 
-static void osk_panel_drv_cleanup(void)
+static void __exit osk_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&osk_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_palmte.c b/drivers/video/omap/lcd_palmte.c
index 2183173..4bf3c79 100644
--- a/drivers/video/omap/lcd_palmte.c
+++ b/drivers/video/omap/lcd_palmte.c
@@ -108,12 +108,12 @@ struct platform_driver palmte_panel_driver = {
 	},
 };
 
-static int palmte_panel_drv_init(void)
+static int __init palmte_panel_drv_init(void)
 {
 	return platform_driver_register(&palmte_panel_driver);
 }
 
-static void palmte_panel_drv_cleanup(void)
+static void __exit palmte_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&palmte_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_palmtt.c b/drivers/video/omap/lcd_palmtt.c
index 57b0f6c..48ea1f9 100644
--- a/drivers/video/omap/lcd_palmtt.c
+++ b/drivers/video/omap/lcd_palmtt.c
@@ -113,12 +113,12 @@ struct platform_driver palmtt_panel_driver = {
 	},
 };
 
-static int palmtt_panel_drv_init(void)
+static int __init palmtt_panel_drv_init(void)
 {
 	return platform_driver_register(&palmtt_panel_driver);
 }
 
-static void palmtt_panel_drv_cleanup(void)
+static void __exit palmtt_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&palmtt_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_palmz71.c b/drivers/video/omap/lcd_palmz71.c
index d33d78b..0697d29 100644
--- a/drivers/video/omap/lcd_palmz71.c
+++ b/drivers/video/omap/lcd_palmz71.c
@@ -109,12 +109,12 @@ struct platform_driver palmz71_panel_driver = {
 	},
 };
 
-static int palmz71_panel_drv_init(void)
+static int __init palmz71_panel_drv_init(void)
 {
 	return platform_driver_register(&palmz71_panel_driver);
 }
 
-static void palmz71_panel_drv_cleanup(void)
+static void __exit palmz71_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&palmz71_panel_driver);
 }
-- 
cgit v0.10.2


From 4f37940d64a155c025968118849b596f6aaa8128 Mon Sep 17 00:00:00 2001
From: "Samuel R. C. Vale" <srcvale@holoscopio.com>
Date: Fri, 10 Jul 2009 12:07:15 -0300
Subject: trivial: fix typos in comments s/DGBU/DBGU/

DBGU means Debug Unit, was refered as "DGBU" in some files. Fixed to "DBGU".

Signed-off-by: Samuel R. C. Vale <srcvale@holoscopio.com>
Acked-by: Andrew Victor <linux@maxim.org.za>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index 61e52b6..50667be 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -53,7 +53,7 @@ static void __init afeb9260_map_io(void)
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-cam60.c b/arch/arm/mach-at91/board-cam60.c
index d3ba29c..02138af 100644
--- a/arch/arm/mach-at91/board-cam60.c
+++ b/arch/arm/mach-at91/board-cam60.c
@@ -50,7 +50,7 @@ static void __init cam60_map_io(void)
 	/* Initialize processor: 10 MHz crystal */
 	at91sam9260_initialize(10000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c
index 9ba7ba2..8c0b71c 100644
--- a/arch/arm/mach-at91/board-neocore926.c
+++ b/arch/arm/mach-at91/board-neocore926.c
@@ -56,7 +56,7 @@ static void __init neocore926_map_io(void)
 	/* Initialize processor: 20 MHz crystal */
 	at91sam9263_initialize(20000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, RTS, CTS) */
diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c
index 4cff9a7..664938e 100644
--- a/arch/arm/mach-at91/board-qil-a9260.c
+++ b/arch/arm/mach-at91/board-qil-a9260.c
@@ -53,7 +53,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.000 MHz crystal */
 	at91sam9260_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c
index 93a0f8b..ba9d501 100644
--- a/arch/arm/mach-at91/board-sam9260ek.c
+++ b/arch/arm/mach-at91/board-sam9260ek.c
@@ -54,7 +54,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-sam9261ek.c b/arch/arm/mach-at91/board-sam9261ek.c
index f9b1999..c4c8865 100644
--- a/arch/arm/mach-at91/board-sam9261ek.c
+++ b/arch/arm/mach-at91/board-sam9261ek.c
@@ -61,7 +61,7 @@ static void __init ek_map_io(void)
 	/* Setup the LEDs */
 	at91_init_leds(AT91_PIN_PA13, AT91_PIN_PA14);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index 1bf7bd4..26f1aa6 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -57,7 +57,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 16.367 MHz crystal */
 	at91sam9263_initialize(16367660);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, RTS, CTS) */
diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c
index ca470d5..29cf831 100644
--- a/arch/arm/mach-at91/board-sam9g20ek.c
+++ b/arch/arm/mach-at91/board-sam9g20ek.c
@@ -50,7 +50,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c
index 9d07679..94ffb5c 100644
--- a/arch/arm/mach-at91/board-sam9rlek.c
+++ b/arch/arm/mach-at91/board-sam9rlek.c
@@ -43,7 +43,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.000 MHz crystal */
 	at91sam9rl_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS) */
diff --git a/arch/arm/mach-at91/board-usb-a9260.c b/arch/arm/mach-at91/board-usb-a9260.c
index d13304c..905d6ef 100644
--- a/arch/arm/mach-at91/board-usb-a9260.c
+++ b/arch/arm/mach-at91/board-usb-a9260.c
@@ -53,7 +53,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.000 MHz crystal */
 	at91sam9260_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
diff --git a/arch/arm/mach-at91/board-usb-a9263.c b/arch/arm/mach-at91/board-usb-a9263.c
index d96405b..b6a3480 100644
--- a/arch/arm/mach-at91/board-usb-a9263.c
+++ b/arch/arm/mach-at91/board-usb-a9263.c
@@ -52,7 +52,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.00 MHz crystal */
 	at91sam9263_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
-- 
cgit v0.10.2


From fd589a8f0a13f53a2dd580b1fe170633cf6b095f Mon Sep 17 00:00:00 2001
From: Anand Gadiyar <gadiyar@ti.com>
Date: Thu, 16 Jul 2009 17:13:03 +0200
Subject: trivial: fix typo "to to" in multiple files

Signed-off-by: Anand Gadiyar <gadiyar@ti.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/hwmon/pc87427 b/Documentation/hwmon/pc87427
index d1ebbe5..db5cc12 100644
--- a/Documentation/hwmon/pc87427
+++ b/Documentation/hwmon/pc87427
@@ -34,5 +34,5 @@ Fan rotation speeds are reported as 14-bit values from a gated clock
 signal. Speeds down to 83 RPM can be measured.
 
 An alarm is triggered if the rotation speed drops below a programmable
-limit. Another alarm is triggered if the speed is too low to to be measured
+limit. Another alarm is triggered if the speed is too low to be measured
 (including stalled or missing fan).
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index eaa1a25..ee31369 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -96,7 +96,7 @@ Example code - drivers hinting an alpha2:
 
 This example comes from the zd1211rw device driver. You can start
 by having a mapping of your device's EEPROM country/regulatory
-domain value to to a specific alpha2 as follows:
+domain value to a specific alpha2 as follows:
 
 static struct zd_reg_alpha2_map reg_alpha2_map[] = {
 	{ ZD_REGDOMAIN_FCC, "US" },
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt
index d7f1817..aec6549 100644
--- a/Documentation/scsi/scsi_fc_transport.txt
+++ b/Documentation/scsi/scsi_fc_transport.txt
@@ -378,7 +378,7 @@ Vport Disable/Enable:
       int vport_disable(struct fc_vport *vport, bool disable)
 
     where:
-      vport:    Is vport to to be enabled or disabled
+      vport:    Is vport to be enabled or disabled
       disable:  If "true", the vport is to be disabled.
                 If "false", the vport is to be enabled.
 
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 16ef61a..625ed8f 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1270,7 +1270,7 @@ putreg (struct task_struct *child, int regno, unsigned int value)
 	      case PT_CS:
 		if (value != __USER_CS)
 			printk(KERN_ERR
-			       "ia32.putreg: attempt to to set invalid segment register %d = %x\n",
+			       "ia32.putreg: attempt to set invalid segment register %d = %x\n",
 			       regno, value);
 		break;
 	      default:
diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h
index 4bce6e0..7fd8539 100644
--- a/arch/um/include/shared/ptrace_user.h
+++ b/arch/um/include/shared/ptrace_user.h
@@ -29,7 +29,7 @@ extern int ptrace_setregs(long pid, unsigned long *regs_in);
  * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML.
  * We also want to be able to build the kernel on 2.4, which doesn't
  * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare
- * PTRACE_OLDSETOPTIONS to to be the same as PTRACE_SETOPTIONS.
+ * PTRACE_OLDSETOPTIONS to be the same as PTRACE_SETOPTIONS.
  *
  * On architectures, that start to support PTRACE_O_TRACESYSGOOD on
  * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index 20ef1bf..703959e 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -270,7 +270,7 @@ static void uninorth_agp_enable(struct agp_bridge_data *bridge, u32 mode)
 
 	if ((uninorth_rev >= 0x30) && (uninorth_rev <= 0x33)) {
 		/*
-		 * We need to to set REQ_DEPTH to 7 for U3 versions 1.0, 2.1,
+		 * We need to set REQ_DEPTH to 7 for U3 versions 1.0, 2.1,
 		 * 2.2 and 2.3, Darwin do so.
 		 */
 		if ((command >> AGPSTAT_RQ_DEPTH_SHIFT) > 7)
diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c
index b710fab..a53b848 100644
--- a/drivers/gpu/drm/mga/mga_state.c
+++ b/drivers/gpu/drm/mga/mga_state.c
@@ -239,7 +239,7 @@ static __inline__ void mga_g200_emit_pipe(drm_mga_private_t * dev_priv)
 		  MGA_WR34, 0x00000000,
 		  MGA_WR42, 0x0000ffff, MGA_WR60, 0x0000ffff);
 
-	/* Padding required to to hardware bug.
+	/* Padding required due to hardware bug.
 	 */
 	DMA_BLOCK(MGA_DMAPAD, 0xffffffff,
 		  MGA_DMAPAD, 0xffffffff,
@@ -317,7 +317,7 @@ static __inline__ void mga_g400_emit_pipe(drm_mga_private_t * dev_priv)
 		  MGA_WR52, MGA_G400_WR_MAGIC,	/* tex1 width        */
 		  MGA_WR60, MGA_G400_WR_MAGIC);	/* tex1 height       */
 
-	/* Padding required to to hardware bug */
+	/* Padding required due to hardware bug */
 	DMA_BLOCK(MGA_DMAPAD, 0xffffffff,
 		  MGA_DMAPAD, 0xffffffff,
 		  MGA_DMAPAD, 0xffffffff,
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a8d0aee..8aaad65 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -894,7 +894,7 @@ void guest_set_pte(struct lg_cpu *cpu,
  * tells us they've changed.  When the Guest tries to use the new entry it will
  * fault and demand_page() will fix it up.
  *
- * So with that in mind here's our code to to update a (top-level) PGD entry:
+ * So with that in mind here's our code to update a (top-level) PGD entry:
  */
 void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 {
diff --git a/drivers/media/video/gspca/m5602/m5602_core.c b/drivers/media/video/gspca/m5602/m5602_core.c
index 8a5bba1..7f1e541 100644
--- a/drivers/media/video/gspca/m5602/m5602_core.c
+++ b/drivers/media/video/gspca/m5602/m5602_core.c
@@ -56,7 +56,7 @@ int m5602_read_bridge(struct sd *sd, const u8 address, u8 *i2c_data)
 	return (err < 0) ? err : 0;
 }
 
-/* Writes a byte to to the m5602 */
+/* Writes a byte to the m5602 */
 int m5602_write_bridge(struct sd *sd, const u8 address, const u8 i2c_data)
 {
 	int err;
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index bc14bb1..8867152 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -512,7 +512,7 @@ static void mxcmci_cmd_done(struct mxcmci_host *host, unsigned int stat)
 	}
 
 	/* For the DMA case the DMA engine handles the data transfer
-	 * automatically. For non DMA we have to to it ourselves.
+	 * automatically. For non DMA we have to do it ourselves.
 	 * Don't do it in interrupt context though.
 	 */
 	if (!mxcmci_use_dma(host) && host->data)
diff --git a/drivers/mtd/maps/ixp2000.c b/drivers/mtd/maps/ixp2000.c
index d4fb9a3..1bdf0ee 100644
--- a/drivers/mtd/maps/ixp2000.c
+++ b/drivers/mtd/maps/ixp2000.c
@@ -184,7 +184,7 @@ static int ixp2000_flash_probe(struct platform_device *dev)
 	info->map.bankwidth = 1;
 
 	/*
- 	 * map_priv_2 is used to store a ptr to to the bank_setup routine
+ 	 * map_priv_2 is used to store a ptr to the bank_setup routine
  	 */
 	info->map.map_priv_2 = (unsigned long) ixp_data->bank_setup;
 
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index e4d9ef0..9f87c99 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1065,7 +1065,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	}
 
 	/*
-	 * Now we have got to calculate how much data we have to to copy. In
+	 * Now we have got to calculate how much data we have to copy. In
 	 * case of a static volume it is fairly easy - the VID header contains
 	 * the data size. In case of a dynamic volume it is more difficult - we
 	 * have to read the contents, cut 0xFF bytes from the end and copy only
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 6a5fe96..4787794 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -570,7 +570,7 @@ void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
 
 /*
  * ubi_rb_for_each_entry - walk an RB-tree.
- * @rb: a pointer to type 'struct rb_node' to to use as a loop counter
+ * @rb: a pointer to type 'struct rb_node' to use as a loop counter
  * @pos: a pointer to RB-tree entry type to use as a loop counter
  * @root: RB-tree's root
  * @member: the name of the 'struct rb_node' within the RB-tree entry
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index cea5cfe..c3fa31c 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1987,7 +1987,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
 			// find new aggregator for the related port(s)
 			new_aggregator = __get_first_agg(port);
 			for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) {
-				// if the new aggregator is empty, or it connected to to our port only
+				// if the new aggregator is empty, or it is connected to our port only
 				if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) {
 					break;
 				}
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c
index cda6b39..45ac225 100644
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -3035,7 +3035,7 @@ s32 e1000_check_for_link(struct e1000_hw *hw)
                 /* If TBI compatibility is was previously off, turn it on. For
                  * compatibility with a TBI link partner, we will store bad
                  * packets. Some frames have an additional byte on the end and
-                 * will look like CRC errors to to the hardware.
+                 * will look like CRC errors to the hardware.
                  */
                 if (!hw->tbi_compatibility_on) {
                     hw->tbi_compatibility_on = true;
diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c
index 5e110a2..4e79a98 100644
--- a/drivers/net/wireless/zd1211rw/zd_chip.c
+++ b/drivers/net/wireless/zd1211rw/zd_chip.c
@@ -368,7 +368,7 @@ error:
 	return r;
 }
 
-/* MAC address: if custom mac addresses are to to be used CR_MAC_ADDR_P1 and
+/* MAC address: if custom mac addresses are to be used CR_MAC_ADDR_P1 and
  *              CR_MAC_ADDR_P2 must be overwritten
  */
 int zd_write_mac_addr(struct zd_chip *chip, const u8 *mac_addr)
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 7dc3d18..a39addc 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -718,7 +718,7 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp,
  * megasas_build_ldio -	Prepares IOs to logical devices
  * @instance:		Adapter soft state
  * @scp:		SCSI command
- * @cmd:		Command to to be prepared
+ * @cmd:		Command to be prepared
  *
  * Frames (and accompanying SGLs) for regular SCSI IOs use this function.
  */
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 40e3caf..83c8b5e 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1422,7 +1422,7 @@ static void qla4xxx_slave_destroy(struct scsi_device *sdev)
 /**
  * qla4xxx_del_from_active_array - returns an active srb
  * @ha: Pointer to host adapter structure.
- * @index: index into to the active_array
+ * @index: index into the active_array
  *
  * This routine removes and returns the srb at the specified index
  **/
@@ -1500,7 +1500,7 @@ static int qla4xxx_wait_for_hba_online(struct scsi_qla_host *ha)
 
 /**
  * qla4xxx_eh_wait_for_commands - wait for active cmds to finish.
- * @ha: pointer to to HBA
+ * @ha: pointer to HBA
  * @t: target id
  * @l: lun id
  *
diff --git a/drivers/staging/rt2860/rtmp.h b/drivers/staging/rt2860/rtmp.h
index 3f498f6..90fd40f 100644
--- a/drivers/staging/rt2860/rtmp.h
+++ b/drivers/staging/rt2860/rtmp.h
@@ -2060,7 +2060,7 @@ typedef struct _STA_ADMIN_CONFIG {
     BOOLEAN		AdhocBGJoined;		// Indicate Adhoc B/G Join.
     BOOLEAN		Adhoc20NJoined;		// Indicate Adhoc 20MHz N Join.
 #endif
-	// New for WPA, windows want us to to keep association information and
+	// New for WPA, windows want us to keep association information and
 	// Fixed IEs from last association response
 	NDIS_802_11_ASSOCIATION_INFORMATION     AssocInfo;
 	USHORT       ReqVarIELen;                // Length of next VIE include EID & Length
diff --git a/drivers/usb/serial/cypress_m8.h b/drivers/usb/serial/cypress_m8.h
index e772b01..1fd360e 100644
--- a/drivers/usb/serial/cypress_m8.h
+++ b/drivers/usb/serial/cypress_m8.h
@@ -57,7 +57,7 @@
 #define	UART_RI		0x10	/* ring indicator - modem - device to host */
 #define UART_CD		0x40	/* carrier detect - modem - device to host */
 #define CYP_ERROR 	0x08	/* received from input report - device to host */
-/* Note - the below has nothing to to with the "feature report" reset */
+/* Note - the below has nothing to do with the "feature report" reset */
 #define CONTROL_RESET	0x08  	/* sent with output report - host to device */
 
 /* End of RS-232 protocol definitions */
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index dc0f832..b97960a 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -2540,7 +2540,7 @@ static int calc_baud_rate_divisor(int baudrate, int *divisor)
 
 /*****************************************************************************
  * send_cmd_write_uart_register
- *  this function builds up a uart register message and sends to to the device.
+ *  this function builds up a uart register message and sends to the device.
  *****************************************************************************/
 static int send_cmd_write_uart_register(struct edgeport_port *edge_port,
 						__u8 regNum, __u8 regValue)
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index a616731..f737337 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -38,7 +38,7 @@
  *   0.3a - implemented pools of write URBs
  *   0.3  - alpha version for public testing
  *   0.2  - TIOCMGET works, so autopilot(1) can be used!
- *   0.1  - can be used to to pilot-xfer -p /dev/ttyUSB0 -l
+ *   0.1  - can be used to do pilot-xfer -p /dev/ttyUSB0 -l
  *
  *   The driver skeleton is mainly based on mct_u232.c and various other
  *   pieces of code shamelessly copied from the drivers/usb/serial/ directory.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4abd683..3a79873 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2337,7 +2337,7 @@ static int __mpage_da_writepage(struct page *page,
 		/*
 		 * Rest of the page in the page_vec
 		 * redirty then and skip then. We will
-		 * try to to write them again after
+		 * try to write them again after
 		 * starting a new transaction
 		 */
 		redirty_page_for_writepage(wbc, page);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 28c590b..8f1cfb0 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -179,7 +179,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
  * always aligned to a 64 bit boundary.
  *
  * The size of the buffer is in bytes, but is it assumed that it is
- * always ok to to read a complete multiple of 64 bits at the end
+ * always ok to read a complete multiple of 64 bits at the end
  * of the block in case the end is no aligned to a natural boundary.
  *
  * Return: the block number (bitmap buffer scope) that was found
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 50931b1..8b2549f 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -829,7 +829,7 @@ enum {
 	/* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
 	   F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
 	   F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest.  This mask
-	   is used to to obtain all flags that are valid for setting. */
+	   is used to obtain all flags that are valid for setting. */
 	/*
 	 * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all
 	 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 37ba576..8052236 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -288,7 +288,7 @@ typedef u32 acpi_physical_address;
 /*
  * Some compilers complain about unused variables. Sometimes we don't want to
  * use all the variables (for example, _acpi_module_name). This allows us
- * to to tell the compiler in a per-variable manner that a variable
+ * to tell the compiler in a per-variable manner that a variable
  * is unused
  */
 #ifndef ACPI_UNUSED_VAR
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index 935c5d7..6aadbf8 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -57,7 +57,7 @@
 /*
  * Some compilers complain about unused variables. Sometimes we don't want to
  * use all the variables (for example, _acpi_module_name). This allows us
- * to to tell the compiler warning in a per-variable manner that a variable
+ * to tell the compiler warning in a per-variable manner that a variable
  * is unused.
  */
 #define ACPI_UNUSED_VAR __attribute__ ((unused))
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 9388583..c8f94e8 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -482,7 +482,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
  *   message.
  * @cm_id: Connection identifier associated with the connection message.
  * @service_timeout: The lower 5-bits specify the maximum time required for
- *   the sender to reply to to the connection message.  The upper 3-bits
+ *   the sender to reply to the connection message.  The upper 3-bits
  *   specify additional control flags.
  * @private_data: Optional user-defined private data sent with the
  *   message receipt acknowledgement.
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 9489a0a..cc89be5 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -48,7 +48,7 @@ static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
 
 /*
  * Note about RCU :
- * It is used to to delay the free of multiple probes array until a quiescent
+ * It is used to delay the free of multiple probes array until a quiescent
  * state is reached.
  * Tracepoint entries modifications are protected by the tracepoints_mutex.
  */
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index c3e4a2ba..46a31e5 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -135,7 +135,7 @@ static const config configuration_table[10] = {
 
 /* ===========================================================================
  * Update a hash value with the given input byte
- * IN  assertion: all calls to to UPDATE_HASH are made with consecutive
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive
  *    input characters, so that a running hash key can be computed from the
  *    previous key instead of complete recalculation each time.
  */
@@ -146,7 +146,7 @@ static const config configuration_table[10] = {
  * Insert string str in the dictionary and set match_head to the previous head
  * of the hash chain (the most recent string with same hash key). Return
  * the previous length of the hash chain.
- * IN  assertion: all calls to to INSERT_STRING are made with consecutive
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive
  *    input characters and the first MIN_MATCH bytes of str are valid
  *    (except for the last MIN_MATCH-1 bytes of the input file).
  */
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index d923124..bc0019f 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -96,7 +96,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 }
 
 /*
- * allocate a new client call and attempt to to get a connection slot for it
+ * allocate a new client call and attempt to get a connection slot for it
  */
 static struct rxrpc_call *rxrpc_alloc_client_call(
 	struct rxrpc_sock *rx,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 375d64c..2c5c76b 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -77,7 +77,7 @@
  *   The service curve parameters are converted to the internal
  *   representation. The slope values are scaled to avoid overflow.
  *   the inverse slope values as well as the y-projection of the 1st
- *   segment are kept in order to to avoid 64-bit divide operations
+ *   segment are kept in order to avoid 64-bit divide operations
  *   that are expensive on 32-bit architectures.
  */
 
-- 
cgit v0.10.2


From 36726dd9229af008f31edd46b22a658354783232 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Tue, 21 Jul 2009 15:57:47 -0300
Subject: trivial: fix typo s/ketymap/keymap/ in comment

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index c9523e4..adb09e2 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -229,7 +229,7 @@ struct atkbd {
 };
 
 /*
- * System-specific ketymap fixup routine
+ * System-specific keymap fixup routine
  */
 static void (*atkbd_platform_fixup)(struct atkbd *, const void *data);
 static void *atkbd_platform_fixup_data;
-- 
cgit v0.10.2


From 7da8b6ddc7a03346f825925e0d981ca2bd1ed617 Mon Sep 17 00:00:00 2001
From: Michael Tokarev <mjt@tls.msk.ru>
Date: Wed, 22 Jul 2009 17:50:23 +0400
Subject: trivial: fix missing printk space in amd_k7_smp_check

This trivial patch fixes one missing space in printk.

I already fixed it about half a year ago or more, but the change (in
arch/x86/kernel/cpu/smpboot.c at that time) didn't made into
mainline yet.

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>

index 28e5f59..6c139ed 100644
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f32fa71..c910a71 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -184,7 +184,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 	 * approved Athlon
 	 */
 	WARN_ONCE(1, "WARNING: This combination of AMD"
-		"processors is not suitable for SMP.\n");
+		" processors is not suitable for SMP.\n");
 	if (!test_taint(TAINT_UNSAFE_SMP))
 		add_taint(TAINT_UNSAFE_SMP);
 
-- 
cgit v0.10.2


From 3c78f5d81ae8131a24577b5551a6d1467b30e0af Mon Sep 17 00:00:00 2001
From: Marton Nemeth <nm127@freemail.hu>
Date: Wed, 22 Jul 2009 22:44:23 +0200
Subject: trivial: fix typo in CONFIG_DEBUG_FS in gcov doc

The correct name is CONFIG_DEBUG_FS, add the missing underscore.

Signed-off-by: Marton Nemeth <nm127@freemail.hu>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt
index 40ec633..e7ca647 100644
--- a/Documentation/gcov.txt
+++ b/Documentation/gcov.txt
@@ -47,7 +47,7 @@ Possible uses:
 
 Configure the kernel with:
 
-        CONFIG_DEBUGFS=y
+        CONFIG_DEBUG_FS=y
         CONFIG_GCOV_KERNEL=y
 
 and to get coverage data for the entire kernel:
-- 
cgit v0.10.2


From 31d0f84591b3bf49801a7e3f905a6089d857aa87 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Fri, 17 Jul 2009 01:00:01 +0200
Subject: trivial: media/video/cx88: add __init/__exit macros to cx88 drivers

Trivial patch which adds the __init and __exit macros to the module_init /
module_exit functions to several files in drivers/media/video/cx88/

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c
index 356d689..fbdc1cd 100644
--- a/drivers/media/video/cx88/cx88-blackbird.c
+++ b/drivers/media/video/cx88/cx88-blackbird.c
@@ -1371,7 +1371,7 @@ static struct cx8802_driver cx8802_blackbird_driver = {
 	.advise_release	= cx8802_blackbird_advise_release,
 };
 
-static int blackbird_init(void)
+static int __init blackbird_init(void)
 {
 	printk(KERN_INFO "cx2388x blackbird driver version %d.%d.%d loaded\n",
 	       (CX88_VERSION_CODE >> 16) & 0xff,
@@ -1384,7 +1384,7 @@ static int blackbird_init(void)
 	return cx8802_register_driver(&cx8802_blackbird_driver);
 }
 
-static void blackbird_fini(void)
+static void __exit blackbird_fini(void)
 {
 	cx8802_unregister_driver(&cx8802_blackbird_driver);
 }
diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c
index 6e5d142..518bcfe 100644
--- a/drivers/media/video/cx88/cx88-dvb.c
+++ b/drivers/media/video/cx88/cx88-dvb.c
@@ -1350,7 +1350,7 @@ static struct cx8802_driver cx8802_dvb_driver = {
 	.advise_release = cx8802_dvb_advise_release,
 };
 
-static int dvb_init(void)
+static int __init dvb_init(void)
 {
 	printk(KERN_INFO "cx88/2: cx2388x dvb driver version %d.%d.%d loaded\n",
 	       (CX88_VERSION_CODE >> 16) & 0xff,
@@ -1363,7 +1363,7 @@ static int dvb_init(void)
 	return cx8802_register_driver(&cx8802_dvb_driver);
 }
 
-static void dvb_fini(void)
+static void __exit dvb_fini(void)
 {
 	cx8802_unregister_driver(&cx8802_dvb_driver);
 }
diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c
index 7172dcf..de9ff0f 100644
--- a/drivers/media/video/cx88/cx88-mpeg.c
+++ b/drivers/media/video/cx88/cx88-mpeg.c
@@ -870,7 +870,7 @@ static struct pci_driver cx8802_pci_driver = {
 	.remove   = __devexit_p(cx8802_remove),
 };
 
-static int cx8802_init(void)
+static int __init cx8802_init(void)
 {
 	printk(KERN_INFO "cx88/2: cx2388x MPEG-TS Driver Manager version %d.%d.%d loaded\n",
 	       (CX88_VERSION_CODE >> 16) & 0xff,
@@ -883,7 +883,7 @@ static int cx8802_init(void)
 	return pci_register_driver(&cx8802_pci_driver);
 }
 
-static void cx8802_fini(void)
+static void __exit cx8802_fini(void)
 {
 	pci_unregister_driver(&cx8802_pci_driver);
 }
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 2bb54c3..bdd1d8a 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -2113,7 +2113,7 @@ static struct pci_driver cx8800_pci_driver = {
 #endif
 };
 
-static int cx8800_init(void)
+static int __init cx8800_init(void)
 {
 	printk(KERN_INFO "cx88/0: cx2388x v4l2 driver version %d.%d.%d loaded\n",
 	       (CX88_VERSION_CODE >> 16) & 0xff,
@@ -2126,7 +2126,7 @@ static int cx8800_init(void)
 	return pci_register_driver(&cx8800_pci_driver);
 }
 
-static void cx8800_fini(void)
+static void __exit cx8800_fini(void)
 {
 	pci_unregister_driver(&cx8800_pci_driver);
 }
-- 
cgit v0.10.2


From 3dbda77e6f3375f87090cfce97b2551d3723521b Mon Sep 17 00:00:00 2001
From: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Date: Thu, 23 Jul 2009 08:31:31 +0200
Subject: trivial: fix typos "man[ae]g?ment" -> "management"

Signed-off-by: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 8e14585..df0d089 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -568,7 +568,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 			<para>
 				The blocks in which the tables are stored are procteted against
 				accidental access by marking them bad in the memory bad block
-				table. The bad block table managment functions are allowed
+				table. The bad block table management functions are allowed
 				to circumvernt this protection.
 			</para>
 			<para>
diff --git a/Documentation/DocBook/scsi.tmpl b/Documentation/DocBook/scsi.tmpl
index 10a150a..d87f456 100644
--- a/Documentation/DocBook/scsi.tmpl
+++ b/Documentation/DocBook/scsi.tmpl
@@ -317,7 +317,7 @@
         <para>
           The SAS transport class contains common code to deal with SAS HBAs,
           an aproximated representation of SAS topologies in the driver model,
-          and various sysfs attributes to expose these topologies and managment
+          and various sysfs attributes to expose these topologies and management
           interfaces to userspace.
         </para>
         <para>
diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index eaa4801..38e9e7c 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -514,7 +514,7 @@ iv.	Remove yield() while mailbox handshake in synchronous commands
 
 v.	Remove redundant __megaraid_busywait_mbox routine
 
-vi.	Fix bug in the managment module, which causes a system lockup when the
+vi.	Fix bug in the management module, which causes a system lockup when the
 	IO module is loaded and then unloaded, followed by executing any
 	management utility. The current version of management module does not
 	handle the adapter unregister properly.
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 97eebd6..f1708b7 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -387,7 +387,7 @@ STAC92HD73*
 STAC92HD83*
 ===========
   ref		Reference board
-  mic-ref	Reference board with power managment for ports
+  mic-ref	Reference board with power management for ports
   dell-s14	Dell laptop
   auto		BIOS setup (default)
 
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 1b6292b..957b22f 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -133,7 +133,7 @@ of ftrace. Here is a list of some of the key files:
 	than requested, the rest of the page will be used,
 	making the actual allocation bigger than requested.
 	( Note, the size may not be a multiple of the page size
-	  due to buffer managment overhead. )
+	  due to buffer management overhead. )
 
 	This can only be updated when the current_tracer
 	is set to "nop".
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 7350557..5466112 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -25,7 +25,7 @@ KBUILD_CFLAGS	+=$(call cc-option,-marm,)
 # Select a platform tht is kept up-to-date
 KBUILD_DEFCONFIG := versatile_defconfig
 
-# defines filename extension depending memory manement type.
+# defines filename extension depending memory management type.
 ifeq ($(CONFIG_MMU),)
 MMUEXT		:= -nommu
 endif
diff --git a/arch/frv/lib/cache.S b/arch/frv/lib/cache.S
index 0e10ad8..0c4fb20 100644
--- a/arch/frv/lib/cache.S
+++ b/arch/frv/lib/cache.S
@@ -1,4 +1,4 @@
-/* cache.S: cache managment routines
+/* cache.S: cache management routines
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
diff --git a/arch/mn10300/include/asm/cacheflush.h b/arch/mn10300/include/asm/cacheflush.h
index 2db746a..1a55d61 100644
--- a/arch/mn10300/include/asm/cacheflush.h
+++ b/arch/mn10300/include/asm/cacheflush.h
@@ -17,7 +17,7 @@
 #include <linux/mm.h>
 
 /*
- * virtually-indexed cache managment (our cache is physically indexed)
+ * virtually-indexed cache management (our cache is physically indexed)
  */
 #define flush_cache_all()			do {} while (0)
 #define flush_cache_mm(mm)			do {} while (0)
@@ -31,7 +31,7 @@
 #define flush_dcache_mmap_unlock(mapping)	do {} while (0)
 
 /*
- * physically-indexed cache managment
+ * physically-indexed cache management
  */
 #ifndef CONFIG_MN10300_CACHE_DISABLED
 
diff --git a/drivers/media/dvb/siano/smscoreapi.c b/drivers/media/dvb/siano/smscoreapi.c
index bd9ab9d..fa6a623 100644
--- a/drivers/media/dvb/siano/smscoreapi.c
+++ b/drivers/media/dvb/siano/smscoreapi.c
@@ -1367,7 +1367,7 @@ int smscore_set_gpio(struct smscore_device_t *coredev, u32 pin, int level)
 					    &msg, sizeof(msg));
 }
 
-/* new GPIO managment implementation */
+/* new GPIO management implementation */
 static int GetGpioPinParams(u32 PinNum, u32 *pTranslatedPinNum,
 		u32 *pGroupNum, u32 *pGroupCfg) {
 
diff --git a/drivers/media/dvb/siano/smscoreapi.h b/drivers/media/dvb/siano/smscoreapi.h
index f1108c6..eec18aa 100644
--- a/drivers/media/dvb/siano/smscoreapi.h
+++ b/drivers/media/dvb/siano/smscoreapi.h
@@ -657,12 +657,12 @@ struct smscore_buffer_t *smscore_getbuffer(struct smscore_device_t *coredev);
 extern void smscore_putbuffer(struct smscore_device_t *coredev,
 			      struct smscore_buffer_t *cb);
 
-/* old GPIO managment */
+/* old GPIO management */
 int smscore_configure_gpio(struct smscore_device_t *coredev, u32 pin,
 			   struct smscore_config_gpio *pinconfig);
 int smscore_set_gpio(struct smscore_device_t *coredev, u32 pin, int level);
 
-/* new GPIO managment */
+/* new GPIO management */
 extern int smscore_gpio_configure(struct smscore_device_t *coredev, u8 PinNum,
 		struct smscore_gpio_config *pGpioConfig);
 extern int smscore_gpio_set_level(struct smscore_device_t *coredev, u8 PinNum,
diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c
index 575bf9d..a123908 100644
--- a/drivers/media/radio/radio-mr800.c
+++ b/drivers/media/radio/radio-mr800.c
@@ -46,7 +46,7 @@
  * Version 0.11:	Converted to v4l2_device.
  *
  * Many things to do:
- * 	- Correct power managment of device (suspend & resume)
+ * 	- Correct power management of device (suspend & resume)
  * 	- Add code for scanning and smooth tuning
  * 	- Add code for sensitivity value
  * 	- Correct mistakes
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 76fa2ee..610e914 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -6821,7 +6821,7 @@ mpt_print_ioc_summary(MPT_ADAPTER *ioc, char *buffer, int *size, int len, int sh
 	*size = y;
 }
 /**
- *	mpt_set_taskmgmt_in_progress_flag - set flags associated with task managment
+ *	mpt_set_taskmgmt_in_progress_flag - set flags associated with task management
  *	@ioc: Pointer to MPT_ADAPTER structure
  *
  *	Returns 0 for SUCCESS or -1 if FAILED.
@@ -6854,7 +6854,7 @@ mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
 EXPORT_SYMBOL(mpt_set_taskmgmt_in_progress_flag);
 
 /**
- *	mpt_clear_taskmgmt_in_progress_flag - clear flags associated with task managment
+ *	mpt_clear_taskmgmt_in_progress_flag - clear flags associated with task management
  *	@ioc: Pointer to MPT_ADAPTER structure
  *
  **/
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index fb65b42..1d0d4d9 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -241,7 +241,7 @@ static int macb_mii_init(struct macb *bp)
 	struct eth_platform_data *pdata;
 	int err = -ENXIO, i;
 
-	/* Enable managment port */
+	/* Enable management port */
 	macb_writel(bp, NCR, MACB_BIT(MPE));
 
 	bp->mii_bus = mdiobus_alloc();
diff --git a/drivers/net/wireless/ath/ath5k/reg.h b/drivers/net/wireless/ath/ath5k/reg.h
index debad07..c63ea6a 100644
--- a/drivers/net/wireless/ath/ath5k/reg.h
+++ b/drivers/net/wireless/ath/ath5k/reg.h
@@ -982,7 +982,7 @@
 #define AR5K_5414_CBCFG_BUF_DIS	0x10	/* Disable buffer */
 
 /*
- * PCI-E Power managment configuration
+ * PCI-E Power management configuration
  * and status register [5424+]
  */
 #define	AR5K_PCIE_PM_CTL		0x4068			/* Register address */
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index a3b36b3..cce1888 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -3330,7 +3330,7 @@ static void atmel_smooth_qual(struct atmel_private *priv)
 	priv->wstats.qual.updated &= ~IW_QUAL_QUAL_INVALID;
 }
 
-/* deals with incoming managment frames. */
+/* deals with incoming management frames. */
 static void atmel_management_frame(struct atmel_private *priv,
 				   struct ieee80211_hdr *header,
 				   u16 frame_len, u8 rssi)
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index d31d322..ffe1625 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1150,7 +1150,7 @@ void xhci_dbg_cmd_ptrs(struct xhci_hcd *xhci);
 void xhci_dbg_ring_ptrs(struct xhci_hcd *xhci, struct xhci_ring *ring);
 void xhci_dbg_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx, unsigned int last_ep);
 
-/* xHCI memory managment */
+/* xHCI memory management */
 void xhci_mem_cleanup(struct xhci_hcd *xhci);
 int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags);
 void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id);
diff --git a/drivers/usb/wusbcore/wa-hc.h b/drivers/usb/wusbcore/wa-hc.h
index 586d350..d6bea3e 100644
--- a/drivers/usb/wusbcore/wa-hc.h
+++ b/drivers/usb/wusbcore/wa-hc.h
@@ -47,7 +47,7 @@
  *             to an endpoint on a WUSB device that is connected to a
  *             HWA RC.
  *
- *  xfer       Transfer managment -- this is all the code that gets a
+ *  xfer       Transfer management -- this is all the code that gets a
  *             buffer and pushes it to a device (or viceversa). *
  *
  * Some day a lot of this code will be shared between this driver and
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 536ca12..78c3bed 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -104,7 +104,7 @@
 #define DL_UNITDATA_IND		0x3108
 #define DL_INFORMATION_IND	0x0008
 
-/* intern layer 2 managment */
+/* intern layer 2 management */
 #define MDL_ASSIGN_REQ		0x1804
 #define MDL_ASSIGN_IND		0x1904
 #define MDL_REMOVE_REQ		0x1A04
-- 
cgit v0.10.2


From 8103b5cc6216d461047514d188248bd14873624a Mon Sep 17 00:00:00 2001
From: Michael Brunner <mibru@gmx.de>
Date: Tue, 4 Aug 2009 00:41:11 +0200
Subject: trivial: SubmittingPatches: Fix reference to renumbered step

This patch fixes a reference to step "Select e-mail destination." which
has been renumbered from 4) to 5) in linux-2.6.22.

Signed-off-by: Michael Brunner <mibru@gmx.de>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 5c555a8b..b7f9d3b 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -183,7 +183,7 @@ the MAN-PAGES maintainer (as listed in the MAINTAINERS file)
 a man-pages patch, or at least a notification of the change,
 so that some information makes its way into the manual pages.
 
-Even if the maintainer did not respond in step #4, make sure to ALWAYS
+Even if the maintainer did not respond in step #5, make sure to ALWAYS
 copy the maintainer when you change their code.
 
 For small patches you may want to CC the Trivial Patch Monkey
-- 
cgit v0.10.2


From 2bace8b95108746f6123d312f47f5bda4eb17a26 Mon Sep 17 00:00:00 2001
From: Christian Thaeter <ct@pipapo.org>
Date: Sat, 25 Jul 2009 20:55:15 +0200
Subject: trivial: doc: hpfall: reduce risk that hpfall can do harm

Improve the example code to be at least useable, as in not causing
harm (as shown below). Code can still be improved further, but this
adds some basic safeguards.

1. hpfall *MUST* mlockall(MCL_CURRENT|MCL_FUTURE); itself!
Since the Program sits and waits most of the time it becomes very likely
swapped out. If it gets woken up when the laptop drops from the table
while it is swapped out it actually triggers harddrive activity!

2. Daemonize hpfall using 'daemon(0,0)' (quick and dirty).

3. Give hpfall realtime priority.
Should give a chance that it has less latency when woken up.

Signed-off-by: Christian Thaeter <ct@pipapo.org>
Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/hwmon/hpfall.c b/Documentation/hwmon/hpfall.c
index d2f6711..a3cfe1a 100644
--- a/Documentation/hwmon/hpfall.c
+++ b/Documentation/hwmon/hpfall.c
@@ -16,6 +16,8 @@
 #include <stdint.h>
 #include <errno.h>
 #include <signal.h>
+#include <sys/mman.h>
+#include <sched.h>
 
 void write_int(char *path, int i)
 {
@@ -62,6 +64,7 @@ void ignore_me(void)
 int main(int argc, char *argv[])
 {
 	int fd, ret;
+	struct sched_param param;
 
 	fd = open("/dev/freefall", O_RDONLY);
 	if (fd < 0) {
@@ -69,6 +72,11 @@ int main(int argc, char *argv[])
 		return EXIT_FAILURE;
 	}
 
+	daemon(0, 0);
+	param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+	sched_setscheduler(0, SCHED_FIFO, &param);
+	mlockall(MCL_CURRENT|MCL_FUTURE);
+
 	signal(SIGALRM, ignore_me);
 
 	for (;;) {
-- 
cgit v0.10.2


From be3990b7efbe8784fe063fb6871a772c0703891a Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Sat, 25 Jul 2009 21:00:12 +0200
Subject: trivial: doc: hpfall: accept disk device to unload as argument

Allows users who use an IDE driver for their disk to use hpfall without
having to modify the source. By default /dev/sda is used.
Suggested by Christian Thaeter in http://lkml.org/lkml/2009/3/25/505.

While we're add it, improve error message if opening /dev/freefall fails.

Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Christian Thaeter <ct@pipapo.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/hwmon/hpfall.c b/Documentation/hwmon/hpfall.c
index a3cfe1a..681ec22 100644
--- a/Documentation/hwmon/hpfall.c
+++ b/Documentation/hwmon/hpfall.c
@@ -19,6 +19,32 @@
 #include <sys/mman.h>
 #include <sched.h>
 
+char unload_heads_path[64];
+
+int set_unload_heads_path(char *device)
+{
+	char devname[64];
+
+	if (strlen(device) <= 5 || strncmp(device, "/dev/", 5) != 0)
+		return -EINVAL;
+	strncpy(devname, device + 5, sizeof(devname));
+
+	snprintf(unload_heads_path, sizeof(unload_heads_path),
+				"/sys/block/%s/device/unload_heads", devname);
+	return 0;
+}
+int valid_disk(void)
+{
+	int fd = open(unload_heads_path, O_RDONLY);
+	if (fd < 0) {
+		perror(unload_heads_path);
+		return 0;
+	}
+
+	close(fd);
+	return 1;
+}
+
 void write_int(char *path, int i)
 {
 	char buf[1024];
@@ -42,7 +68,7 @@ void set_led(int on)
 
 void protect(int seconds)
 {
-	write_int("/sys/block/sda/device/unload_heads", seconds*1000);
+	write_int(unload_heads_path, seconds*1000);
 }
 
 int on_ac(void)
@@ -61,14 +87,27 @@ void ignore_me(void)
 	set_led(0);
 }
 
-int main(int argc, char *argv[])
+int main(int argc, char **argv)
 {
 	int fd, ret;
 	struct sched_param param;
 
+	if (argc == 1)
+		ret = set_unload_heads_path("/dev/sda");
+	else if (argc == 2)
+		ret = set_unload_heads_path(argv[1]);
+	else
+		ret = -EINVAL;
+
+	if (ret || !valid_disk()) {
+		fprintf(stderr, "usage: %s <device> (default: /dev/sda)\n",
+				argv[0]);
+		exit(1);
+	}
+
 	fd = open("/dev/freefall", O_RDONLY);
 	if (fd < 0) {
-		perror("open");
+		perror("/dev/freefall");
 		return EXIT_FAILURE;
 	}
 
-- 
cgit v0.10.2


From 22f8b458223151272f6a1e57847b90979958cc7b Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Fri, 24 Jul 2009 05:31:17 +0200
Subject: trivial: improve help text for mm debug config options

Improve the help text for PAGE_POISONING.
Also fix some typos and improve consistency within the file.

Signed-of-by: Frans Pop <elendil@planet.nl>

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index aa99fd1..af7cfb4 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -6,7 +6,7 @@ config DEBUG_PAGEALLOC
 	---help---
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
-	  of memory corruptions.
+	  of memory corruption.
 
 config WANT_PAGE_DEBUG_FLAGS
 	bool
@@ -17,11 +17,11 @@ config PAGE_POISONING
 	depends on !HIBERNATION
 	select DEBUG_PAGEALLOC
 	select WANT_PAGE_DEBUG_FLAGS
-	help
+	---help---
 	   Fill the pages with poison patterns after free_pages() and verify
 	   the patterns before alloc_pages(). This results in a large slowdown,
-	   but helps to find certain types of memory corruptions.
+	   but helps to find certain types of memory corruption.
 
-	   This option cannot enalbe with hibernation. Otherwise, it will get
-	   wrong messages for memory corruption because the free pages are not
-	   saved to the suspend image.
+	   This option cannot be enabled in combination with hibernation as
+	   that would result in incorrect warnings of memory corruption after
+	   a resume because free pages are not saved to the suspend image.
-- 
cgit v0.10.2


From f0a75770bde5cb5f9596bd0256d8ec3649720525 Mon Sep 17 00:00:00 2001
From: Trevor Keith <tsrk@tsrk.net>
Date: Fri, 24 Jul 2009 11:29:29 -0700
Subject: trivial: kbuild: remove extraneous blank line after declaration of
 usage()

Signed-off-by: Trevor Keith <tsrk@tsrk.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index f9c1e54..8ab4486 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -125,7 +125,6 @@ char *depfile;
 char *cmdline;
 
 void usage(void)
-
 {
 	fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
 	exit(1);
-- 
cgit v0.10.2


From 2944fcbe03d65a704f07e43efe14adb0d226fd09 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Date: Wed, 5 Aug 2009 22:06:42 +0200
Subject: trivial: Fix duplicated word "options" in comment

this was introduced in

	5e0a093 (tracing: fix config options to not show when automatically selected)

Signed-off-by: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: trivial@kernel.org
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e716346..b416512 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP
 # This allows those options to appear when no other tracer is selected. But the
 # options do not appear when something else selects it. We need the two options
 # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
-# hidding of the automatic options options.
+# hidding of the automatic options.
 
 config TRACING
 	bool
-- 
cgit v0.10.2


From a419aef8b858a2bdb98df60336063d28df4b272f Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 18 Aug 2009 11:18:35 -0700
Subject: trivial: remove unnecessary semicolons

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index bd9914b..1baa635 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -496,7 +496,7 @@ static int __init hypfs_init(void)
 	}
 	s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
 	if (!s390_kobj) {
-		rc = -ENOMEM;;
+		rc = -ENOMEM;
 		goto fail_sysfs;
 	}
 	rc = register_filesystem(&hypfs_type);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 2c2f983..43486c2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -478,7 +478,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	if (!inti)
 		return -ENOMEM;
 
-	inti->type = KVM_S390_PROGRAM_INT;;
+	inti->type = KVM_S390_PROGRAM_INT;
 	inti->pgm.code = code;
 
 	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 8daab33..8ab1d47 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -229,7 +229,7 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 				tid = ((a << IMAP_AID_SHIFT) |
 				       (n << IMAP_NID_SHIFT));
 				tid &= (IMAP_AID_SAFARI |
-					IMAP_NID_SAFARI);;
+					IMAP_NID_SAFARI);
 			}
 		} else {
 			tid = cpuid << IMAP_TID_SHIFT;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index f114813..a74245a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -533,7 +533,7 @@ static int eth_parse(char *str, int *index_out, char **str_out,
 		     char **error_out)
 {
 	char *end;
-	int n, err = -EINVAL;;
+	int n, err = -EINVAL;
 
 	n = simple_strtoul(str, &end, 0);
 	if (end == str) {
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index b839af1..26caa3f 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6653,7 +6653,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
 	else ErrorCode = get_user(ControllerNumber,
 			     &UserSpaceControllerInfo->ControllerNumber);
 	if (ErrorCode != 0)
-		break;;
+		break;
 	ErrorCode = -ENXIO;
 	if (ControllerNumber < 0 ||
 	    ControllerNumber > DAC960_ControllerCount - 1) {
@@ -6661,7 +6661,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
 	}
 	Controller = DAC960_Controllers[ControllerNumber];
 	if (Controller == NULL)
-		break;;
+		break;
 	memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T));
 	ControllerInfo.ControllerNumber = ControllerNumber;
 	ControllerInfo.FirmwareType = Controller->FirmwareType;
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 80df93e..572ec61 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1062,7 +1062,7 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
 		goto out_release;
 	}
 	fs->swim3_intr = macio_irq(mdev, 0);
-	fs->dma_intr = macio_irq(mdev, 1);;
+	fs->dma_intr = macio_irq(mdev, 1);
 	fs->cur_cyl = -1;
 	fs->cur_sector = -1;
 	fs->secpercyl = 36;
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index ff647ca..9d589e3 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -2239,7 +2239,7 @@ static void do_softint(struct work_struct *work)
 	struct channel *ch = container_of(work, struct channel, tqueue);
 	/* Called in response to a modem change event */
 	if (ch && ch->magic == EPCA_MAGIC) {
-		struct tty_struct *tty = tty_port_tty_get(&ch->port);;
+		struct tty_struct *tty = tty_port_tty_get(&ch->port);
 
 		if (tty && tty->driver_data) {
 			if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 2b914d7..f4856a5 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -232,7 +232,7 @@ intel_dp_aux_ch(struct intel_output *intel_output,
 	for (try = 0; try < 5; try++) {
 		/* Load the send data into the aux channel data registers */
 		for (i = 0; i < send_bytes; i += 4) {
-			uint32_t    d = pack_aux(send + i, send_bytes - i);;
+			uint32_t    d = pack_aux(send + i, send_bytes - i);
 	
 			I915_WRITE(ch_data + i, d);
 		}
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 051bca6..b2f5d32 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -1319,11 +1319,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 	case 0x443C:
 		/* TX_FILTER0_[0-15] */
 		i = (reg - 0x4400) >> 2;
-		tmp = ib_chunk->kdata[idx] & 0x7;;
+		tmp = ib_chunk->kdata[idx] & 0x7;
 		if (tmp == 2 || tmp == 4 || tmp == 6) {
 			track->textures[i].roundup_w = false;
 		}
-		tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;;
+		tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
 		if (tmp == 2 || tmp == 4 || tmp == 6) {
 			track->textures[i].roundup_h = false;
 		}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 8de442c..63c53d6 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1212,7 +1212,7 @@ static int ide_find_port_slot(const struct ide_port_info *d)
 {
 	int idx = -ENOENT;
 	u8 bootable = (d && (d->host_flags & IDE_HFLAG_NON_BOOTABLE)) ? 0 : 1;
-	u8 i = (d && (d->host_flags & IDE_HFLAG_QD_2ND_PORT)) ? 1 : 0;;
+	u8 i = (d && (d->host_flags & IDE_HFLAG_QD_2ND_PORT)) ? 1 : 0;
 
 	/*
 	 * Claim an unassigned slot.
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 0608d41..60f936e 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -170,9 +170,9 @@ static int __init umc8672_init(void)
 		goto out;
 
 	if (umc8672_probe() == 0)
-		return 0;;
+		return 0;
 out:
-	return -ENODEV;;
+	return -ENODEV;
 }
 
 module_init(umc8672_init);
diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c
index 16f2e46..26626ee 100644
--- a/drivers/isdn/capi/capiutil.c
+++ b/drivers/isdn/capi/capiutil.c
@@ -1019,7 +1019,7 @@ int __init cdebug_init(void)
 	if (!g_debbuf->buf) {
 		kfree(g_cmsg);
 		kfree(g_debbuf);
-		return -ENOMEM;;
+		return -ENOMEM;
 	}
 	g_debbuf->size = CDEBUG_GSIZE;
 	g_debbuf->buf[0] = 0;
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index a98ab72..93fb320 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -274,7 +274,7 @@ static void __devinit rackmeter_init_cpu_sniffer(struct rackmeter *rm)
 
 		if (cpu > 1)
 			continue;
-		rcpu = &rm->cpu[cpu];;
+		rcpu = &rm->cpu[cpu];
 		rcpu->prev_idle = get_cpu_idle_time(cpu);
 		rcpu->prev_wall = jiffies64_to_cputime64(get_jiffies_64());
 		schedule_delayed_work_on(cpu, &rm->cpu[cpu].sniffer,
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index 646dfc5..8ea9c75 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -123,7 +123,6 @@ static void rx(struct net_device *dev, int bufnum,
 	BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx");
 
 	skb->protocol = cpu_to_be16(ETH_P_ARCNET);
-;
 	netif_rx(skb);
 }
 
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 083e210..66bcbbb 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -149,7 +149,6 @@ static void rx(struct net_device *dev, int bufnum,
 	BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx");
 
 	skb->protocol = cpu_to_be16(ETH_P_ARCNET);
-;
 	netif_rx(skb);
 }
 
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index 2234118..6c144b5 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -293,7 +293,7 @@ static int gfar_gcoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 	rxtime  = get_ictt_value(priv->rxic);
 	rxcount = get_icft_value(priv->rxic);
 	txtime  = get_ictt_value(priv->txic);
-	txcount = get_icft_value(priv->txic);;
+	txcount = get_icft_value(priv->txic);
 	cvals->rx_coalesce_usecs = gfar_ticks2usecs(priv, rxtime);
 	cvals->rx_max_coalesced_frames = rxcount;
 
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index 1d7d7fe..89c82c5 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -2556,13 +2556,13 @@ static int __devinit emac_init_config(struct emac_instance *dev)
 	if (emac_read_uint_prop(np, "mdio-device", &dev->mdio_ph, 0))
 		dev->mdio_ph = 0;
 	if (emac_read_uint_prop(np, "zmii-device", &dev->zmii_ph, 0))
-		dev->zmii_ph = 0;;
+		dev->zmii_ph = 0;
 	if (emac_read_uint_prop(np, "zmii-channel", &dev->zmii_port, 0))
-		dev->zmii_port = 0xffffffff;;
+		dev->zmii_port = 0xffffffff;
 	if (emac_read_uint_prop(np, "rgmii-device", &dev->rgmii_ph, 0))
-		dev->rgmii_ph = 0;;
+		dev->rgmii_ph = 0;
 	if (emac_read_uint_prop(np, "rgmii-channel", &dev->rgmii_port, 0))
-		dev->rgmii_port = 0xffffffff;;
+		dev->rgmii_port = 0xffffffff;
 	if (emac_read_uint_prop(np, "fifo-entry-size", &dev->fifo_entry_size, 0))
 		dev->fifo_entry_size = 16;
 	if (emac_read_uint_prop(np, "mal-burst-size", &dev->mal_burst_size, 0))
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index d2639c4..5d6c153 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -3966,7 +3966,7 @@ static int igb_set_vf_multicasts(struct igb_adapter *adapter,
 	/* VFs are limited to using the MTA hash table for their multicast
 	 * addresses */
 	for (i = 0; i < n; i++)
-		vf_data->vf_mc_hashes[i] = hash_list[i];;
+		vf_data->vf_mc_hashes[i] = hash_list[i];
 
 	/* Flush and reset the mta with the new values */
 	igb_set_rx_mode(adapter->netdev);
diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c
index da8d0a0..f2a197f 100644
--- a/drivers/net/ll_temac_main.c
+++ b/drivers/net/ll_temac_main.c
@@ -865,7 +865,7 @@ temac_of_probe(struct of_device *op, const struct of_device_id *match)
 	dcrs = dcr_resource_start(np, 0);
 	if (dcrs == 0) {
 		dev_err(&op->dev, "could not get DMA register address\n");
-		goto nodev;;
+		goto nodev;
 	}
 	lp->sdma_dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0));
 	dev_dbg(&op->dev, "DCR base: %x\n", dcrs);
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index bd0ac69..aad3b37 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -615,10 +615,10 @@ static int init586(struct net_device *dev)
 	/* addr_len |!src_insert |pre-len |loopback */
 	writeb(0x2e, &cfg_cmd->adr_len);
 	writeb(0x00, &cfg_cmd->priority);
-	writeb(0x60, &cfg_cmd->ifs);;
+	writeb(0x60, &cfg_cmd->ifs);
 	writeb(0x00, &cfg_cmd->time_low);
 	writeb(0xf2, &cfg_cmd->time_high);
-	writeb(0x00, &cfg_cmd->promisc);;
+	writeb(0x00, &cfg_cmd->promisc);
 	if (dev->flags & IFF_ALLMULTI) {
 		int len = ((char __iomem *)p->iscp - (char __iomem *)ptr - 8) / 6;
 		if (num_addrs > len) {
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 2205292..7783c5d 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -2630,7 +2630,7 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 	    FLAGS_LI;		/* Load irq delay values */
 	if (rx_ring->lbq_len) {
 		cqicb->flags |= FLAGS_LL;	/* Load lbq values */
-		tmp = (u64)rx_ring->lbq_base_dma;;
+		tmp = (u64)rx_ring->lbq_base_dma;
 		base_indirect_ptr = (__le64 *) rx_ring->lbq_base_indirect;
 		page_entries = 0;
 		do {
@@ -2654,7 +2654,7 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 	}
 	if (rx_ring->sbq_len) {
 		cqicb->flags |= FLAGS_LS;	/* Load sbq values */
-		tmp = (u64)rx_ring->sbq_base_dma;;
+		tmp = (u64)rx_ring->sbq_base_dma;
 		base_indirect_ptr = (__le64 *) rx_ring->sbq_base_indirect;
 		page_entries = 0;
 		do {
diff --git a/drivers/net/skfp/pcmplc.c b/drivers/net/skfp/pcmplc.c
index f1df2ec..e6b33ee 100644
--- a/drivers/net/skfp/pcmplc.c
+++ b/drivers/net/skfp/pcmplc.c
@@ -960,7 +960,7 @@ static void pcm_fsm(struct s_smc *smc, struct s_phy *phy, int cmd)
 			/*PC88b*/
 			if (!phy->cf_join) {
 				phy->cf_join = TRUE ;
-				queue_event(smc,EVENT_CFM,CF_JOIN+np) ; ;
+				queue_event(smc,EVENT_CFM,CF_JOIN+np) ;
 			}
 			if (cmd == PC_JOIN)
 				GO_STATE(PC8_ACTIVE) ;
diff --git a/drivers/net/skfp/pmf.c b/drivers/net/skfp/pmf.c
index 79e665e..a320fdb 100644
--- a/drivers/net/skfp/pmf.c
+++ b/drivers/net/skfp/pmf.c
@@ -807,9 +807,9 @@ void smt_add_para(struct s_smc *smc, struct s_pcon *pcon, u_short para,
 				mib_p->fddiPORTLerFlag ;
 			sp->p4050_pad = 0 ;
 			sp->p4050_cutoff =
-				mib_p->fddiPORTLer_Cutoff ; ;
+				mib_p->fddiPORTLer_Cutoff ;
 			sp->p4050_alarm =
-				mib_p->fddiPORTLer_Alarm ; ;
+				mib_p->fddiPORTLer_Alarm ;
 			sp->p4050_estimate =
 				mib_p->fddiPORTLer_Estimate ;
 			sp->p4050_reject_ct =
@@ -829,7 +829,7 @@ void smt_add_para(struct s_smc *smc, struct s_pcon *pcon, u_short para,
 			sp->p4051_porttype =
 				mib_p->fddiPORTMy_Type ;
 			sp->p4051_connectstate =
-				mib_p->fddiPORTConnectState ; ;
+				mib_p->fddiPORTConnectState ;
 			sp->p4051_pc_neighbor =
 				mib_p->fddiPORTNeighborType ;
 			sp->p4051_pc_withhold =
@@ -853,7 +853,7 @@ void smt_add_para(struct s_smc *smc, struct s_pcon *pcon, u_short para,
 			struct smt_p_4053	*sp ;
 			sp = (struct smt_p_4053 *) to ;
 			sp->p4053_multiple =
-				mib_p->fddiPORTMultiple_P ; ;
+				mib_p->fddiPORTMultiple_P ;
 			sp->p4053_availablepaths =
 				mib_p->fddiPORTAvailablePaths ;
 			sp->p4053_currentpath =
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 62e852e..55bad40 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -215,7 +215,7 @@ static void skge_wol_init(struct skge_port *skge)
 	if (skge->wol & WAKE_MAGIC)
 		ctrl |= WOL_CTL_ENA_PME_ON_MAGIC_PKT|WOL_CTL_ENA_MAGIC_PKT_UNIT;
 	else
-		ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;;
+		ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;
 
 	ctrl |= WOL_CTL_DIS_PME_ON_PATTERN|WOL_CTL_DIS_PATTERN_UNIT;
 	skge_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 4bb52e9..15140f9 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -765,7 +765,7 @@ static void sky2_wol_init(struct sky2_port *sky2)
 	if (sky2->wol & WAKE_MAGIC)
 		ctrl |= WOL_CTL_ENA_PME_ON_MAGIC_PKT|WOL_CTL_ENA_MAGIC_PKT_UNIT;
 	else
-		ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;;
+		ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;
 
 	ctrl |= WOL_CTL_DIS_PME_ON_PATTERN|WOL_CTL_DIS_PATTERN_UNIT;
 	sky2_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl);
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index 62779a5..3e94f0c 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -1541,7 +1541,7 @@ void vxge_hw_ring_rxd_1b_info_get(
 	rxd_info->l4_cksum_valid =
 		(u32)VXGE_HW_RING_RXD_L4_CKSUM_CORRECT_GET(rxdp->control_0);
 	rxd_info->l4_cksum =
-		(u32)VXGE_HW_RING_RXD_L4_CKSUM_GET(rxdp->control_0);;
+		(u32)VXGE_HW_RING_RXD_L4_CKSUM_GET(rxdp->control_0);
 	rxd_info->frame =
 		(u32)VXGE_HW_RING_RXD_ETHER_ENCAP_GET(rxdp->control_0);
 	rxd_info->proto =
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index b378037..068d7a9 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -2350,7 +2350,7 @@ static int vxge_enable_msix(struct vxgedev *vdev)
 	enum vxge_hw_status status;
 	/* 0 - Tx, 1 - Rx  */
 	int tim_msix_id[4];
-	int alarm_msix_id = 0, msix_intr_vect = 0;;
+	int alarm_msix_id = 0, msix_intr_vect = 0;
 	vdev->intr_cnt = 0;
 
 	/* allocate msix vectors */
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index bd1ce8e..0587d53 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -430,7 +430,7 @@ fail:
 
 static int __exit omap_rtc_remove(struct platform_device *pdev)
 {
-	struct rtc_device	*rtc = platform_get_drvdata(pdev);;
+	struct rtc_device	*rtc = platform_get_drvdata(pdev);
 
 	device_init_wakeup(&pdev->dev, 0);
 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index a1ce573..bd9fe2e 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -706,7 +706,7 @@ static int dasd_eckd_generate_uid(struct dasd_device *device,
 	       sizeof(uid->serial) - 1);
 	EBCASC(uid->serial, sizeof(uid->serial) - 1);
 	uid->ssid = private->gneq->subsystemID;
-	uid->real_unit_addr = private->ned->unit_addr;;
+	uid->real_unit_addr = private->ned->unit_addr;
 	if (private->sneq) {
 		uid->type = private->sneq->sua_flags;
 		if (uid->type == UA_BASE_PAV_ALIAS)
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index a4b2c57..c84eadd 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -2113,7 +2113,7 @@ static ssize_t remove_write (struct device_driver *drv,
 	IUCV_DBF_TEXT(trace, 3, __func__);
 
         if (count >= IFNAMSIZ)
-                count = IFNAMSIZ - 1;;
+                count = IFNAMSIZ - 1;
 
 	for (i = 0, p = buf; i < count && *p; i++, p++) {
 		if (*p == '\n' || *p == ' ')
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 3ff726a..0e1a346 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -102,7 +102,7 @@ static int zfcp_scsi_queuecommand(struct scsi_cmnd *scpnt,
 	if (unlikely((status & ZFCP_STATUS_COMMON_ERP_FAILED) ||
 		     !(status & ZFCP_STATUS_COMMON_RUNNING))) {
 		zfcp_scsi_command_fail(scpnt, DID_ERROR);
-		return 0;;
+		return 0;
 	}
 
 	ret = zfcp_fsf_send_fcp_command_task(unit, scpnt);
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 906cef5..41e1b0e 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -1340,7 +1340,7 @@ static int bnx2i_process_login_resp(struct iscsi_session *session,
 	resp_hdr->opcode = login->op_code;
 	resp_hdr->flags = login->response_flags;
 	resp_hdr->max_version = login->version_max;
-	resp_hdr->active_version = login->version_active;;
+	resp_hdr->active_version = login->version_active;
 	resp_hdr->hlength = 0;
 
 	hton24(resp_hdr->dlength, login->data_length);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 9df7ed3..9a1bd95 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -1207,7 +1207,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
 		vport->ct_flags &= ~FC_CT_RFF_ID;
 		CtReq->CommandResponse.bits.CmdRsp =
 		    be16_to_cpu(SLI_CTNS_RFF_ID);
-		CtReq->un.rff.PortId = cpu_to_be32(vport->fc_myDID);;
+		CtReq->un.rff.PortId = cpu_to_be32(vport->fc_myDID);
 		CtReq->un.rff.fbits = FC4_FEATURE_INIT;
 		CtReq->un.rff.type_code = FC_FCP_DATA;
 		cmpl = lpfc_cmpl_ct_cmd_rff_id;
diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c
index 8980a56..e75ba9b 100644
--- a/drivers/spi/omap_uwire.c
+++ b/drivers/spi/omap_uwire.c
@@ -213,7 +213,7 @@ static int uwire_txrx(struct spi_device *spi, struct spi_transfer *t)
 	unsigned	bits = ust->bits_per_word;
 	unsigned	bytes;
 	u16		val, w;
-	int		status = 0;;
+	int		status = 0;
 
 	if (!t->tx_buf && !t->rx_buf)
 		return 0;
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 3f3119d..6ba8aec 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -388,7 +388,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 
  err_no_iores:
  err_no_pdata:
-	spi_master_put(hw->master);;
+	spi_master_put(hw->master);
 
  err_nomem:
 	return err;
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index ba589d4..8c64c01 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -506,8 +506,6 @@ static int wdm_open(struct inode *inode, struct file *file)
 	desc = usb_get_intfdata(intf);
 	if (test_bit(WDM_DISCONNECTING, &desc->flags))
 		goto out;
-
-	;
 	file->private_data = desc;
 
 	rv = usb_autopm_get_interface(desc->intf);
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 61e7c40..1e58220 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -544,7 +544,7 @@ static void spcp8x5_set_termios(struct tty_struct *tty,
 	}
 
 	/* Set Baud Rate */
-	baud = tty_get_baud_rate(tty);;
+	baud = tty_get_baud_rate(tty);
 	switch (baud) {
 	case 300:	buf[0] = 0x00;	break;
 	case 600:	buf[0] = 0x01;	break;
diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c
index 7305553..b236e69 100644
--- a/drivers/uwb/i1480/i1480u-wlp/netdev.c
+++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c
@@ -214,7 +214,7 @@ int i1480u_open(struct net_device *net_dev)
 
 	netif_wake_queue(net_dev);
 #ifdef i1480u_FLOW_CONTROL
-	result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);;
+	result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);
 	if (result < 0) {
 		dev_err(dev, "Can't submit notification URB: %d\n", result);
 		goto error_notif_urb_submit;
diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c
index df03f37..79e5f40 100644
--- a/drivers/video/cfbcopyarea.c
+++ b/drivers/video/cfbcopyarea.c
@@ -114,7 +114,7 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 				d0 >>= right;
 			} else if (src_idx+n <= bits) {
 				// Single source word
-				d0 <<= left;;
+				d0 <<= left;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src + 1);
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index 30ae302..66358fa 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -710,7 +710,7 @@ static int __init imxfb_probe(struct platform_device *pdev)
 
 	fbi->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(fbi->clk)) {
-		ret = PTR_ERR(fbi->clk);;
+		ret = PTR_ERR(fbi->clk);
 		dev_err(&pdev->dev, "unable to get clock: %d\n", ret);
 		goto failed_getclock;
 	}
diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c
index 7da0027..5ffca2a 100644
--- a/drivers/video/s3c2410fb.c
+++ b/drivers/video/s3c2410fb.c
@@ -1119,7 +1119,7 @@ int __init s3c2410fb_init(void)
 	int ret = platform_driver_register(&s3c2410fb_driver);
 
 	if (ret == 0)
-		ret = platform_driver_register(&s3c2412fb_driver);;
+		ret = platform_driver_register(&s3c2412fb_driver);
 
 	return ret;
 }
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f5bbd9e..4d1b322 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -214,7 +214,7 @@ static int increase_reservation(unsigned long nr_pages)
 	page = balloon_first_page();
 	for (i = 0; i < nr_pages; i++) {
 		BUG_ON(page == NULL);
-		frame_list[i] = page_to_pfn(page);;
+		frame_list[i] = page_to_pfn(page);
 		page = balloon_next_page(page);
 	}
 
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 2316e94..e947915 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -90,7 +90,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
 			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
 			continue;
 		}
-		while (d_mountpoint(path.dentry) && follow_down(&path));
+		while (d_mountpoint(path.dentry) && follow_down(&path))
 			;
 		umount_ok = may_umount(path.mnt);
 		path_put(&path);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d91b0de..30c0d45 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2605,7 +2605,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 								extent);
 				cs = btrfs_file_extent_offset(src, extent);
 				cl = btrfs_file_extent_num_bytes(src,
-								extent);;
+								extent);
 				if (btrfs_file_extent_compression(src,
 								  extent)) {
 					cs = 0;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 606912d..5e8bc99 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -142,7 +142,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
 	rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
 	if (rc != 0) {
 		cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d",
-			  __func__, *devname, rc));;
+			  __func__, *devname, rc));
 		goto compose_mount_options_err;
 	}
 	/* md_len = strlen(...) + 12 for 'sep+prefixpath='
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index e5a2dac..76b0aa0 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -222,7 +222,7 @@ static unsigned decode_sessionid(struct xdr_stream *xdr,
 
 	p = read_buf(xdr, len);
 	if (unlikely(p == NULL))
-		return htonl(NFS4ERR_RESOURCE);;
+		return htonl(NFS4ERR_RESOURCE);
 
 	memcpy(sid->data, p, len);
 	return 0;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 44f2a5e..0578cc1 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -154,7 +154,7 @@ static int ocfs2_get_quota_block(struct inode *inode, int block,
 		err = -EIO;
 		mlog_errno(err);
 	}
-	return err;;
+	return err;
 }
 
 /* Read data from global quotafile - avoid pagecache and such because we cannot
diff --git a/include/scsi/fc/fc_fc2.h b/include/scsi/fc/fc_fc2.h
index cff8a8c..f87777d 100644
--- a/include/scsi/fc/fc_fc2.h
+++ b/include/scsi/fc/fc_fc2.h
@@ -92,8 +92,7 @@ struct fc_esb {
 	__u8	_esb_resvd[4];
 	__u8	esb_service_params[112]; /* TBD */
 	__u8	esb_seq_status[8];	/* sequence statuses, 8 bytes each */
-} __attribute__((packed));;
-
+} __attribute__((packed));
 
 /*
  * Define expected size for ASSERTs.
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index ca7d7c4..23b6385 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -155,7 +155,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 		    seq_print_ip_sym(seq, it->from, symflags) &&
 		    trace_seq_printf(seq, "\n"))
 			return TRACE_TYPE_HANDLED;
-		return TRACE_TYPE_PARTIAL_LINE;;
+		return TRACE_TYPE_PARTIAL_LINE;
 	}
 	return TRACE_TYPE_UNHANDLED;
 }
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 429dd06..561a45c 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -834,7 +834,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 		return 0;
 	}
 
-	return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);;
+	return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower);
 
diff --git a/sound/oss/sys_timer.c b/sound/oss/sys_timer.c
index 1075344..8db6aef 100644
--- a/sound/oss/sys_timer.c
+++ b/sound/oss/sys_timer.c
@@ -100,9 +100,6 @@ def_tmr_open(int dev, int mode)
 	curr_tempo = 60;
 	curr_timebase = 100;
 	opened = 1;
-
-	;
-
 	{
 		def_tmr.expires = (1) + jiffies;
 		add_timer(&def_tmr);
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index c64e55a..686e5aa 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -1027,7 +1027,7 @@ static int wm9081_hw_params(struct snd_pcm_substream *substream,
 		       - wm9081->fs);
 	for (i = 1; i < ARRAY_SIZE(clk_sys_rates); i++) {
 		cur_val = abs((wm9081->sysclk_rate /
-			       clk_sys_rates[i].ratio) - wm9081->fs);;
+			       clk_sys_rates[i].ratio) - wm9081->fs);
 		if (cur_val < best_val) {
 			best = i;
 			best_val = cur_val;
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 5b9ed64..d11a6d7 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -351,7 +351,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai,
 			do_div(tmp, freq_out);
 			val = tmp;
 
-			val = (val << 16) | 64;;
+			val = (val << 16) | 64;
 			ssp_write_reg(ssp, SSACDD, val);
 
 			ssacd |= (0x6 << 4);
diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c
index 8e79a41..c215d32 100644
--- a/sound/soc/s3c24xx/s3c24xx_uda134x.c
+++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c
@@ -67,7 +67,7 @@ static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
 {
 	int ret = 0;
 #ifdef ENFORCE_RATES
-	struct snd_pcm_runtime *runtime = substream->runtime;;
+	struct snd_pcm_runtime *runtime = substream->runtime;
 #endif
 
 	mutex_lock(&clk_lock);
-- 
cgit v0.10.2


From 0e6b9e8c2c7b55569333a15e39d684dec986e226 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Sun, 23 Aug 2009 01:46:55 +0200
Subject: trivial: add __init macro/ fix of __exit macro location in
 ipmi_poweroff.c

Trivial patch which adds the __init to the module_init function of
drivers/char/ipmi/ipmy_poweroff.c and corrects the location of __exit for the
cleanup function.

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Acked-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c
index a261bd7..2e66b5f 100644
--- a/drivers/char/ipmi/ipmi_poweroff.c
+++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -691,7 +691,7 @@ static struct ctl_table_header *ipmi_table_header;
 /*
  * Startup and shutdown functions.
  */
-static int ipmi_poweroff_init(void)
+static int __init ipmi_poweroff_init(void)
 {
 	int rv;
 
@@ -725,7 +725,7 @@ static int ipmi_poweroff_init(void)
 }
 
 #ifdef MODULE
-static __exit void ipmi_poweroff_cleanup(void)
+static void __exit ipmi_poweroff_cleanup(void)
 {
 	int rv;
 
-- 
cgit v0.10.2


From 59f6a6c6e5625474b5e8e73a59425935b0028cfc Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Sun, 23 Aug 2009 02:01:10 +0200
Subject: trivial: add __init/__exit macros in drivers/gpio/bt8xxgpio.c

Trivial patch which adds the __init/__exit macro to the init/exit functions of
drivers/gpio/bt8xxgpio.c

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Acked-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c
index 984b587..5590414 100644
--- a/drivers/gpio/bt8xxgpio.c
+++ b/drivers/gpio/bt8xxgpio.c
@@ -331,13 +331,13 @@ static struct pci_driver bt8xxgpio_pci_driver = {
 	.resume		= bt8xxgpio_resume,
 };
 
-static int bt8xxgpio_init(void)
+static int __init bt8xxgpio_init(void)
 {
 	return pci_register_driver(&bt8xxgpio_pci_driver);
 }
 module_init(bt8xxgpio_init)
 
-static void bt8xxgpio_exit(void)
+static void __exit bt8xxgpio_exit(void)
 {
 	pci_unregister_driver(&bt8xxgpio_pci_driver);
 }
-- 
cgit v0.10.2


From 6afb1c65d0e67547cef19e2d4c40f7b2d8578bff Mon Sep 17 00:00:00 2001
From: Michal Sojka <sojkam1@fel.cvut.cz>
Date: Thu, 10 Sep 2009 08:02:21 +0200
Subject: trivial: fix typo in tracing documentation

Signed-off-by: Michal Sojka <sojkam1@fel.cvut.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index 78c45a8..02ac6ed 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -72,7 +72,7 @@ To enable all events in sched subsystem:
 
 	# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
 
-To eanble all events:
+To enable all events:
 
 	# echo 1 > /sys/kernel/debug/tracing/events/enable
 
-- 
cgit v0.10.2


From a9ed83a581d01b8330cd1fc867fd8a770342828f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 17 Sep 2009 14:14:45 -0700
Subject: trivial: typo in kernel-parameters.txt

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f17d16..c363840 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -933,7 +933,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			1 -- enable informational integrity auditing messages.
 
 	ima_hash=	[IMA]
-			Formt: { "sha1" | "md5" }
+			Format: { "sha1" | "md5" }
 			default: "sha1"
 
 	ima_tcb		[IMA]
-- 
cgit v0.10.2


From ad452d64c625147c77fca7e3986d59d1826fca84 Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khc@pm.waw.pl>
Date: Sun, 20 Sep 2009 16:22:51 +0200
Subject: trivial: fix comment typo in drivers/ata/pata_hpt37x.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A comment fix in drivers/ata/pata_hpt37x.c.

Signed-off-by: Krzysztof Ha³asa <khc@pm.waw.pl>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 122c786..d0a7df2 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -624,7 +624,7 @@ static struct ata_port_operations hpt374_fn1_port_ops = {
 };
 
 /**
- *	htp37x_clock_slot	-	Turn timing to PC clock entry
+ *	hpt37x_clock_slot	-	Turn timing to PC clock entry
  *	@freq: Reported frequency timing
  *	@base: Base timing
  *
-- 
cgit v0.10.2


From 24ed7a97464db44592495f98cff8bcee02f92bc2 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Date: Mon, 21 Sep 2009 10:39:22 +0200
Subject: trivial: fix typo in aic7xxx comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c
index e6f2bb7..8dfb59d 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_core.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_core.c
@@ -5223,7 +5223,7 @@ ahc_chip_init(struct ahc_softc *ahc)
 
 	/*
 	 * Setup the allowed SCSI Sequences based on operational mode.
-	 * If we are a target, we'll enalbe select in operations once
+	 * If we are a target, we'll enable select in operations once
 	 * we've had a lun enabled.
 	 */
 	scsiseq_template = ENSELO|ENAUTOATNO|ENAUTOATNP;
-- 
cgit v0.10.2


From 285a0f00c27a02f1223a198c88de2130e9bab059 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sun, 20 Sep 2009 17:01:33 -0400
Subject: nfsd: revise 4.1 status documentation

Some small updates, a caveat about the minorversion control interface,
and an attempt to put missing features in context.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
index 05d81cb..5920fe2 100644
--- a/Documentation/filesystems/nfs41-server.txt
+++ b/Documentation/filesystems/nfs41-server.txt
@@ -11,6 +11,11 @@ the /proc/fs/nfsd/versions control file.  Note that to write this
 control file, the nfsd service must be taken down.  Use your user-mode
 nfs-utils to set this up; see rpc.nfsd(8)
 
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively.  Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
 The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
 on the latest NFSv4.1 Internet Draft:
 http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
@@ -25,6 +30,49 @@ are still under development out of tree.
 See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
 for more information.
 
+The current implementation is intended for developers only: while it
+does support ordinary file operations on clients we have tested against
+(including the linux client), it is incomplete in ways which may limit
+features unexpectedly, cause known bugs in rare cases, or cause
+interoperability problems with future clients.  Known issues:
+
+	- gss support is questionable: currently mounts with kerberos
+	  from a linux client are possible, but we aren't really
+	  conformant with the spec (for example, we don't use kerberos
+	  on the backchannel correctly).
+	- no trunking support: no clients currently take advantage of
+	  trunking, but this is a mandatory failure, and its use is
+	  recommended to clients in a number of places.  (E.g. to ensure
+	  timely renewal in case an existing connection's retry timeouts
+	  have gotten too long; see section 8.3 of the draft.)
+	  Therefore, lack of this feature may cause future clients to
+	  fail.
+	- Incomplete backchannel support: incomplete backchannel gss
+	  support and no support for BACKCHANNEL_CTL mean that
+	  callbacks (hence delegations and layouts) may not be
+	  available and clients confused by the incomplete
+	  implementation may fail.
+	- Server reboot recovery is unsupported; if the server reboots,
+	  clients may fail.
+	- We do not support SSV, which provides security for shared
+	  client-server state (thus preventing unauthorized tampering
+	  with locks and opens, for example).  It is mandatory for
+	  servers to support this, though no clients use it yet.
+	- Mandatory operations which we do not support, such as
+	  DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
+	  TEST_STATEID, are not currently used by clients, but will be
+	  (and the spec recommends their uses in common cases), and
+	  clients should not be expected to know how to recover from the
+	  case where they are not supported.  This will eventually cause
+	  interoperability failures.
+
+In addition, some limitations are inherited from the current NFSv4
+implementation:
+
+	- Incomplete delegation enforcement: if a file is renamed or
+	  unlinked, a client holding a delegation may continue to
+	  indefinitely allow opens of the file under the old name.
+
 The table below, taken from the NFSv4.1 document, lists
 the operations that are mandatory to implement (REQ), optional
 (OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -142,6 +190,12 @@ NS*| CB_WANTS_CANCELLED      | OPT       | FDELG,      | Section 20.10 |
 
 Implementation notes:
 
+DELEGPURGE:
+* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+  CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+  persist across client reboots).  Thus we need not implement this for
+  now.
+
 EXCHANGE_ID:
 * only SP4_NONE state protection supported
 * implementation ids are ignored
-- 
cgit v0.10.2


From ceb69a899f0819bce825083bd487c6afacc0d1b4 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Fri, 11 Sep 2009 00:59:07 +0200
Subject: ixp4xx: timer and clocks cleanups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch does a few simple cleanups of the ixp4xx timer
and clocksource/clockevent code in mach-ixp4xx/common.c:

- ixp4xx_clocksource_init() is static and always returns 0,
  which is ignored by its only caller: make it return void
- ixp4xx_clockevent_init(): ditto
- ixp4xx_get_cycles() is only referenced locally: make it static
- use the ixp4xx_timer_irq.dev_id field to pass &clockevent_ixp4xx
  to ixp4xx_timer_interrupt() via its dev_id parameter, allowing
  the code in ixp4xx_timer_interrupt() to be smaller and faster

Tested on an ixp420 machine (ds101).

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 5083f03..cfd52fb 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -41,8 +41,8 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 
-static int __init ixp4xx_clocksource_init(void);
-static int __init ixp4xx_clockevent_init(void);
+static void __init ixp4xx_clocksource_init(void);
+static void __init ixp4xx_clockevent_init(void);
 static struct clock_event_device clockevent_ixp4xx;
 
 /*************************************************************************
@@ -267,7 +267,7 @@ void __init ixp4xx_init_irq(void)
 
 static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id)
 {
-	struct clock_event_device *evt = &clockevent_ixp4xx;
+	struct clock_event_device *evt = dev_id;
 
 	/* Clear Pending Interrupt by writing '1' to it */
 	*IXP4XX_OSST = IXP4XX_OSST_TIMER_1_PEND;
@@ -281,6 +281,7 @@ static struct irqaction ixp4xx_timer_irq = {
 	.name		= "timer1",
 	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
 	.handler	= ixp4xx_timer_interrupt,
+	.dev_id		= &clockevent_ixp4xx,
 };
 
 void __init ixp4xx_timer_init(void)
@@ -401,7 +402,7 @@ void __init ixp4xx_sys_init(void)
 /*
  * clocksource
  */
-cycle_t ixp4xx_get_cycles(struct clocksource *cs)
+static cycle_t ixp4xx_get_cycles(struct clocksource *cs)
 {
 	return *IXP4XX_OSTS;
 }
@@ -417,14 +418,12 @@ static struct clocksource clocksource_ixp4xx = {
 
 unsigned long ixp4xx_timer_freq = FREQ;
 EXPORT_SYMBOL(ixp4xx_timer_freq);
-static int __init ixp4xx_clocksource_init(void)
+static void __init ixp4xx_clocksource_init(void)
 {
 	clocksource_ixp4xx.mult =
 		clocksource_hz2mult(ixp4xx_timer_freq,
 				    clocksource_ixp4xx.shift);
 	clocksource_register(&clocksource_ixp4xx);
-
-	return 0;
 }
 
 /*
@@ -480,7 +479,7 @@ static struct clock_event_device clockevent_ixp4xx = {
 	.set_next_event	= ixp4xx_set_next_event,
 };
 
-static int __init ixp4xx_clockevent_init(void)
+static void __init ixp4xx_clockevent_init(void)
 {
 	clockevent_ixp4xx.mult = div_sc(FREQ, NSEC_PER_SEC,
 					clockevent_ixp4xx.shift);
@@ -491,5 +490,4 @@ static int __init ixp4xx_clockevent_init(void)
 	clockevent_ixp4xx.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_ixp4xx);
-	return 0;
 }
-- 
cgit v0.10.2


From 74c32e7234afc4586391c0c0f290d266a3e57fe5 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Fri, 11 Sep 2009 00:56:54 +0200
Subject: ixp4xx: arch_idle() documentation fixup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The body of the mach-ixp4xx arch_idle() is mysteriously
disabled by an #if 0 .. #endif. Normally one would expect
to find a call to cpu_do_idle() there, but that call is
disabled, even though cpu_do_idle() is implemented for
XScale cores (and ixp4xx is one).

The explanation can be found in the ixp42x developer's manual
which states that the XScale core clock and power management
registers aren't implemented on ixp42x [3.5.2.2].

Also, the disabled code has suffered from bit rot:
- it checks hlt_counter which is obsolete, as that variable
  and all related code now is private to kernel/process.c
- it passes too many parameters to cpu_do_idle()

So this patch:
- adds a comment before the #if 0 to explain why
  cpu_do_idle() mustn't be called on ixp4xx
- removes the obsolete test of hlt_counter and the
  obsolete parameter to cpu_do_idle()

This is purely a documentation fixup and changes no
generated code. Even so, it has been tested on an
ixp420 machine (ds101).

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/include/mach/system.h b/arch/arm/mach-ixp4xx/include/mach/system.h
index d2aa26f..54c0af7 100644
--- a/arch/arm/mach-ixp4xx/include/mach/system.h
+++ b/arch/arm/mach-ixp4xx/include/mach/system.h
@@ -13,9 +13,11 @@
 
 static inline void arch_idle(void)
 {
+	/* ixp4xx does not implement the XScale PWRMODE register,
+	 * so it must not call cpu_do_idle() here.
+	 */
 #if 0
-	if (!hlt_counter)
-		cpu_do_idle(0);
+	cpu_do_idle();
 #endif
 }
 
-- 
cgit v0.10.2


From dfdd8cc903288bb2e2ad6731545be3db7304c133 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Mon, 21 Sep 2009 18:31:24 +0200
Subject: Add MAINTAINERS entry for ARM/INTEL IXP4xx arch support.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/MAINTAINERS b/MAINTAINERS
index 751a307..7bf7045 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -685,6 +685,13 @@ M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
 S:	Maintained
 
+ARM/INTEL IXP4XX ARM ARCHITECTURE
+M:	Imre Kaloz <kaloz@openwrt.org>
+M:	Krzysztof Halasa <khc@pm.waw.pl>
+L:	linux-arm-kernel@lists.infradead.org
+S:	Maintained
+F:	arch/arm/mach-ixp4xx/
+
 ARM/INTEL XSC3 (MANZANO) ARM CORE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 M:	Dan Williams <dan.j.williams@intel.com>
-- 
cgit v0.10.2


From 3c394ddaa7ea4205f933fd9b481166b2669368a9 Mon Sep 17 00:00:00 2001
From: Steve Dickson <SteveD@redhat.com>
Date: Wed, 9 Sep 2009 15:02:40 -0400
Subject: nfsd4: nfsv4 clients should cross mountpoints

Allow NFS v4 clients to seamlessly cross mount point without
have to set either the 'crossmnt' or the 'nohide' export
options.

Signed-Off-By: Steve Dickson <steved@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23341c1..e069ab3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -89,6 +89,12 @@ struct raparm_hbucket {
 #define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
 static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
 
+static inline int
+nfsd_v4client(struct svc_rqst *rq)
+{
+    return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
+}
+
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
  * a mount point.
@@ -115,7 +121,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 		path_put(&path);
 		goto out;
 	}
-	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
+	if (nfsd_v4client(rqstp) ||
+		(exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
 		/* successfully crossed mount point */
 		/*
 		 * This is subtle: path.dentry is *not* on path.mnt
-- 
cgit v0.10.2


From 181f7c5dd3832763bdf2756b6d2d8a49bdf12791 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 6 Jul 2009 11:53:03 +0200
Subject: kmemcheck: add missing braces to do-while in
 kmemcheck_annotate_bitfield

Whether or not the sparse warning

	warning: do-while statement is not a compound statement

is justified or not in this case, it is annoying and trivial to fix.

[vegard.nossum@gmail.com: title and cleanup]
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>

diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 47b39b7..06c6c55 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -137,7 +137,10 @@ static inline void kmemcheck_mark_initialized_pages(struct page *p,
 	int name##_end[0];
 
 #define kmemcheck_annotate_bitfield(ptr, name)				\
-	do if (ptr) {							\
+	do {								\
+		if (!ptr)						\
+			break;						\
+									\
 		int _n = (long) &((ptr)->name##_end)			\
 			- (long) &((ptr)->name##_begin);		\
 		BUILD_BUG_ON(_n < 0);					\
-- 
cgit v0.10.2


From 98235a4bb01f288c82f7b878996e507f6159fd69 Mon Sep 17 00:00:00 2001
From: Barry Song <21cnbao@gmail.com>
Date: Mon, 21 Sep 2009 11:20:51 +0800
Subject: ASoC: some minor changes for AD1836 and AD1938 codec drivers

1. delete redundant assignment to bus field in spi_driver structure
2. fix lost assignment to set_bias_level entry in ad1938 codec dai
3. change spi driver name of ad1836 from "ad1836-spi" to "ad1836"

Signed-off-by: Barry Song <barry.song@analog.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/ad1836.c b/sound/soc/codecs/ad1836.c
index 01343dc..c48485f 100644
--- a/sound/soc/codecs/ad1836.c
+++ b/sound/soc/codecs/ad1836.c
@@ -251,8 +251,7 @@ static int __devexit ad1836_spi_remove(struct spi_device *spi)
 
 static struct spi_driver ad1836_spi_driver = {
 	.driver = {
-		.name	= "ad1836-spi",
-		.bus	= &spi_bus_type,
+		.name	= "ad1836",
 		.owner	= THIS_MODULE,
 	},
 	.probe		= ad1836_spi_probe,
diff --git a/sound/soc/codecs/ad1938.c b/sound/soc/codecs/ad1938.c
index 9a049a1..34b30ef 100644
--- a/sound/soc/codecs/ad1938.c
+++ b/sound/soc/codecs/ad1938.c
@@ -456,7 +456,6 @@ static int __devexit ad1938_spi_remove(struct spi_device *spi)
 static struct spi_driver ad1938_spi_driver = {
 	.driver = {
 		.name	= "ad1938",
-		.bus	= &spi_bus_type,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= ad1938_spi_probe,
@@ -515,6 +514,7 @@ static int ad1938_register(struct ad1938_priv *ad1938)
 	codec->num_dai = 1;
 	codec->write = ad1938_write_reg;
 	codec->read = ad1938_read_reg_cache;
+	codec->set_bias_level = ad1938_set_bias_level;
 	INIT_LIST_HEAD(&codec->dapm_widgets);
 	INIT_LIST_HEAD(&codec->dapm_paths);
 
-- 
cgit v0.10.2


From 877ae70763fa0b19ffbe25e7e119fc96f52fbbd3 Mon Sep 17 00:00:00 2001
From: Phil Vandry <vandry@TZoNE.ORG>
Date: Mon, 21 Sep 2009 11:36:08 -0400
Subject: ASoC: wm8753: fix mapping when MONOMIX is set to Stereo

When MONOMIX is set to Stereo, Left PGA was not powered on but should be.
Add a mapping from Capture Left Mux to Capture Left Mixer to fix the issue.

Signed-off-by: Phil Vandry <vandry@TZoNE.ORG>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index d80d414..5ad677c 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -595,6 +595,7 @@ static const struct snd_soc_dapm_route audio_map[] = {
 
 	/* Mono Capture mixer-mux */
 	{"Capture Right Mixer", "Stereo", "Capture Right Mux"},
+	{"Capture Left Mixer", "Stereo", "Capture Left Mux"},
 	{"Capture Left Mixer", "Analogue Mix Left", "Capture Left Mux"},
 	{"Capture Left Mixer", "Analogue Mix Left", "Capture Right Mux"},
 	{"Capture Right Mixer", "Analogue Mix Right", "Capture Left Mux"},
-- 
cgit v0.10.2


From a8f90e906783f1f815120eefe813b23cb396e9bd Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 22 Sep 2009 09:48:08 +1000
Subject: perf_event, powerpc: Fix compilation after big perf_counter rename

This fixes two places in the powerpc perf_event (perf_counter) code
where 'list_entry' needs to be changed to 'group_entry', but were
missed in commit 65abc865 ("perf_counter: Rename list_entry ->
group_entry, counter_list -> group_list").

This also changes 'event' back to 'counter' in a couple of
contexts:

* Field and function names that deal with the limited-function
  counters: it's really the hardware counters whose function is
  limited, not the events that they count.  Hence:

  MAX_LIMITED_HWEVENTS -> MAX_LIMITED_HWCOUNTERS
  limited_event -> limited_counter
  freeze/thaw_limited_events -> freeze/thaw_limited_counters

* The machine-specific PMU description struct (struct power_pmu): this
  renames 'n_event' back to 'n_counter' since it really describes how
  many hardware counters the machine has.  (Renaming this back avoids
  a compile error in each of the machine-specific PMU back-ends where
  they initialize their power_pmu struct.)

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@ozlabs.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <19128.4280.813369.589704@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 2499aaa..3288ce3 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -14,7 +14,7 @@
 
 #define MAX_HWEVENTS		8
 #define MAX_EVENT_ALTERNATIVES	8
-#define MAX_LIMITED_HWEVENTS	2
+#define MAX_LIMITED_HWCOUNTERS	2
 
 /*
  * This struct provides the constants and functions needed to
@@ -22,7 +22,7 @@
  */
 struct power_pmu {
 	const char	*name;
-	int		n_event;
+	int		n_counter;
 	int		max_alternatives;
 	unsigned long	add_fields;
 	unsigned long	test_adder;
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 197b7d9..bbcbae1 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -30,8 +30,8 @@ struct cpu_hw_events {
 	u64 events[MAX_HWEVENTS];
 	unsigned int flags[MAX_HWEVENTS];
 	unsigned long mmcr[3];
-	struct perf_event *limited_event[MAX_LIMITED_HWEVENTS];
-	u8  limited_hwidx[MAX_LIMITED_HWEVENTS];
+	struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
+	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
@@ -253,7 +253,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
 	unsigned long addf = ppmu->add_fields;
 	unsigned long tadd = ppmu->test_adder;
 
-	if (n_ev > ppmu->n_event)
+	if (n_ev > ppmu->n_counter)
 		return -1;
 
 	/* First see if the events will go on as-is */
@@ -426,7 +426,7 @@ static int is_limited_pmc(int pmcnum)
 		&& (pmcnum == 5 || pmcnum == 6);
 }
 
-static void freeze_limited_events(struct cpu_hw_events *cpuhw,
+static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
 				    unsigned long pmc5, unsigned long pmc6)
 {
 	struct perf_event *event;
@@ -434,7 +434,7 @@ static void freeze_limited_events(struct cpu_hw_events *cpuhw,
 	int i;
 
 	for (i = 0; i < cpuhw->n_limited; ++i) {
-		event = cpuhw->limited_event[i];
+		event = cpuhw->limited_counter[i];
 		if (!event->hw.idx)
 			continue;
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
@@ -445,7 +445,7 @@ static void freeze_limited_events(struct cpu_hw_events *cpuhw,
 	}
 }
 
-static void thaw_limited_events(struct cpu_hw_events *cpuhw,
+static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
 				  unsigned long pmc5, unsigned long pmc6)
 {
 	struct perf_event *event;
@@ -453,7 +453,7 @@ static void thaw_limited_events(struct cpu_hw_events *cpuhw,
 	int i;
 
 	for (i = 0; i < cpuhw->n_limited; ++i) {
-		event = cpuhw->limited_event[i];
+		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
 		atomic64_set(&event->hw.prev_count, val);
@@ -495,9 +495,9 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
 		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
 
 	if (mmcr0 & MMCR0_FC)
-		freeze_limited_events(cpuhw, pmc5, pmc6);
+		freeze_limited_counters(cpuhw, pmc5, pmc6);
 	else
-		thaw_limited_events(cpuhw, pmc5, pmc6);
+		thaw_limited_counters(cpuhw, pmc5, pmc6);
 
 	/*
 	 * Write the full MMCR0 including the event overflow interrupt
@@ -653,7 +653,7 @@ void hw_perf_enable(void)
 			continue;
 		idx = hwc_index[i] + 1;
 		if (is_limited_pmc(idx)) {
-			cpuhw->limited_event[n_lim] = event;
+			cpuhw->limited_counter[n_lim] = event;
 			cpuhw->limited_hwidx[n_lim] = idx;
 			++n_lim;
 			continue;
@@ -702,7 +702,7 @@ static int collect_events(struct perf_event *group, int max_count,
 		flags[n] = group->hw.event_base;
 		events[n++] = group->hw.config;
 	}
-	list_for_each_entry(event, &group->sibling_list, list_entry) {
+	list_for_each_entry(event, &group->sibling_list, group_entry) {
 		if (!is_software_event(event) &&
 		    event->state != PERF_EVENT_STATE_OFF) {
 			if (n >= max_count)
@@ -742,7 +742,7 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
 		return 0;
 	cpuhw = &__get_cpu_var(cpu_hw_events);
 	n0 = cpuhw->n_events;
-	n = collect_events(group_leader, ppmu->n_event - n0,
+	n = collect_events(group_leader, ppmu->n_counter - n0,
 			   &cpuhw->event[n0], &cpuhw->events[n0],
 			   &cpuhw->flags[n0]);
 	if (n < 0)
@@ -764,7 +764,7 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
 	cpuctx->active_oncpu += n;
 	n = 1;
 	event_sched_in(group_leader, cpu);
-	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
+	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
 		if (sub->state != PERF_EVENT_STATE_OFF) {
 			event_sched_in(sub, cpu);
 			++n;
@@ -797,7 +797,7 @@ static int power_pmu_enable(struct perf_event *event)
 	 */
 	cpuhw = &__get_cpu_var(cpu_hw_events);
 	n0 = cpuhw->n_events;
-	if (n0 >= ppmu->n_event)
+	if (n0 >= ppmu->n_counter)
 		goto out;
 	cpuhw->event[n0] = event;
 	cpuhw->events[n0] = event->hw.config;
@@ -848,11 +848,11 @@ static void power_pmu_disable(struct perf_event *event)
 		}
 	}
 	for (i = 0; i < cpuhw->n_limited; ++i)
-		if (event == cpuhw->limited_event[i])
+		if (event == cpuhw->limited_counter[i])
 			break;
 	if (i < cpuhw->n_limited) {
 		while (++i < cpuhw->n_limited) {
-			cpuhw->limited_event[i-1] = cpuhw->limited_event[i];
+			cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
 			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
 		}
 		--cpuhw->n_limited;
@@ -1078,7 +1078,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	 */
 	n = 0;
 	if (event->group_leader != event) {
-		n = collect_events(event->group_leader, ppmu->n_event - 1,
+		n = collect_events(event->group_leader, ppmu->n_counter - 1,
 				   ctrs, events, cflags);
 		if (n < 0)
 			return ERR_PTR(-EINVAL);
@@ -1230,7 +1230,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 	int nmi;
 
 	if (cpuhw->n_limited)
-		freeze_limited_events(cpuhw, mfspr(SPRN_PMC5),
+		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
 					mfspr(SPRN_PMC6));
 
 	perf_read_regs(regs);
@@ -1260,7 +1260,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 	 * Any that we processed in the previous loop will not be negative.
 	 */
 	if (!found) {
-		for (i = 0; i < ppmu->n_event; ++i) {
+		for (i = 0; i < ppmu->n_counter; ++i) {
 			if (is_limited_pmc(i + 1))
 				continue;
 			val = read_pmc(i + 1);
-- 
cgit v0.10.2


From 4dee4d441d3f90cd8cec10a9eb222d8a4f2fa2a3 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 15 Jun 2009 22:30:39 +0200
Subject: regulator: add check index of wm8350->pmic.pdev[]

Ensure that reg is within the bounds of array wm8350->pmic.pdev[].

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 17a00b0..768bd0e 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1419,6 +1419,8 @@ int wm8350_register_regulator(struct wm8350 *wm8350, int reg,
 {
 	struct platform_device *pdev;
 	int ret;
+	if (reg < 0 || reg >= NUM_WM8350_REGULATORS)
+		return -EINVAL;
 
 	if (wm8350->pmic.pdev[reg])
 		return -EBUSY;
-- 
cgit v0.10.2


From d61c3d56e23b3548a91b70ecce9dc226a8655a57 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 15 Jun 2009 20:01:01 +0100
Subject: regulator: Report regulator_get() failure in virtual consumer

The core will no longer complain so we should log an error here.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index e7db566..e953c18 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -285,6 +285,8 @@ static int regulator_virtual_consumer_probe(struct platform_device *pdev)
 	drvdata->regulator = regulator_get(&pdev->dev, reg_id);
 	if (IS_ERR(drvdata->regulator)) {
 		ret = PTR_ERR(drvdata->regulator);
+		dev_err(&pdev->dev, "Failed to obtain supply '%s': %d\n",
+			reg_id, ret);
 		goto err;
 	}
 
-- 
cgit v0.10.2


From a07ac217146e0fac18c80d93e02109f2c96574d0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 17 Jun 2009 15:45:06 +0100
Subject: regulator: Make virtual consumer use dev_printk

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index e953c18..5c1d756 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -27,17 +27,18 @@ struct virtual_consumer_data {
 	unsigned int mode;
 };
 
-static void update_voltage_constraints(struct virtual_consumer_data *data)
+static void update_voltage_constraints(struct device *dev,
+				       struct virtual_consumer_data *data)
 {
 	int ret;
 
 	if (data->min_uV && data->max_uV
 	    && data->min_uV <= data->max_uV) {
 		ret = regulator_set_voltage(data->regulator,
-					    data->min_uV, data->max_uV);
+					data->min_uV, data->max_uV);
 		if (ret != 0) {
-			printk(KERN_ERR "regulator_set_voltage() failed: %d\n",
-			       ret);
+			dev_err(dev,
+				"regulator_set_voltage() failed: %d\n", ret);
 			return;
 		}
 	}
@@ -47,7 +48,7 @@ static void update_voltage_constraints(struct virtual_consumer_data *data)
 		if (ret == 0)
 			data->enabled = 1;
 		else
-			printk(KERN_ERR "regulator_enable() failed: %d\n",
+			dev_err(dev, "regulator_enable() failed: %d\n",
 				ret);
 	}
 
@@ -56,13 +57,13 @@ static void update_voltage_constraints(struct virtual_consumer_data *data)
 		if (ret == 0)
 			data->enabled = 0;
 		else
-			printk(KERN_ERR "regulator_disable() failed: %d\n",
+			dev_err(dev, "regulator_disable() failed: %d\n",
 				ret);
 	}
 }
 
-static void update_current_limit_constraints(struct virtual_consumer_data
-						*data)
+static void update_current_limit_constraints(struct device *dev,
+					  struct virtual_consumer_data *data)
 {
 	int ret;
 
@@ -71,8 +72,9 @@ static void update_current_limit_constraints(struct virtual_consumer_data
 		ret = regulator_set_current_limit(data->regulator,
 					data->min_uA, data->max_uA);
 		if (ret != 0) {
-			pr_err("regulator_set_current_limit() failed: %d\n",
-			       ret);
+			dev_err(dev,
+				"regulator_set_current_limit() failed: %d\n",
+				ret);
 			return;
 		}
 	}
@@ -82,7 +84,7 @@ static void update_current_limit_constraints(struct virtual_consumer_data
 		if (ret == 0)
 			data->enabled = 1;
 		else
-			printk(KERN_ERR "regulator_enable() failed: %d\n",
+			dev_err(dev, "regulator_enable() failed: %d\n",
 				ret);
 	}
 
@@ -91,7 +93,7 @@ static void update_current_limit_constraints(struct virtual_consumer_data
 		if (ret == 0)
 			data->enabled = 0;
 		else
-			printk(KERN_ERR "regulator_disable() failed: %d\n",
+			dev_err(dev, "regulator_disable() failed: %d\n",
 				ret);
 	}
 }
@@ -115,7 +117,7 @@ static ssize_t set_min_uV(struct device *dev, struct device_attribute *attr,
 	mutex_lock(&data->lock);
 
 	data->min_uV = val;
-	update_voltage_constraints(data);
+	update_voltage_constraints(dev, data);
 
 	mutex_unlock(&data->lock);
 
@@ -141,7 +143,7 @@ static ssize_t set_max_uV(struct device *dev, struct device_attribute *attr,
 	mutex_lock(&data->lock);
 
 	data->max_uV = val;
-	update_voltage_constraints(data);
+	update_voltage_constraints(dev, data);
 
 	mutex_unlock(&data->lock);
 
@@ -167,7 +169,7 @@ static ssize_t set_min_uA(struct device *dev, struct device_attribute *attr,
 	mutex_lock(&data->lock);
 
 	data->min_uA = val;
-	update_current_limit_constraints(data);
+	update_current_limit_constraints(dev, data);
 
 	mutex_unlock(&data->lock);
 
@@ -193,7 +195,7 @@ static ssize_t set_max_uA(struct device *dev, struct device_attribute *attr,
 	mutex_lock(&data->lock);
 
 	data->max_uA = val;
-	update_current_limit_constraints(data);
+	update_current_limit_constraints(dev, data);
 
 	mutex_unlock(&data->lock);
 
-- 
cgit v0.10.2


From a5d2abce4373810c0109c5939c0094ac16698625 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 17 Jun 2009 15:45:07 +0100
Subject: regulator: Make virtual consumer a bit more chatty

This makes it easier to read the logs when doing testing.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index 5c1d756..1441107 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -34,6 +34,8 @@ static void update_voltage_constraints(struct device *dev,
 
 	if (data->min_uV && data->max_uV
 	    && data->min_uV <= data->max_uV) {
+		dev_dbg(dev, "Requesting %d-%duV\n",
+			data->min_uV, data->max_uV);
 		ret = regulator_set_voltage(data->regulator,
 					data->min_uV, data->max_uV);
 		if (ret != 0) {
@@ -44,6 +46,7 @@ static void update_voltage_constraints(struct device *dev,
 	}
 
 	if (data->min_uV && data->max_uV && !data->enabled) {
+		dev_dbg(dev, "Enabling regulator\n");
 		ret = regulator_enable(data->regulator);
 		if (ret == 0)
 			data->enabled = 1;
@@ -53,6 +56,7 @@ static void update_voltage_constraints(struct device *dev,
 	}
 
 	if (!(data->min_uV && data->max_uV) && data->enabled) {
+		dev_dbg(dev, "Disabling regulator\n");
 		ret = regulator_disable(data->regulator);
 		if (ret == 0)
 			data->enabled = 0;
@@ -69,6 +73,8 @@ static void update_current_limit_constraints(struct device *dev,
 
 	if (data->max_uA
 	    && data->min_uA <= data->max_uA) {
+		dev_dbg(dev, "Requesting %d-%duA\n",
+			data->min_uA, data->max_uA);
 		ret = regulator_set_current_limit(data->regulator,
 					data->min_uA, data->max_uA);
 		if (ret != 0) {
@@ -80,6 +86,7 @@ static void update_current_limit_constraints(struct device *dev,
 	}
 
 	if (data->max_uA && !data->enabled) {
+		dev_dbg(dev, "Enabling regulator\n");
 		ret = regulator_enable(data->regulator);
 		if (ret == 0)
 			data->enabled = 1;
@@ -89,6 +96,7 @@ static void update_current_limit_constraints(struct device *dev,
 	}
 
 	if (!(data->min_uA && data->max_uA) && data->enabled) {
+		dev_dbg(dev, "Disabling regulator\n");
 		ret = regulator_disable(data->regulator);
 		if (ret == 0)
 			data->enabled = 0;
-- 
cgit v0.10.2


From 40f9244f4da8976eeb6d5ed6313c635ba238a9d3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 17 Jun 2009 17:56:39 +0100
Subject: regulator: Allow consumer supplies to be set up with dev_name()

Follow the approach suggested by Russell King and implemented by him in
the clkdev API and allow consumer device supply mapings to be set up
using the dev_name() for the consumer instead of the struct device.
In order to avoid making existing machines instabuggy and creating merge
issues the use of struct device is still supported for the time being.

This resolves problems working with buses such as I2C which make the
struct device available late providing that the final device name is
known, which is the case for most embedded systems with fixed setups.

Consumers must still use the struct device when calling regulator_get().

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 91ba9bf..24e05b7 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -37,7 +37,7 @@ static int has_full_constraints;
  */
 struct regulator_map {
 	struct list_head list;
-	struct device *dev;
+	const char *dev_name;   /* The dev_name() for the consumer */
 	const char *supply;
 	struct regulator_dev *regulator;
 };
@@ -857,23 +857,33 @@ out:
  * set_consumer_device_supply: Bind a regulator to a symbolic supply
  * @rdev:         regulator source
  * @consumer_dev: device the supply applies to
+ * @consumer_dev_name: dev_name() string for device supply applies to
  * @supply:       symbolic name for supply
  *
  * Allows platform initialisation code to map physical regulator
  * sources to symbolic names for supplies for use by devices.  Devices
  * should use these symbolic names to request regulators, avoiding the
  * need to provide board-specific regulator names as platform data.
+ *
+ * Only one of consumer_dev and consumer_dev_name may be specified.
  */
 static int set_consumer_device_supply(struct regulator_dev *rdev,
-	struct device *consumer_dev, const char *supply)
+	struct device *consumer_dev, const char *consumer_dev_name,
+	const char *supply)
 {
 	struct regulator_map *node;
 
+	if (consumer_dev && consumer_dev_name)
+		return -EINVAL;
+
+	if (!consumer_dev_name && consumer_dev)
+		consumer_dev_name = dev_name(consumer_dev);
+
 	if (supply == NULL)
 		return -EINVAL;
 
 	list_for_each_entry(node, &regulator_map_list, list) {
-		if (consumer_dev != node->dev)
+		if (consumer_dev_name != node->dev_name)
 			continue;
 		if (strcmp(node->supply, supply) != 0)
 			continue;
@@ -891,25 +901,38 @@ static int set_consumer_device_supply(struct regulator_dev *rdev,
 		return -ENOMEM;
 
 	node->regulator = rdev;
-	node->dev = consumer_dev;
+	node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL);
 	node->supply = supply;
 
+	if (node->dev_name == NULL) {
+		kfree(node);
+		return -ENOMEM;
+	}
+
 	list_add(&node->list, &regulator_map_list);
 	return 0;
 }
 
 static void unset_consumer_device_supply(struct regulator_dev *rdev,
-	struct device *consumer_dev)
+	const char *consumer_dev_name, struct device *consumer_dev)
 {
 	struct regulator_map *node, *n;
 
+	if (consumer_dev && !consumer_dev_name)
+		consumer_dev_name = dev_name(consumer_dev);
+
 	list_for_each_entry_safe(node, n, &regulator_map_list, list) {
-		if (rdev == node->regulator &&
-			consumer_dev == node->dev) {
-			list_del(&node->list);
-			kfree(node);
-			return;
-		}
+		if (rdev != node->regulator)
+			continue;
+
+		if (consumer_dev_name && node->dev_name &&
+		    strcmp(consumer_dev_name, node->dev_name))
+			continue;
+
+		list_del(&node->list);
+		kfree(node->dev_name);
+		kfree(node);
+		return;
 	}
 }
 
@@ -920,6 +943,7 @@ static void unset_regulator_supplies(struct regulator_dev *rdev)
 	list_for_each_entry_safe(node, n, &regulator_map_list, list) {
 		if (rdev == node->regulator) {
 			list_del(&node->list);
+			kfree(node->dev_name);
 			kfree(node);
 			return;
 		}
@@ -1019,17 +1043,25 @@ struct regulator *regulator_get(struct device *dev, const char *id)
 	struct regulator_dev *rdev;
 	struct regulator_map *map;
 	struct regulator *regulator = ERR_PTR(-ENODEV);
+	const char *devname = NULL;
 
 	if (id == NULL) {
 		printk(KERN_ERR "regulator: get() with no identifier\n");
 		return regulator;
 	}
 
+	if (dev)
+		devname = dev_name(dev);
+
 	mutex_lock(&regulator_list_mutex);
 
 	list_for_each_entry(map, &regulator_map_list, list) {
-		if (dev == map->dev &&
-		    strcmp(map->supply, id) == 0) {
+		/* If the mapping has a device set up it must match */
+		if (map->dev_name &&
+		    (!devname || strcmp(map->dev_name, devname)))
+			continue;
+
+		if (strcmp(map->supply, id) == 0) {
 			rdev = map->regulator;
 			goto found;
 		}
@@ -2091,11 +2123,13 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
 	for (i = 0; i < init_data->num_consumer_supplies; i++) {
 		ret = set_consumer_device_supply(rdev,
 			init_data->consumer_supplies[i].dev,
+			init_data->consumer_supplies[i].dev_name,
 			init_data->consumer_supplies[i].supply);
 		if (ret < 0) {
 			for (--i; i >= 0; i--)
 				unset_consumer_device_supply(rdev,
-					init_data->consumer_supplies[i].dev);
+				    init_data->consumer_supplies[i].dev_name,
+				    init_data->consumer_supplies[i].dev);
 			goto scrub;
 		}
 	}
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index bac64fa..9328090 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -126,13 +126,18 @@ struct regulation_constraints {
 /**
  * struct regulator_consumer_supply - supply -> device mapping
  *
- * This maps a supply name to a device.
+ * This maps a supply name to a device.  Only one of dev or dev_name
+ * can be specified.  Use of dev_name allows support for buses which
+ * make struct device available late such as I2C and is the preferred
+ * form.
  *
  * @dev: Device structure for the consumer.
+ * @dev_name: Result of dev_name() for the consumer.
  * @supply: Name for the supply.
  */
 struct regulator_consumer_supply {
 	struct device *dev;	/* consumer */
+	const char *dev_name;   /* dev_name() for consumer */
 	const char *supply;	/* consumer supply - e.g. "vcc" */
 };
 
-- 
cgit v0.10.2


From 561864e8e3c263ff72bd0888aca80089027195ca Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 28 Jun 2009 09:26:42 -0700
Subject: drivers/regulator/pcf50633-regulator.c: Remove unnecessary semicolons

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/pcf50633-regulator.c b/drivers/regulator/pcf50633-regulator.c
index 8e14900..70ba775 100644
--- a/drivers/regulator/pcf50633-regulator.c
+++ b/drivers/regulator/pcf50633-regulator.c
@@ -155,7 +155,7 @@ static int pcf50633_regulator_get_voltage(struct regulator_dev *rdev)
 	int regulator_id, millivolts, volt_bits;
 	u8 regnr;
 
-	pcf = rdev_get_drvdata(rdev);;
+	pcf = rdev_get_drvdata(rdev);
 
 	regulator_id = rdev_get_id(rdev);
 	if (regulator_id >= PCF50633_NUM_REGULATORS)
-- 
cgit v0.10.2


From 9c19bc0444490e76197f47316c649590dc6f10a4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 9 Jul 2009 15:44:31 +0100
Subject: regulator: Define full constraints function with REGULATOR disabled

This allows machine drivers to build without ifdefs if they have
full constraints. Suggested by machine drivers contributed by
Haojian Zhuang <haojian.zhuang@gmail.com>.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 9328090..73a88f6 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -171,6 +171,12 @@ struct regulator_init_data {
 
 int regulator_suspend_prepare(suspend_state_t state);
 
+#ifdef CONFIG_REGULATOR
 void regulator_has_full_constraints(void);
+#else
+static inline void regulator_has_full_constraints(void)
+{
+}
+#endif
 
 #endif
-- 
cgit v0.10.2


From 0198d1163b3e0313b3f073b62384abfab1a17cff Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Fri, 26 Jun 2009 19:20:59 +0800
Subject: regulator: add buck3 in da903x driver

BUCK3 is the new component in DA9035. So there're three BUCKs in DA9035.
And there're two BUCKs in DA9034.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index b8b89ef..33dfeeb 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -64,6 +64,14 @@
 #define DA9034_MDTV2		(0x33)
 #define DA9034_MVRC		(0x34)
 
+/* DA9035 Registers. DA9034 Registers are comptabile to DA9035. */
+#define DA9035_OVER3		(0x12)
+#define DA9035_VCC2		(0x1f)
+#define DA9035_3DTV1		(0x2c)
+#define DA9035_3DTV2		(0x2d)
+#define DA9035_3VRC		(0x2e)
+#define DA9035_AUTOSKIP		(0x2f)
+
 struct da903x_regulator_info {
 	struct regulator_desc desc;
 
@@ -388,6 +396,27 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 	.enable_bit	= (ebit),					\
 }
 
+#define DA9035_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
+{									\
+	.desc	= {							\
+		.name	= #_id,						\
+		.ops	= &da9034_regulator_dvc_ops,			\
+		.type	= REGULATOR_VOLTAGE,				\
+		.id	= DA9035_ID_##_id,				\
+		.owner	= THIS_MODULE,					\
+	},								\
+	.min_uV		= (min) * 1000,					\
+	.max_uV		= (max) * 1000,					\
+	.step_uV	= (step) * 1000,				\
+	.vol_reg	= DA9035_##vreg,				\
+	.vol_shift	= (0),						\
+	.vol_nbits	= (nbits),					\
+	.update_reg	= DA9035_##ureg,				\
+	.update_bit	= (ubit),					\
+	.enable_reg	= DA9035_##ereg,				\
+	.enable_bit	= (ebit),					\
+}
+
 #define DA9034_LDO(_id, min, max, step, vreg, shift, nbits, ereg, ebit)	\
 	DA903x_LDO(DA9034, _id, min, max, step, vreg, shift, nbits, ereg, ebit)
 
@@ -435,6 +464,9 @@ static struct da903x_regulator_info da903x_regulator_info[] = {
 	DA9034_LDO(14, 1800, 3300, 100, LDO1514, 0, 4, OVER3, 0),
 	DA9034_LDO(15, 1800, 3300, 100, LDO1514, 4, 4, OVER3, 1),
 	DA9034_LDO(5, 3100, 3100, 0, INVAL, 0, 0, OVER3, 7), /* fixed @3.1V */
+
+	/* DA9035 */
+	DA9035_DVC(BUCK3, 1800, 2200, 100, 3DTV1, 3, VCC2, 0, OVER3, 3),
 };
 
 static inline struct da903x_regulator_info *find_regulator_info(int id)
diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h
index 115dbe9..c63b65c 100644
--- a/include/linux/mfd/da903x.h
+++ b/include/linux/mfd/da903x.h
@@ -1,7 +1,7 @@
 #ifndef __LINUX_PMIC_DA903X_H
 #define __LINUX_PMIC_DA903X_H
 
-/* Unified sub device IDs for DA9030/DA9034 */
+/* Unified sub device IDs for DA9030/DA9034/DA9035 */
 enum {
 	DA9030_ID_LED_1,
 	DA9030_ID_LED_2,
@@ -57,6 +57,8 @@ enum {
 	DA9034_ID_LDO13,
 	DA9034_ID_LDO14,
 	DA9034_ID_LDO15,
+
+	DA9035_ID_BUCK3,
 };
 
 /*
-- 
cgit v0.10.2


From fc4f42e7fb021340c14dfd726313be6cfdeab19e Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Wed, 8 Jul 2009 17:57:24 +0800
Subject: regulator: support da9030 BUCK in da903x driver

Support the operation of DA9030 BUCK2 in da903x driver.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Eric Miao <eric.y.miao@gmail.com>
Acked-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index 33dfeeb..49081b4 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -375,6 +375,27 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 	.enable_bit	= (ebit),					\
 }
 
+#define DA9030_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
+{									\
+	.desc	= {							\
+		.name	= #_id,						\
+		.ops	= &da9034_regulator_dvc_ops,			\
+		.type	= REGULATOR_VOLTAGE,				\
+		.id	= DA9030_ID_##_id,				\
+		.owner	= THIS_MODULE,					\
+	},								\
+	.min_uV		= (min) * 1000,					\
+	.max_uV		= (max) * 1000,					\
+	.step_uV	= (step) * 1000,				\
+	.vol_reg	= DA9030_##vreg,				\
+	.vol_shift	= (0),						\
+	.vol_nbits	= (nbits),					\
+	.update_reg	= DA9030_##ureg,				\
+	.update_bit	= (ubit),					\
+	.enable_reg	= DA9030_##ereg,				\
+	.enable_bit	= (ebit),					\
+}
+
 #define DA9034_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
 {									\
 	.desc	= {							\
@@ -425,6 +446,8 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 
 static struct da903x_regulator_info da903x_regulator_info[] = {
 	/* DA9030 */
+	DA9030_DVC(BUCK2, 850, 1625, 25, BUCK2DVM1, 5, BUCK2DVM1, 7, RCTL11, 0),
+
 	DA9030_LDO( 1, 1200, 3200, 100,    LDO1, 0, 5, RCTL12, 1),
 	DA9030_LDO( 2, 1800, 3200, 100,   LDO23, 0, 4, RCTL12, 2),
 	DA9030_LDO( 3, 1800, 3200, 100,   LDO23, 4, 4, RCTL12, 3),
-- 
cgit v0.10.2


From e88267e1646037fa2c155515c78bd01a5c81f058 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Thu, 9 Jul 2009 17:52:30 +0800
Subject: regulator: replace ADTV1 register by ADTV2 in da903x

In PXA3xx SoC family, V_CORE power doamin is supplied by BUCK1 that is
controller by ADTV1 or ADTV2 register.

By default, v1 and v2 has the same copy. If v1 or v2 is updated, the last
value that is written to either register takes effect. It means that v1
and v2 has different copy. And the actual voltage output is determinated
by last update on either register.

DA9034/35 is binded with PXA3xx SoC family. While SoC is scaling OP or
entering/exiting lower power mode, SoC needs to change voltage of V_CORE
power doamin. In order to be efficient, POWER I2C (hardcode) mode could
be enabled in SoC. In this mode, SoC will control v2 register directly.

In original DA903x driver, software will only read regulator data from v1
register. But SoC controls v2 register directly. It results that v1 and v2
isn't synchronized. Wrong data will be read from v1 register. So access v2
register in da903x driver instead.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index 49081b4..d8d251f 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -469,9 +469,9 @@ static struct da903x_regulator_info da903x_regulator_info[] = {
 	DA9030_LDO(13, 2100, 2100, 0, INVAL, 0, 0, RCTL11, 3), /* fixed @2.1V */
 
 	/* DA9034 */
-	DA9034_DVC(BUCK1, 725, 1500, 25, ADTV1, 5, VCC1, 0, OVER1, 0),
-	DA9034_DVC(BUCK2, 725, 1500, 25, CDTV1, 5, VCC1, 2, OVER1, 1),
-	DA9034_DVC(LDO2,  725, 1500, 25, SDTV1, 5, VCC1, 4, OVER1, 2),
+	DA9034_DVC(BUCK1, 725, 1500, 25, ADTV2, 5, VCC1, 0, OVER1, 0),
+	DA9034_DVC(BUCK2, 725, 1500, 25, CDTV2, 5, VCC1, 2, OVER1, 1),
+	DA9034_DVC(LDO2,  725, 1500, 25, SDTV2, 5, VCC1, 4, OVER1, 2),
 	DA9034_DVC(LDO1, 1700, 2075, 25, MDTV1, 4, VCC1, 6, OVER3, 4),
 
 	DA9034_LDO( 3, 1800, 3300, 100,  LDO643, 0, 4, OVER3, 5),
-- 
cgit v0.10.2


From ed6543243a1c557dbe2005a86f6d8e851c1ebb79 Mon Sep 17 00:00:00 2001
From: roald <roald@sh-dt-4505.(none)>
Date: Mon, 13 Jul 2009 17:25:21 +0800
Subject: regulator: add initialization macro of regulator supply

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 73a88f6..99a4e2e 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -141,6 +141,13 @@ struct regulator_consumer_supply {
 	const char *supply;	/* consumer supply - e.g. "vcc" */
 };
 
+/* Initialize struct regulator_consumer_supply */
+#define REGULATOR_SUPPLY(_name, _dev_name)			\
+{								\
+	.supply		= _name,				\
+	.dev_name	= _dev_name,				\
+}
+
 /**
  * struct regulator_init_data - regulator platform initialisation data.
  *
-- 
cgit v0.10.2


From c1b60873ca2078bfca94b73bc88ef1c5adcc928b Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Fri, 10 Jul 2009 16:03:36 +0800
Subject: regulator: support list voltage in da903x

Make da903x driver to list voltage and count voltage.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index d8d251f..236de113 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -87,6 +87,10 @@ struct da903x_regulator_info {
 	int	enable_bit;
 };
 
+static int da9034_ldo12_data[] = { 1700, 1750, 1800, 1850, 1900, 1950,
+				   2000, 2050, 2700, 2750, 2800, 2850,
+				   2900, 2950, 3000, 3050 };
+
 static inline struct device *to_da903x_dev(struct regulator_dev *rdev)
 {
 	return rdev_get_dev(rdev)->parent->parent;
@@ -170,6 +174,17 @@ static int da903x_is_enabled(struct regulator_dev *rdev)
 	return !!(reg_val & (1 << info->enable_bit));
 }
 
+static int da903x_list_voltage(struct regulator_dev *rdev, unsigned selector)
+{
+	struct da903x_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	ret = info->min_uV + info->step_uV * selector;
+	if (ret > info->max_uV)
+		return -EINVAL;
+	return ret;
+}
+
 /* DA9030 specific operations */
 static int da9030_set_ldo1_15_voltage(struct regulator_dev *rdev,
 				       int min_uV, int max_uV)
@@ -313,9 +328,18 @@ static int da9034_get_ldo12_voltage(struct regulator_dev *rdev)
 	return info->min_uV + info->step_uV * val;
 }
 
+static int da9034_list_ldo12_voltage(struct regulator_dev *rdev,
+				     unsigned selector)
+{
+	if (selector > ARRAY_SIZE(da9034_ldo12_data))
+		return -EINVAL;
+	return da9034_ldo12_data[selector] * 1000;
+}
+
 static struct regulator_ops da903x_regulator_ldo_ops = {
 	.set_voltage	= da903x_set_ldo_voltage,
 	.get_voltage	= da903x_get_voltage,
+	.list_voltage	= da903x_list_voltage,
 	.enable		= da903x_enable,
 	.disable	= da903x_disable,
 	.is_enabled	= da903x_is_enabled,
@@ -325,6 +349,7 @@ static struct regulator_ops da903x_regulator_ldo_ops = {
 static struct regulator_ops da9030_regulator_ldo14_ops = {
 	.set_voltage	= da9030_set_ldo14_voltage,
 	.get_voltage	= da9030_get_ldo14_voltage,
+	.list_voltage	= da903x_list_voltage,
 	.enable		= da903x_enable,
 	.disable	= da903x_disable,
 	.is_enabled	= da903x_is_enabled,
@@ -334,6 +359,7 @@ static struct regulator_ops da9030_regulator_ldo14_ops = {
 static struct regulator_ops da9030_regulator_ldo1_15_ops = {
 	.set_voltage	= da9030_set_ldo1_15_voltage,
 	.get_voltage	= da903x_get_voltage,
+	.list_voltage	= da903x_list_voltage,
 	.enable		= da903x_enable,
 	.disable	= da903x_disable,
 	.is_enabled	= da903x_is_enabled,
@@ -342,6 +368,7 @@ static struct regulator_ops da9030_regulator_ldo1_15_ops = {
 static struct regulator_ops da9034_regulator_dvc_ops = {
 	.set_voltage	= da9034_set_dvc_voltage,
 	.get_voltage	= da903x_get_voltage,
+	.list_voltage	= da903x_list_voltage,
 	.enable		= da903x_enable,
 	.disable	= da903x_disable,
 	.is_enabled	= da903x_is_enabled,
@@ -351,6 +378,7 @@ static struct regulator_ops da9034_regulator_dvc_ops = {
 static struct regulator_ops da9034_regulator_ldo12_ops = {
 	.set_voltage	= da9034_set_ldo12_voltage,
 	.get_voltage	= da9034_get_ldo12_voltage,
+	.list_voltage	= da9034_list_ldo12_voltage,
 	.enable		= da903x_enable,
 	.disable	= da903x_disable,
 	.is_enabled	= da903x_is_enabled,
@@ -363,6 +391,7 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 		.ops	= &da903x_regulator_ldo_ops,			\
 		.type	= REGULATOR_VOLTAGE,				\
 		.id	= _pmic##_ID_LDO##_id,				\
+		.n_voltages = (step) ? ((max - min) / step + 1) : 1,	\
 		.owner	= THIS_MODULE,					\
 	},								\
 	.min_uV		= (min) * 1000,					\
@@ -382,6 +411,7 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 		.ops	= &da9034_regulator_dvc_ops,			\
 		.type	= REGULATOR_VOLTAGE,				\
 		.id	= DA9030_ID_##_id,				\
+		.n_voltages = (step) ? ((max - min) / step + 1) : 1,	\
 		.owner	= THIS_MODULE,					\
 	},								\
 	.min_uV		= (min) * 1000,					\
@@ -403,6 +433,7 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 		.ops	= &da9034_regulator_dvc_ops,			\
 		.type	= REGULATOR_VOLTAGE,				\
 		.id	= DA9034_ID_##_id,				\
+		.n_voltages = (step) ? ((max - min) / step + 1) : 1,	\
 		.owner	= THIS_MODULE,					\
 	},								\
 	.min_uV		= (min) * 1000,					\
@@ -424,6 +455,7 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 		.ops	= &da9034_regulator_dvc_ops,			\
 		.type	= REGULATOR_VOLTAGE,				\
 		.id	= DA9035_ID_##_id,				\
+		.n_voltages = (step) ? ((max - min) / step + 1) : 1,	\
 		.owner	= THIS_MODULE,					\
 	},								\
 	.min_uV		= (min) * 1000,					\
@@ -517,8 +549,10 @@ static int __devinit da903x_regulator_probe(struct platform_device *pdev)
 	}
 
 	/* Workaround for the weird LDO12 voltage setting */
-	if (ri->desc.id == DA9034_ID_LDO12)
+	if (ri->desc.id == DA9034_ID_LDO12) {
 		ri->desc.ops = &da9034_regulator_ldo12_ops;
+		ri->desc.n_voltages = ARRAY_SIZE(da9034_ldo12_data);
+	}
 
 	if (ri->desc.id == DA9030_ID_LDO14)
 		ri->desc.ops = &da9030_regulator_ldo14_ops;
-- 
cgit v0.10.2


From 5ffbd136e6c51c8d1eec7a4a0c5d2180c81aea30 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 21 Jul 2009 16:00:23 +0100
Subject: regulator: Add regulator_get_exclusive() API

Some consumers require complete control of the regulator and can't
tolerate sharing it with other consumers, most commonly because they need
to have the regulator actually disabled so can't have other consumers
forcing it on. This new regulator_get_exclusive() API call allows these
consumers to explicitly request this, documenting the assumptions that
they are making.

In order to simplify coding of such consumers the use count for regulators
they request is forced to match the enabled state of the regulator when
it is requested. This is not possible for consumers which can share
regulators due to the need to keep track of the ownership of use counts.

A new API call is used rather than an additional argument to the existing
regulator_get() in order to avoid merge headaches with driver code in
other trees.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 24e05b7..6854900 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1025,25 +1025,15 @@ overflow_err:
 	return NULL;
 }
 
-/**
- * regulator_get - lookup and obtain a reference to a regulator.
- * @dev: device for regulator "consumer"
- * @id: Supply name or regulator ID.
- *
- * Returns a struct regulator corresponding to the regulator producer,
- * or IS_ERR() condition containing errno.
- *
- * Use of supply names configured via regulator_set_device_supply() is
- * strongly encouraged.  It is recommended that the supply name used
- * should match the name used for the supply and/or the relevant
- * device pins in the datasheet.
- */
-struct regulator *regulator_get(struct device *dev, const char *id)
+/* Internal regulator request function */
+static struct regulator *_regulator_get(struct device *dev, const char *id,
+					int exclusive)
 {
 	struct regulator_dev *rdev;
 	struct regulator_map *map;
 	struct regulator *regulator = ERR_PTR(-ENODEV);
 	const char *devname = NULL;
+	int ret;
 
 	if (id == NULL) {
 		printk(KERN_ERR "regulator: get() with no identifier\n");
@@ -1070,6 +1060,16 @@ struct regulator *regulator_get(struct device *dev, const char *id)
 	return regulator;
 
 found:
+	if (rdev->exclusive) {
+		regulator = ERR_PTR(-EPERM);
+		goto out;
+	}
+
+	if (exclusive && rdev->open_count) {
+		regulator = ERR_PTR(-EBUSY);
+		goto out;
+	}
+
 	if (!try_module_get(rdev->owner))
 		goto out;
 
@@ -1079,13 +1079,70 @@ found:
 		module_put(rdev->owner);
 	}
 
+	rdev->open_count++;
+	if (exclusive) {
+		rdev->exclusive = 1;
+
+		ret = _regulator_is_enabled(rdev);
+		if (ret > 0)
+			rdev->use_count = 1;
+		else
+			rdev->use_count = 0;
+	}
+
 out:
 	mutex_unlock(&regulator_list_mutex);
+
 	return regulator;
 }
+
+/**
+ * regulator_get - lookup and obtain a reference to a regulator.
+ * @dev: device for regulator "consumer"
+ * @id: Supply name or regulator ID.
+ *
+ * Returns a struct regulator corresponding to the regulator producer,
+ * or IS_ERR() condition containing errno.
+ *
+ * Use of supply names configured via regulator_set_device_supply() is
+ * strongly encouraged.  It is recommended that the supply name used
+ * should match the name used for the supply and/or the relevant
+ * device pins in the datasheet.
+ */
+struct regulator *regulator_get(struct device *dev, const char *id)
+{
+	return _regulator_get(dev, id, 0);
+}
 EXPORT_SYMBOL_GPL(regulator_get);
 
 /**
+ * regulator_get_exclusive - obtain exclusive access to a regulator.
+ * @dev: device for regulator "consumer"
+ * @id: Supply name or regulator ID.
+ *
+ * Returns a struct regulator corresponding to the regulator producer,
+ * or IS_ERR() condition containing errno.  Other consumers will be
+ * unable to obtain this reference is held and the use count for the
+ * regulator will be initialised to reflect the current state of the
+ * regulator.
+ *
+ * This is intended for use by consumers which cannot tolerate shared
+ * use of the regulator such as those which need to force the
+ * regulator off for correct operation of the hardware they are
+ * controlling.
+ *
+ * Use of supply names configured via regulator_set_device_supply() is
+ * strongly encouraged.  It is recommended that the supply name used
+ * should match the name used for the supply and/or the relevant
+ * device pins in the datasheet.
+ */
+struct regulator *regulator_get_exclusive(struct device *dev, const char *id)
+{
+	return _regulator_get(dev, id, 1);
+}
+EXPORT_SYMBOL_GPL(regulator_get_exclusive);
+
+/**
  * regulator_put - "free" the regulator source
  * @regulator: regulator source
  *
@@ -1113,6 +1170,9 @@ void regulator_put(struct regulator *regulator)
 	list_del(&regulator->list);
 	kfree(regulator);
 
+	rdev->open_count--;
+	rdev->exclusive = 0;
+
 	module_put(rdev->owner);
 	mutex_unlock(&regulator_list_mutex);
 }
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 277f4b9..976b57b 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -125,6 +125,8 @@ struct regulator_bulk_data {
 /* regulator get and put */
 struct regulator *__must_check regulator_get(struct device *dev,
 					     const char *id);
+struct regulator *__must_check regulator_get_exclusive(struct device *dev,
+						       const char *id);
 void regulator_put(struct regulator *regulator);
 
 /* regulator output control and status */
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index ce1be70..73c9cd6 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -162,6 +162,8 @@ struct regulator_desc {
 struct regulator_dev {
 	struct regulator_desc *desc;
 	int use_count;
+	int open_count;
+	int exclusive;
 
 	/* lists we belong to */
 	struct list_head list; /* list of all regulators */
-- 
cgit v0.10.2


From a7a1ad9066e0266c8a4357ba3dbaeebfb80f531d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 21 Jul 2009 16:00:24 +0100
Subject: regulator: Add regulator voltage range check API

Simplify checking of support for voltage ranges by providing an API which
wraps the existing count and list operations.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 6854900..e11c222 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1442,6 +1442,35 @@ int regulator_list_voltage(struct regulator *regulator, unsigned selector)
 EXPORT_SYMBOL_GPL(regulator_list_voltage);
 
 /**
+ * regulator_is_supported_voltage - check if a voltage range can be supported
+ *
+ * @regulator: Regulator to check.
+ * @min_uV: Minimum required voltage in uV.
+ * @max_uV: Maximum required voltage in uV.
+ *
+ * Returns a boolean or a negative error code.
+ */
+int regulator_is_supported_voltage(struct regulator *regulator,
+				   int min_uV, int max_uV)
+{
+	int i, voltages, ret;
+
+	ret = regulator_count_voltages(regulator);
+	if (ret < 0)
+		return ret;
+	voltages = ret;
+
+	for (i = 0; i < voltages; i++) {
+		ret = regulator_list_voltage(regulator, i);
+
+		if (ret >= min_uV && ret <= max_uV)
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
  * regulator_set_voltage - set regulator output voltage
  * @regulator: regulator source
  * @min_uV: Minimum required voltage in uV
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 976b57b..490c5b3 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -146,6 +146,8 @@ void regulator_bulk_free(int num_consumers,
 
 int regulator_count_voltages(struct regulator *regulator);
 int regulator_list_voltage(struct regulator *regulator, unsigned selector);
+int regulator_is_supported_voltage(struct regulator *regulator,
+				   int min_uV, int max_uV);
 int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV);
 int regulator_get_voltage(struct regulator *regulator);
 int regulator_set_current_limit(struct regulator *regulator,
-- 
cgit v0.10.2


From 6bf87d17c9f5b855e9dde7b3d6f726385b966814 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 21 Jul 2009 16:00:25 +0100
Subject: regulator: Warn when unregistering an in-use regulator

We're probably going to start oopsing fairly soon after this happens.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index e11c222..79a6910 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2253,6 +2253,7 @@ void regulator_unregister(struct regulator_dev *rdev)
 		return;
 
 	mutex_lock(&regulator_list_mutex);
+	WARN_ON(rdev->open_count);
 	unset_regulator_supplies(rdev);
 	list_del(&rdev->list);
 	if (rdev->supply)
-- 
cgit v0.10.2


From 9ed2099edca26d07947beb42c12bd1d6669e82bc Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 21 Jul 2009 16:00:26 +0100
Subject: regulator: Fix support for deviceless supply mappings

The patch to add support for looking up consumers by device name
had the side effect of causing us to require a device which is
at best premature since at least cpufreq still operates outside
the device model. Remove that requirement.

Reported-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 79a6910..e38db55 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -872,6 +872,7 @@ static int set_consumer_device_supply(struct regulator_dev *rdev,
 	const char *supply)
 {
 	struct regulator_map *node;
+	int has_dev;
 
 	if (consumer_dev && consumer_dev_name)
 		return -EINVAL;
@@ -882,6 +883,11 @@ static int set_consumer_device_supply(struct regulator_dev *rdev,
 	if (supply == NULL)
 		return -EINVAL;
 
+	if (consumer_dev_name != NULL)
+		has_dev = 1;
+	else
+		has_dev = 0;
+
 	list_for_each_entry(node, &regulator_map_list, list) {
 		if (consumer_dev_name != node->dev_name)
 			continue;
@@ -896,17 +902,19 @@ static int set_consumer_device_supply(struct regulator_dev *rdev,
 		return -EBUSY;
 	}
 
-	node = kmalloc(sizeof(struct regulator_map), GFP_KERNEL);
+	node = kzalloc(sizeof(struct regulator_map), GFP_KERNEL);
 	if (node == NULL)
 		return -ENOMEM;
 
 	node->regulator = rdev;
-	node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL);
 	node->supply = supply;
 
-	if (node->dev_name == NULL) {
-		kfree(node);
-		return -ENOMEM;
+	if (has_dev) {
+		node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL);
+		if (node->dev_name == NULL) {
+			kfree(node);
+			return -ENOMEM;
+		}
 	}
 
 	list_add(&node->list, &regulator_map_list);
-- 
cgit v0.10.2


From 72b86876d437a33253a47373579787b6dcc3bd36 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 21 Jul 2009 16:00:27 +0100
Subject: regulator: Improve virtual consumer probe error handling

Report errors to the user and try harder to clean up if we're not
able to probe.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index 1441107..addc032 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -286,8 +286,7 @@ static int regulator_virtual_consumer_probe(struct platform_device *pdev)
 
 	drvdata = kzalloc(sizeof(struct virtual_consumer_data), GFP_KERNEL);
 	if (drvdata == NULL) {
-		ret = -ENOMEM;
-		goto err;
+		return -ENOMEM;
 	}
 
 	mutex_init(&drvdata->lock);
@@ -302,8 +301,11 @@ static int regulator_virtual_consumer_probe(struct platform_device *pdev)
 
 	for (i = 0; i < ARRAY_SIZE(attributes); i++) {
 		ret = device_create_file(&pdev->dev, attributes[i]);
-		if (ret != 0)
-			goto err;
+		if (ret != 0) {
+			dev_err(&pdev->dev, "Failed to create attr %d: %d\n",
+				i, ret);
+			goto err_regulator;
+		}
 	}
 
 	drvdata->mode = regulator_get_mode(drvdata->regulator);
@@ -312,6 +314,8 @@ static int regulator_virtual_consumer_probe(struct platform_device *pdev)
 
 	return 0;
 
+err_regulator:
+	regulator_put(drvdata->regulator);
 err:
 	for (i = 0; i < ARRAY_SIZE(attributes); i++)
 		device_remove_file(&pdev->dev, attributes[i]);
-- 
cgit v0.10.2


From c6db182822e292575b5beb56c003e95f616407f4 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Sun, 26 Jul 2009 13:33:04 +0300
Subject: regulator: da903x: consolidate DA903[045]_DVC macros

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index 236de113..7d9c250 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -404,69 +404,25 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 	.enable_bit	= (ebit),					\
 }
 
-#define DA9030_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
+#define DA903x_DVC(_pmic, _id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
 {									\
 	.desc	= {							\
 		.name	= #_id,						\
 		.ops	= &da9034_regulator_dvc_ops,			\
 		.type	= REGULATOR_VOLTAGE,				\
-		.id	= DA9030_ID_##_id,				\
+		.id	= _pmic##_ID_##_id,				\
 		.n_voltages = (step) ? ((max - min) / step + 1) : 1,	\
 		.owner	= THIS_MODULE,					\
 	},								\
 	.min_uV		= (min) * 1000,					\
 	.max_uV		= (max) * 1000,					\
 	.step_uV	= (step) * 1000,				\
-	.vol_reg	= DA9030_##vreg,				\
-	.vol_shift	= (0),						\
-	.vol_nbits	= (nbits),					\
-	.update_reg	= DA9030_##ureg,				\
-	.update_bit	= (ubit),					\
-	.enable_reg	= DA9030_##ereg,				\
-	.enable_bit	= (ebit),					\
-}
-
-#define DA9034_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
-{									\
-	.desc	= {							\
-		.name	= #_id,						\
-		.ops	= &da9034_regulator_dvc_ops,			\
-		.type	= REGULATOR_VOLTAGE,				\
-		.id	= DA9034_ID_##_id,				\
-		.n_voltages = (step) ? ((max - min) / step + 1) : 1,	\
-		.owner	= THIS_MODULE,					\
-	},								\
-	.min_uV		= (min) * 1000,					\
-	.max_uV		= (max) * 1000,					\
-	.step_uV	= (step) * 1000,				\
-	.vol_reg	= DA9034_##vreg,				\
-	.vol_shift	= (0),						\
-	.vol_nbits	= (nbits),					\
-	.update_reg	= DA9034_##ureg,				\
-	.update_bit	= (ubit),					\
-	.enable_reg	= DA9034_##ereg,				\
-	.enable_bit	= (ebit),					\
-}
-
-#define DA9035_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
-{									\
-	.desc	= {							\
-		.name	= #_id,						\
-		.ops	= &da9034_regulator_dvc_ops,			\
-		.type	= REGULATOR_VOLTAGE,				\
-		.id	= DA9035_ID_##_id,				\
-		.n_voltages = (step) ? ((max - min) / step + 1) : 1,	\
-		.owner	= THIS_MODULE,					\
-	},								\
-	.min_uV		= (min) * 1000,					\
-	.max_uV		= (max) * 1000,					\
-	.step_uV	= (step) * 1000,				\
-	.vol_reg	= DA9035_##vreg,				\
+	.vol_reg	= _pmic##_##vreg,				\
 	.vol_shift	= (0),						\
 	.vol_nbits	= (nbits),					\
-	.update_reg	= DA9035_##ureg,				\
+	.update_reg	= _pmic##_##ureg,				\
 	.update_bit	= (ubit),					\
-	.enable_reg	= DA9035_##ereg,				\
+	.enable_reg	= _pmic##_##ereg,				\
 	.enable_bit	= (ebit),					\
 }
 
@@ -476,6 +432,18 @@ static struct regulator_ops da9034_regulator_ldo12_ops = {
 #define DA9030_LDO(_id, min, max, step, vreg, shift, nbits, ereg, ebit)	\
 	DA903x_LDO(DA9030, _id, min, max, step, vreg, shift, nbits, ereg, ebit)
 
+#define DA9030_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
+	DA903x_DVC(DA9030, _id, min, max, step, vreg, nbits, ureg, ubit, \
+		   ereg, ebit)
+
+#define DA9034_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
+	DA903x_DVC(DA9034, _id, min, max, step, vreg, nbits, ureg, ubit, \
+		   ereg, ebit)
+
+#define DA9035_DVC(_id, min, max, step, vreg, nbits, ureg, ubit, ereg, ebit) \
+	DA903x_DVC(DA9035, _id, min, max, step, vreg, nbits, ureg, ubit, \
+		   ereg, ebit)
+
 static struct da903x_regulator_info da903x_regulator_info[] = {
 	/* DA9030 */
 	DA9030_DVC(BUCK2, 850, 1625, 25, BUCK2DVM1, 5, BUCK2DVM1, 7, RCTL11, 0),
-- 
cgit v0.10.2


From c53ad7fe5759cea10137c9e176d14f8c8f22d286 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 3 Aug 2009 18:49:53 +0100
Subject: regulator: More explict error reporting for fixed regulator

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index cdc674f..9c7f956 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -70,12 +70,14 @@ static int regulator_fixed_voltage_probe(struct platform_device *pdev)
 
 	drvdata = kzalloc(sizeof(struct fixed_voltage_data), GFP_KERNEL);
 	if (drvdata == NULL) {
+		dev_err(&pdev->dev, "Failed to allocate device data\n");
 		ret = -ENOMEM;
 		goto err;
 	}
 
 	drvdata->desc.name = kstrdup(config->supply_name, GFP_KERNEL);
 	if (drvdata->desc.name == NULL) {
+		dev_err(&pdev->dev, "Failed to allocate supply name\n");
 		ret = -ENOMEM;
 		goto err;
 	}
@@ -90,6 +92,7 @@ static int regulator_fixed_voltage_probe(struct platform_device *pdev)
 					  config->init_data, drvdata);
 	if (IS_ERR(drvdata->dev)) {
 		ret = PTR_ERR(drvdata->dev);
+		dev_err(&pdev->dev, "Failed to register regulator: %d\n", ret);
 		goto err_name;
 	}
 
-- 
cgit v0.10.2


From b39480ac37951de126455991744c9dbb61bbb839 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 3 Aug 2009 18:49:54 +0100
Subject: regulator: Check for constraints before using them for name

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index e38db55..6e0c723 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -232,7 +232,7 @@ static ssize_t regulator_name_show(struct device *dev,
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 	const char *name;
 
-	if (rdev->constraints->name)
+	if (rdev->constraints && rdev->constraints->name)
 		name = rdev->constraints->name;
 	else if (rdev->desc->name)
 		name = rdev->desc->name;
-- 
cgit v0.10.2


From f25e0b4fcc38d120e704c377791158c4b2a54daa Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 3 Aug 2009 18:49:55 +0100
Subject: regulator: Check for constraints in regulator_init_complete()

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 6e0c723..dfbf431 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2409,14 +2409,14 @@ static int __init regulator_init_complete(void)
 		ops = rdev->desc->ops;
 		c = rdev->constraints;
 
-		if (c->name)
+		if (c && c->name)
 			name = c->name;
 		else if (rdev->desc->name)
 			name = rdev->desc->name;
 		else
 			name = "regulator";
 
-		if (!ops->disable || c->always_on)
+		if (!ops->disable || (c && c->always_on))
 			continue;
 
 		mutex_lock(&rdev->mutex);
-- 
cgit v0.10.2


From 9332546fe88fa88bf6a7d9b1dce53ff5d314934e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 3 Aug 2009 18:49:56 +0100
Subject: regulator: Push locking for regulator_is_enabled() out

Allows use by more of the internal regulator API code.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index dfbf431..60fcd98 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -280,8 +280,13 @@ static ssize_t regulator_state_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	mutex_lock(&rdev->mutex);
+	ret = regulator_print_state(buf, _regulator_is_enabled(rdev));
+	mutex_unlock(&rdev->mutex);
 
-	return regulator_print_state(buf, _regulator_is_enabled(rdev));
+	return ret;
 }
 static DEVICE_ATTR(state, 0444, regulator_state_show, NULL);
 
@@ -1365,20 +1370,11 @@ EXPORT_SYMBOL_GPL(regulator_force_disable);
 
 static int _regulator_is_enabled(struct regulator_dev *rdev)
 {
-	int ret;
-
-	mutex_lock(&rdev->mutex);
-
 	/* sanity check */
-	if (!rdev->desc->ops->is_enabled) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!rdev->desc->ops->is_enabled)
+		return -EINVAL;
 
-	ret = rdev->desc->ops->is_enabled(rdev);
-out:
-	mutex_unlock(&rdev->mutex);
-	return ret;
+	return rdev->desc->ops->is_enabled(rdev);
 }
 
 /**
@@ -1395,7 +1391,13 @@ out:
  */
 int regulator_is_enabled(struct regulator *regulator)
 {
-	return _regulator_is_enabled(regulator->rdev);
+	int ret;
+
+	mutex_lock(&regulator->rdev->mutex);
+	ret = _regulator_is_enabled(regulator->rdev);
+	mutex_unlock(&regulator->rdev->mutex);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(regulator_is_enabled);
 
-- 
cgit v0.10.2


From 9a2372fa7a403ba327873d0208a619d781a8a150 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 3 Aug 2009 18:49:57 +0100
Subject: regulator: regulator_enable() permission checking

The regulator_enable() code wasn't actually checking that the
machine constraints had given permission to enable the regulator.
Add code to do that, but only if the regulator is not already on
due to something like always_on or being left on at startup since
in those cases there's no physical change being introduced and the
constraint wouldn't make any sense.

Also add matching code for disable(). We need to do less there since
either regulator_enable() should have succeeded first or the board
setup makes no sense.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 60fcd98..dbf27bf 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1191,16 +1191,21 @@ void regulator_put(struct regulator *regulator)
 }
 EXPORT_SYMBOL_GPL(regulator_put);
 
+static int _regulator_can_change_status(struct regulator_dev *rdev)
+{
+	if (!rdev->constraints)
+		return 0;
+
+	if (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_STATUS)
+		return 1;
+	else
+		return 0;
+}
+
 /* locks held by regulator_enable() */
 static int _regulator_enable(struct regulator_dev *rdev)
 {
-	int ret = -EINVAL;
-
-	if (!rdev->constraints) {
-		printk(KERN_ERR "%s: %s has no constraints\n",
-		       __func__, rdev->desc->name);
-		return ret;
-	}
+	int ret;
 
 	/* do we need to enable the supply regulator first */
 	if (rdev->supply) {
@@ -1213,24 +1218,34 @@ static int _regulator_enable(struct regulator_dev *rdev)
 	}
 
 	/* check voltage and requested load before enabling */
-	if (rdev->desc->ops->enable) {
+	if (rdev->constraints &&
+	    (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS))
+		drms_uA_update(rdev);
 
-		if (rdev->constraints &&
-			(rdev->constraints->valid_ops_mask &
-			REGULATOR_CHANGE_DRMS))
-			drms_uA_update(rdev);
-
-		ret = rdev->desc->ops->enable(rdev);
-		if (ret < 0) {
-			printk(KERN_ERR "%s: failed to enable %s: %d\n",
+	if (rdev->use_count == 0) {
+		/* The regulator may on if it's not switchable or left on */
+		ret = _regulator_is_enabled(rdev);
+		if (ret == -EINVAL || ret == 0) {
+			if (!_regulator_can_change_status(rdev))
+				return -EPERM;
+
+			if (rdev->desc->ops->enable) {
+				ret = rdev->desc->ops->enable(rdev);
+				if (ret < 0)
+					return ret;
+			} else {
+				return -EINVAL;
+			}
+		} else {
+			printk(KERN_ERR "%s: is_enabled() failed for %s: %d\n",
 			       __func__, rdev->desc->name, ret);
 			return ret;
 		}
-		rdev->use_count++;
-		return ret;
 	}
 
-	return ret;
+	rdev->use_count++;
+
+	return 0;
 }
 
 /**
@@ -1270,7 +1285,8 @@ static int _regulator_disable(struct regulator_dev *rdev)
 	if (rdev->use_count == 1 && !rdev->constraints->always_on) {
 
 		/* we are last user */
-		if (rdev->desc->ops->disable) {
+		if (_regulator_can_change_status(rdev) &&
+		    rdev->desc->ops->disable) {
 			ret = rdev->desc->ops->disable(rdev);
 			if (ret < 0) {
 				printk(KERN_ERR "%s: failed to disable %s\n",
-- 
cgit v0.10.2


From a6576cff1801e2f1a9f328f02bd4cbcab7b03f91 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 4 Aug 2009 02:03:52 +0200
Subject: Regulator: Implement list_voltage for pcf50633 regulator driver.

This patch implements list_voltage for the pcf50644 regulator driver.
As the voltages are linearly scaled the code to convert register values to
voltages can be reused and most of the code can be shared with get_voltage.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/pcf50633-regulator.c b/drivers/regulator/pcf50633-regulator.c
index 70ba775..0803ffe 100644
--- a/drivers/regulator/pcf50633-regulator.c
+++ b/drivers/regulator/pcf50633-regulator.c
@@ -24,11 +24,12 @@
 #include <linux/mfd/pcf50633/core.h>
 #include <linux/mfd/pcf50633/pmic.h>
 
-#define PCF50633_REGULATOR(_name, _id) 		\
+#define PCF50633_REGULATOR(_name, _id, _n) 		\
 	{					\
 		.name = _name, 			\
 		.id = _id,			\
 		.ops = &pcf50633_regulator_ops,	\
+		.n_voltages = _n, \
 		.type = REGULATOR_VOLTAGE, 	\
 		.owner = THIS_MODULE, 		\
 	}
@@ -149,11 +150,42 @@ static int pcf50633_regulator_set_voltage(struct regulator_dev *rdev,
 	return pcf50633_reg_write(pcf, regnr, volt_bits);
 }
 
+static int pcf50633_regulator_voltage_value(enum pcf50633_regulator_id id,
+						u8 bits)
+{
+	int millivolts;
+
+	switch (id) {
+	case PCF50633_REGULATOR_AUTO:
+		millivolts = auto_voltage_value(bits);
+		break;
+	case PCF50633_REGULATOR_DOWN1:
+		millivolts = down_voltage_value(bits);
+		break;
+	case PCF50633_REGULATOR_DOWN2:
+		millivolts = down_voltage_value(bits);
+		break;
+	case PCF50633_REGULATOR_LDO1:
+	case PCF50633_REGULATOR_LDO2:
+	case PCF50633_REGULATOR_LDO3:
+	case PCF50633_REGULATOR_LDO4:
+	case PCF50633_REGULATOR_LDO5:
+	case PCF50633_REGULATOR_LDO6:
+	case PCF50633_REGULATOR_HCLDO:
+		millivolts = ldo_voltage_value(bits);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return millivolts * 1000;
+}
+
 static int pcf50633_regulator_get_voltage(struct regulator_dev *rdev)
 {
 	struct pcf50633 *pcf;
-	int regulator_id, millivolts, volt_bits;
-	u8 regnr;
+	int regulator_id;
+	u8 volt_bits, regnr;
 
 	pcf = rdev_get_drvdata(rdev);
 
@@ -164,33 +196,32 @@ static int pcf50633_regulator_get_voltage(struct regulator_dev *rdev)
 	regnr = pcf50633_regulator_registers[regulator_id];
 
 	volt_bits = pcf50633_reg_read(pcf, regnr);
-	if (volt_bits < 0)
-		return -1;
+
+	return pcf50633_regulator_voltage_value(regulator_id, volt_bits);
+}
+
+static int pcf50633_regulator_list_voltage(struct regulator_dev *rdev,
+						unsigned int index)
+{
+	struct pcf50633 *pcf;
+	int regulator_id;
+
+	pcf = rdev_get_drvdata(rdev);
+
+	regulator_id = rdev_get_id(rdev);
 
 	switch (regulator_id) {
 	case PCF50633_REGULATOR_AUTO:
-		millivolts = auto_voltage_value(volt_bits);
-		break;
-	case PCF50633_REGULATOR_DOWN1:
-		millivolts = down_voltage_value(volt_bits);
-		break;
-	case PCF50633_REGULATOR_DOWN2:
-		millivolts = down_voltage_value(volt_bits);
+		index += 0x2f;
 		break;
-	case PCF50633_REGULATOR_LDO1:
-	case PCF50633_REGULATOR_LDO2:
-	case PCF50633_REGULATOR_LDO3:
-	case PCF50633_REGULATOR_LDO4:
-	case PCF50633_REGULATOR_LDO5:
-	case PCF50633_REGULATOR_LDO6:
 	case PCF50633_REGULATOR_HCLDO:
-		millivolts = ldo_voltage_value(volt_bits);
+		index += 0x01;
 		break;
 	default:
-		return -EINVAL;
+		break;
 	}
 
-	return millivolts * 1000;
+	return pcf50633_regulator_voltage_value(regulator_id, index);
 }
 
 static int pcf50633_regulator_enable(struct regulator_dev *rdev)
@@ -246,6 +277,7 @@ static int pcf50633_regulator_is_enabled(struct regulator_dev *rdev)
 static struct regulator_ops pcf50633_regulator_ops = {
 	.set_voltage = pcf50633_regulator_set_voltage,
 	.get_voltage = pcf50633_regulator_get_voltage,
+	.list_voltage = pcf50633_regulator_list_voltage,
 	.enable = pcf50633_regulator_enable,
 	.disable = pcf50633_regulator_disable,
 	.is_enabled = pcf50633_regulator_is_enabled,
@@ -253,27 +285,27 @@ static struct regulator_ops pcf50633_regulator_ops = {
 
 static struct regulator_desc regulators[] = {
 	[PCF50633_REGULATOR_AUTO] =
-		PCF50633_REGULATOR("auto", PCF50633_REGULATOR_AUTO),
+		PCF50633_REGULATOR("auto", PCF50633_REGULATOR_AUTO, 80),
 	[PCF50633_REGULATOR_DOWN1] =
-		PCF50633_REGULATOR("down1", PCF50633_REGULATOR_DOWN1),
+		PCF50633_REGULATOR("down1", PCF50633_REGULATOR_DOWN1, 95),
 	[PCF50633_REGULATOR_DOWN2] =
-		PCF50633_REGULATOR("down2", PCF50633_REGULATOR_DOWN2),
+		PCF50633_REGULATOR("down2", PCF50633_REGULATOR_DOWN2, 95),
 	[PCF50633_REGULATOR_LDO1] =
-		PCF50633_REGULATOR("ldo1", PCF50633_REGULATOR_LDO1),
+		PCF50633_REGULATOR("ldo1", PCF50633_REGULATOR_LDO1, 27),
 	[PCF50633_REGULATOR_LDO2] =
-		PCF50633_REGULATOR("ldo2", PCF50633_REGULATOR_LDO2),
+		PCF50633_REGULATOR("ldo2", PCF50633_REGULATOR_LDO2, 27),
 	[PCF50633_REGULATOR_LDO3] =
-		PCF50633_REGULATOR("ldo3", PCF50633_REGULATOR_LDO3),
+		PCF50633_REGULATOR("ldo3", PCF50633_REGULATOR_LDO3, 27),
 	[PCF50633_REGULATOR_LDO4] =
-		PCF50633_REGULATOR("ldo4", PCF50633_REGULATOR_LDO4),
+		PCF50633_REGULATOR("ldo4", PCF50633_REGULATOR_LDO4, 27),
 	[PCF50633_REGULATOR_LDO5] =
-		PCF50633_REGULATOR("ldo5", PCF50633_REGULATOR_LDO5),
+		PCF50633_REGULATOR("ldo5", PCF50633_REGULATOR_LDO5, 27),
 	[PCF50633_REGULATOR_LDO6] =
-		PCF50633_REGULATOR("ldo6", PCF50633_REGULATOR_LDO6),
+		PCF50633_REGULATOR("ldo6", PCF50633_REGULATOR_LDO6, 27),
 	[PCF50633_REGULATOR_HCLDO] =
-		PCF50633_REGULATOR("hcldo", PCF50633_REGULATOR_HCLDO),
+		PCF50633_REGULATOR("hcldo", PCF50633_REGULATOR_HCLDO, 26),
 	[PCF50633_REGULATOR_MEMLDO] =
-		PCF50633_REGULATOR("memldo", PCF50633_REGULATOR_MEMLDO),
+		PCF50633_REGULATOR("memldo", PCF50633_REGULATOR_MEMLDO, 0),
 };
 
 static int __devinit pcf50633_regulator_probe(struct platform_device *pdev)
-- 
cgit v0.10.2


From 86d9884b6a3646bc24e57430f1f694c5171c1bf6 Mon Sep 17 00:00:00 2001
From: Roger Quadros <ext-roger.quadros@nokia.com>
Date: Thu, 6 Aug 2009 19:37:29 +0300
Subject: regulator: Add GPIO enable control to fixed voltage regulator driver

Now fixed regulators that have their enable pin connected to a GPIO line
can use the fixed regulator driver for regulator enable/disable control.
The GPIO number and polarity information is passed through platform data.
GPIO enable control is achieved using gpiolib.

Signed-off-by: Roger Quadros <ext-roger.quadros@nokia.com>
Reviewed-by: Philipp Zabel <philipp.zabel@gmail.com>
Reviewed-by: Felipe Balbi <felipe.balbi@nokia.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 9c7f956..f8b2957 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -5,6 +5,9 @@
  *
  * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
  *
+ * Copyright (c) 2009 Nokia Corporation
+ * Roger Quadros <ext-roger.quadros@nokia.com>
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of the
@@ -20,20 +23,45 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/fixed.h>
+#include <linux/gpio.h>
 
 struct fixed_voltage_data {
 	struct regulator_desc desc;
 	struct regulator_dev *dev;
 	int microvolts;
+	int gpio;
+	unsigned enable_high:1;
+	unsigned is_enabled:1;
 };
 
 static int fixed_voltage_is_enabled(struct regulator_dev *dev)
 {
-	return 1;
+	struct fixed_voltage_data *data = rdev_get_drvdata(dev);
+
+	return data->is_enabled;
 }
 
 static int fixed_voltage_enable(struct regulator_dev *dev)
 {
+	struct fixed_voltage_data *data = rdev_get_drvdata(dev);
+
+	if (gpio_is_valid(data->gpio)) {
+		gpio_set_value_cansleep(data->gpio, data->enable_high);
+		data->is_enabled = 1;
+	}
+
+	return 0;
+}
+
+static int fixed_voltage_disable(struct regulator_dev *dev)
+{
+	struct fixed_voltage_data *data = rdev_get_drvdata(dev);
+
+	if (gpio_is_valid(data->gpio)) {
+		gpio_set_value_cansleep(data->gpio, !data->enable_high);
+		data->is_enabled = 0;
+	}
+
 	return 0;
 }
 
@@ -58,6 +86,7 @@ static int fixed_voltage_list_voltage(struct regulator_dev *dev,
 static struct regulator_ops fixed_voltage_ops = {
 	.is_enabled = fixed_voltage_is_enabled,
 	.enable = fixed_voltage_enable,
+	.disable = fixed_voltage_disable,
 	.get_voltage = fixed_voltage_get_voltage,
 	.list_voltage = fixed_voltage_list_voltage,
 };
@@ -87,13 +116,62 @@ static int regulator_fixed_voltage_probe(struct platform_device *pdev)
 	drvdata->desc.n_voltages = 1;
 
 	drvdata->microvolts = config->microvolts;
+	drvdata->gpio = config->gpio;
+
+	if (gpio_is_valid(config->gpio)) {
+		drvdata->enable_high = config->enable_high;
+
+		/* FIXME: Remove below print warning
+		 *
+		 * config->gpio must be set to -EINVAL by platform code if
+		 * GPIO control is not required. However, early adopters
+		 * not requiring GPIO control may forget to initialize
+		 * config->gpio to -EINVAL. This will cause GPIO 0 to be used
+		 * for GPIO control.
+		 *
+		 * This warning will be removed once there are a couple of users
+		 * for this driver.
+		 */
+		if (!config->gpio)
+			dev_warn(&pdev->dev,
+				"using GPIO 0 for regulator enable control\n");
+
+		ret = gpio_request(config->gpio, config->supply_name);
+		if (ret) {
+			dev_err(&pdev->dev,
+			   "Could not obtain regulator enable GPIO %d: %d\n",
+							config->gpio, ret);
+			goto err_name;
+		}
+
+		/* set output direction without changing state
+		 * to prevent glitch
+		 */
+		drvdata->is_enabled = config->enabled_at_boot;
+		ret = drvdata->is_enabled ?
+				config->enable_high : !config->enable_high;
+
+		ret = gpio_direction_output(config->gpio, ret);
+		if (ret) {
+			dev_err(&pdev->dev,
+			   "Could not configure regulator enable GPIO %d direction: %d\n",
+							config->gpio, ret);
+			goto err_gpio;
+		}
+
+	} else {
+		/* Regulator without GPIO control is considered
+		 * always enabled
+		 */
+		drvdata->is_enabled = 1;
+	}
 
 	drvdata->dev = regulator_register(&drvdata->desc, &pdev->dev,
 					  config->init_data, drvdata);
 	if (IS_ERR(drvdata->dev)) {
 		ret = PTR_ERR(drvdata->dev);
 		dev_err(&pdev->dev, "Failed to register regulator: %d\n", ret);
-		goto err_name;
+		goto err_gpio;
 	}
 
 	platform_set_drvdata(pdev, drvdata);
@@ -103,6 +181,9 @@ static int regulator_fixed_voltage_probe(struct platform_device *pdev)
 
 	return 0;
 
+err_gpio:
+	if (gpio_is_valid(config->gpio))
+		gpio_free(config->gpio);
 err_name:
 	kfree(drvdata->desc.name);
 err:
@@ -118,6 +199,9 @@ static int regulator_fixed_voltage_remove(struct platform_device *pdev)
 	kfree(drvdata->desc.name);
 	kfree(drvdata);
 
+	if (gpio_is_valid(drvdata->gpio))
+		gpio_free(drvdata->gpio);
+
 	return 0;
 }
 
diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h
index 91b4da3..e94a4a1 100644
--- a/include/linux/regulator/fixed.h
+++ b/include/linux/regulator/fixed.h
@@ -5,6 +5,9 @@
  *
  * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
  *
+ * Copyright (c) 2009 Nokia Corporation
+ * Roger Quadros <ext-roger.quadros@nokia.com>
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of the
@@ -16,9 +19,30 @@
 
 struct regulator_init_data;
 
+/**
+ * struct fixed_voltage_config - fixed_voltage_config structure
+ * @supply_name:	Name of the regulator supply
+ * @microvolts:		Output voltage of regulator
+ * @gpio:		GPIO to use for enable control
+ * 			set to -EINVAL if not used
+ * @enable_high:	Polarity of enable GPIO
+ *			1 = Active high, 0 = Active low
+ * @enabled_at_boot:	Whether regulator has been enabled at
+ * 			boot or not. 1 = Yes, 0 = No
+ * 			This is used to keep the regulator at
+ * 			the default state
+ * @init_data:		regulator_init_data
+ *
+ * This structure contains fixed voltage regulator configuration
+ * information that must be passed by platform code to the fixed
+ * voltage regulator driver.
+ */
 struct fixed_voltage_config {
 	const char *supply_name;
 	int microvolts;
+	int gpio;
+	unsigned enable_high:1;
+	unsigned enabled_at_boot:1;
 	struct regulator_init_data *init_data;
 };
 
-- 
cgit v0.10.2


From e9d62698e8e5228638093c48783eb9dda788f1c3 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@nokia.com>
Date: Mon, 10 Aug 2009 09:05:13 +0300
Subject: regulator: userspace: use sysfs_create_group

and avoid introducing our own loops for creating
several sysfs entries.

Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/userspace-consumer.c b/drivers/regulator/userspace-consumer.c
index 06d2fa9..44917da 100644
--- a/drivers/regulator/userspace-consumer.c
+++ b/drivers/regulator/userspace-consumer.c
@@ -93,16 +93,21 @@ static ssize_t reg_set_state(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR(name, 0444, reg_show_name, NULL);
 static DEVICE_ATTR(state, 0644, reg_show_state, reg_set_state);
 
-static struct device_attribute *attributes[] = {
-	&dev_attr_name,
-	&dev_attr_state,
+static struct attribute *attributes[] = {
+	&dev_attr_name.attr,
+	&dev_attr_state.attr,
+	NULL,
+};
+
+static const struct attribute_group attr_group = {
+	.attrs	= attributes,
 };
 
 static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 {
 	struct regulator_userspace_consumer_data *pdata;
 	struct userspace_consumer_data *drvdata;
-	int ret, i;
+	int ret;
 
 	pdata = pdev->dev.platform_data;
 	if (!pdata)
@@ -125,31 +130,29 @@ static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 		goto err_alloc_supplies;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(attributes); i++) {
-		ret = device_create_file(&pdev->dev, attributes[i]);
-		if (ret != 0)
-			goto err_create_attrs;
-	}
+	ret = sysfs_create_group(&pdev->dev.kobj, &attr_group);
+	if (ret != 0)
+		goto err_create_attrs;
 
-	if (pdata->init_on)
+	if (pdata->init_on) {
 		ret = regulator_bulk_enable(drvdata->num_supplies,
 					    drvdata->supplies);
-
-	drvdata->enabled = pdata->init_on;
-
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to set initial state: %d\n", ret);
-		goto err_create_attrs;
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Failed to set initial state: %d\n", ret);
+			goto err_enable;
+		}
 	}
 
+	drvdata->enabled = pdata->init_on;
 	platform_set_drvdata(pdev, drvdata);
 
 	return 0;
 
-err_create_attrs:
-	for (i = 0; i < ARRAY_SIZE(attributes); i++)
-		device_remove_file(&pdev->dev, attributes[i]);
+err_enable:
+	sysfs_remove_group(&pdev->dev.kobj, &attr_group);
 
+err_create_attrs:
 	regulator_bulk_free(drvdata->num_supplies, drvdata->supplies);
 
 err_alloc_supplies:
@@ -160,10 +163,8 @@ err_alloc_supplies:
 static int regulator_userspace_consumer_remove(struct platform_device *pdev)
 {
 	struct userspace_consumer_data *data = platform_get_drvdata(pdev);
-	int i;
 
-	for (i = 0; i < ARRAY_SIZE(attributes); i++)
-		device_remove_file(&pdev->dev, attributes[i]);
+	sysfs_remove_group(&pdev->dev.kobj, &attr_group);
 
 	if (data->enabled)
 		regulator_bulk_disable(data->num_supplies, data->supplies);
-- 
cgit v0.10.2


From 30e6599d317ec83c664f341f18b5b2b57b831a6d Mon Sep 17 00:00:00 2001
From: Anuj Aggarwal <anuj.aggarwal@ti.com>
Date: Fri, 21 Aug 2009 00:39:31 +0530
Subject: Regulator: Add TPS65023 regulator driver

Adding support for TI TPS65023 regulator driver

Signed-off-by: Anuj Aggarwal <anuj.aggarwal@ti.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/tps65023-regulator.c b/drivers/regulator/tps65023-regulator.c
new file mode 100644
index 0000000..1e54f46
--- /dev/null
+++ b/drivers/regulator/tps65023-regulator.c
@@ -0,0 +1,631 @@
+/*
+ * tps65023-regulator.c
+ *
+ * Supports TPS65023 Regulator
+ *
+ * Copyright (C) 2009 Texas Instrument Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+
+/* Register definitions */
+#define	TPS65023_REG_VERSION		0
+#define	TPS65023_REG_PGOODZ		1
+#define	TPS65023_REG_MASK		2
+#define	TPS65023_REG_REG_CTRL		3
+#define	TPS65023_REG_CON_CTRL		4
+#define	TPS65023_REG_CON_CTRL2		5
+#define	TPS65023_REG_DEF_CORE		6
+#define	TPS65023_REG_DEFSLEW		7
+#define	TPS65023_REG_LDO_CTRL		8
+
+/* PGOODZ bitfields */
+#define	TPS65023_PGOODZ_PWRFAILZ	BIT(7)
+#define	TPS65023_PGOODZ_LOWBATTZ	BIT(6)
+#define	TPS65023_PGOODZ_VDCDC1		BIT(5)
+#define	TPS65023_PGOODZ_VDCDC2		BIT(4)
+#define	TPS65023_PGOODZ_VDCDC3		BIT(3)
+#define	TPS65023_PGOODZ_LDO2		BIT(2)
+#define	TPS65023_PGOODZ_LDO1		BIT(1)
+
+/* MASK bitfields */
+#define	TPS65023_MASK_PWRFAILZ		BIT(7)
+#define	TPS65023_MASK_LOWBATTZ		BIT(6)
+#define	TPS65023_MASK_VDCDC1		BIT(5)
+#define	TPS65023_MASK_VDCDC2		BIT(4)
+#define	TPS65023_MASK_VDCDC3		BIT(3)
+#define	TPS65023_MASK_LDO2		BIT(2)
+#define	TPS65023_MASK_LDO1		BIT(1)
+
+/* REG_CTRL bitfields */
+#define TPS65023_REG_CTRL_VDCDC1_EN	BIT(5)
+#define TPS65023_REG_CTRL_VDCDC2_EN	BIT(4)
+#define TPS65023_REG_CTRL_VDCDC3_EN	BIT(3)
+#define TPS65023_REG_CTRL_LDO2_EN	BIT(2)
+#define TPS65023_REG_CTRL_LDO1_EN	BIT(1)
+
+/* LDO_CTRL bitfields */
+#define TPS65023_LDO_CTRL_LDOx_SHIFT(ldo_id)	((ldo_id)*4)
+#define TPS65023_LDO_CTRL_LDOx_MASK(ldo_id)	(0xF0 >> ((ldo_id)*4))
+
+/* Number of step-down converters available */
+#define TPS65023_NUM_DCDC		3
+/* Number of LDO voltage regulators  available */
+#define TPS65023_NUM_LDO		2
+/* Number of total regulators available */
+#define TPS65023_NUM_REGULATOR	(TPS65023_NUM_DCDC + TPS65023_NUM_LDO)
+
+/* DCDCs */
+#define TPS65023_DCDC_1			0
+#define TPS65023_DCDC_2			1
+#define TPS65023_DCDC_3			2
+/* LDOs */
+#define TPS65023_LDO_1			3
+#define TPS65023_LDO_2			4
+
+#define TPS65023_MAX_REG_ID		TPS65023_LDO_2
+
+/* Supported voltage values for regulators */
+static const u16 VDCDC1_VSEL_table[] = {
+	800, 825, 850, 875,
+	900, 925, 950, 975,
+	1000, 1025, 1050, 1075,
+	1100, 1125, 1150, 1175,
+	1200, 1225, 1250, 1275,
+	1300, 1325, 1350, 1375,
+	1400, 1425, 1450, 1475,
+	1500, 1525, 1550, 1600,
+};
+
+static const u16 LDO1_VSEL_table[] = {
+	1000, 1100, 1300, 1800,
+	2200, 2600, 2800, 3150,
+};
+
+static const u16 LDO2_VSEL_table[] = {
+	1050, 1200, 1300, 1800,
+	2500, 2800, 3000, 3300,
+};
+
+static unsigned int num_voltages[] = {ARRAY_SIZE(VDCDC1_VSEL_table),
+				0, 0, ARRAY_SIZE(LDO1_VSEL_table),
+				ARRAY_SIZE(LDO2_VSEL_table)};
+
+/* Regulator specific details */
+struct tps_info {
+	const char *name;
+	unsigned min_uV;
+	unsigned max_uV;
+	bool fixed;
+	u8 table_len;
+	const u16 *table;
+};
+
+/* PMIC details */
+struct tps_pmic {
+	struct regulator_desc desc[TPS65023_NUM_REGULATOR];
+	struct i2c_client *client;
+	struct regulator_dev *rdev[TPS65023_NUM_REGULATOR];
+	const struct tps_info *info[TPS65023_NUM_REGULATOR];
+	struct mutex io_lock;
+};
+
+static inline int tps_65023_read(struct tps_pmic *tps, u8 reg)
+{
+	return i2c_smbus_read_byte_data(tps->client, reg);
+}
+
+static inline int tps_65023_write(struct tps_pmic *tps, u8 reg, u8 val)
+{
+	return i2c_smbus_write_byte_data(tps->client, reg, val);
+}
+
+static int tps_65023_set_bits(struct tps_pmic *tps, u8 reg, u8 mask)
+{
+	int err, data;
+
+	mutex_lock(&tps->io_lock);
+
+	data = tps_65023_read(tps, reg);
+	if (data < 0) {
+		dev_err(&tps->client->dev, "Read from reg 0x%x failed\n", reg);
+		err = data;
+		goto out;
+	}
+
+	data |= mask;
+	err = tps_65023_write(tps, reg, data);
+	if (err)
+		dev_err(&tps->client->dev, "Write for reg 0x%x failed\n", reg);
+
+out:
+	mutex_unlock(&tps->io_lock);
+	return err;
+}
+
+static int tps_65023_clear_bits(struct tps_pmic *tps, u8 reg, u8 mask)
+{
+	int err, data;
+
+	mutex_lock(&tps->io_lock);
+
+	data = tps_65023_read(tps, reg);
+	if (data < 0) {
+		dev_err(&tps->client->dev, "Read from reg 0x%x failed\n", reg);
+		err = data;
+		goto out;
+	}
+
+	data &= ~mask;
+
+	err = tps_65023_write(tps, reg, data);
+	if (err)
+		dev_err(&tps->client->dev, "Write for reg 0x%x failed\n", reg);
+
+out:
+	mutex_unlock(&tps->io_lock);
+	return err;
+
+}
+
+static int tps_65023_reg_read(struct tps_pmic *tps, u8 reg)
+{
+	int data;
+
+	mutex_lock(&tps->io_lock);
+
+	data = tps_65023_read(tps, reg);
+	if (data < 0)
+		dev_err(&tps->client->dev, "Read from reg 0x%x failed\n", reg);
+
+	mutex_unlock(&tps->io_lock);
+	return data;
+}
+
+static int tps_65023_reg_write(struct tps_pmic *tps, u8 reg, u8 val)
+{
+	int err;
+
+	mutex_lock(&tps->io_lock);
+
+	err = tps_65023_write(tps, reg, val);
+	if (err < 0)
+		dev_err(&tps->client->dev, "Write for reg 0x%x failed\n", reg);
+
+	mutex_unlock(&tps->io_lock);
+	return err;
+}
+
+static int tps65023_dcdc_is_enabled(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, dcdc = rdev_get_id(dev);
+	u8 shift;
+
+	if (dcdc < TPS65023_DCDC_1 || dcdc > TPS65023_DCDC_3)
+		return -EINVAL;
+
+	shift = TPS65023_NUM_REGULATOR - dcdc;
+	data = tps_65023_reg_read(tps, TPS65023_REG_REG_CTRL);
+
+	if (data < 0)
+		return data;
+	else
+		return (data & 1<<shift) ? 1 : 0;
+}
+
+static int tps65023_ldo_is_enabled(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, ldo = rdev_get_id(dev);
+	u8 shift;
+
+	if (ldo < TPS65023_LDO_1 || ldo > TPS65023_LDO_2)
+		return -EINVAL;
+
+	shift = (ldo == TPS65023_LDO_1 ? 1 : 2);
+	data = tps_65023_reg_read(tps, TPS65023_REG_REG_CTRL);
+
+	if (data < 0)
+		return data;
+	else
+		return (data & 1<<shift) ? 1 : 0;
+}
+
+static int tps65023_dcdc_enable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int dcdc = rdev_get_id(dev);
+	u8 shift;
+
+	if (dcdc < TPS65023_DCDC_1 || dcdc > TPS65023_DCDC_3)
+		return -EINVAL;
+
+	shift = TPS65023_NUM_REGULATOR - dcdc;
+	return tps_65023_set_bits(tps, TPS65023_REG_REG_CTRL, 1 << shift);
+}
+
+static int tps65023_dcdc_disable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int dcdc = rdev_get_id(dev);
+	u8 shift;
+
+	if (dcdc < TPS65023_DCDC_1 || dcdc > TPS65023_DCDC_3)
+		return -EINVAL;
+
+	shift = TPS65023_NUM_REGULATOR - dcdc;
+	return tps_65023_clear_bits(tps, TPS65023_REG_REG_CTRL, 1 << shift);
+}
+
+static int tps65023_ldo_enable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev);
+	u8 shift;
+
+	if (ldo < TPS65023_LDO_1 || ldo > TPS65023_LDO_2)
+		return -EINVAL;
+
+	shift = (ldo == TPS65023_LDO_1 ? 1 : 2);
+	return tps_65023_set_bits(tps, TPS65023_REG_REG_CTRL, 1 << shift);
+}
+
+static int tps65023_ldo_disable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev);
+	u8 shift;
+
+	if (ldo < TPS65023_LDO_1 || ldo > TPS65023_LDO_2)
+		return -EINVAL;
+
+	shift = (ldo == TPS65023_LDO_1 ? 1 : 2);
+	return tps_65023_clear_bits(tps, TPS65023_REG_REG_CTRL, 1 << shift);
+}
+
+static int tps65023_dcdc_get_voltage(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, dcdc = rdev_get_id(dev);
+
+	if (dcdc < TPS65023_DCDC_1 || dcdc > TPS65023_DCDC_3)
+		return -EINVAL;
+
+	if (dcdc == TPS65023_DCDC_1) {
+		data = tps_65023_reg_read(tps, TPS65023_REG_DEF_CORE);
+		if (data < 0)
+			return data;
+		data &= (tps->info[dcdc]->table_len - 1);
+		return tps->info[dcdc]->table[data] * 1000;
+	} else
+		return tps->info[dcdc]->min_uV;
+}
+
+static int tps65023_dcdc_set_voltage(struct regulator_dev *dev,
+				int min_uV, int max_uV)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int dcdc = rdev_get_id(dev);
+	int vsel;
+
+	if (dcdc != TPS65023_DCDC_1)
+		return -EINVAL;
+
+	if (min_uV < tps->info[dcdc]->min_uV
+			|| min_uV > tps->info[dcdc]->max_uV)
+		return -EINVAL;
+	if (max_uV < tps->info[dcdc]->min_uV
+			|| max_uV > tps->info[dcdc]->max_uV)
+		return -EINVAL;
+
+	for (vsel = 0; vsel < tps->info[dcdc]->table_len; vsel++) {
+		int mV = tps->info[dcdc]->table[vsel];
+		int uV = mV * 1000;
+
+		/* Break at the first in-range value */
+		if (min_uV <= uV && uV <= max_uV)
+			break;
+	}
+
+	/* write to the register in case we found a match */
+	if (vsel == tps->info[dcdc]->table_len)
+		return -EINVAL;
+	else
+		return tps_65023_reg_write(tps, TPS65023_REG_DEF_CORE, vsel);
+}
+
+static int tps65023_ldo_get_voltage(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, ldo = rdev_get_id(dev);
+
+	if (ldo < TPS65023_LDO_1 || ldo > TPS65023_LDO_2)
+		return -EINVAL;
+
+	data = tps_65023_reg_read(tps, TPS65023_REG_LDO_CTRL);
+	if (data < 0)
+		return data;
+
+	data >>= (TPS65023_LDO_CTRL_LDOx_SHIFT(ldo - TPS65023_LDO_1));
+	data &= (tps->info[ldo]->table_len - 1);
+	return tps->info[ldo]->table[data] * 1000;
+}
+
+static int tps65023_ldo_set_voltage(struct regulator_dev *dev,
+				int min_uV, int max_uV)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, vsel, ldo = rdev_get_id(dev);
+
+	if (ldo < TPS65023_LDO_1 || ldo > TPS65023_LDO_2)
+		return -EINVAL;
+
+	if (min_uV < tps->info[ldo]->min_uV || min_uV > tps->info[ldo]->max_uV)
+		return -EINVAL;
+	if (max_uV < tps->info[ldo]->min_uV || max_uV > tps->info[ldo]->max_uV)
+		return -EINVAL;
+
+	for (vsel = 0; vsel < tps->info[ldo]->table_len; vsel++) {
+		int mV = tps->info[ldo]->table[vsel];
+		int uV = mV * 1000;
+
+		/* Break at the first in-range value */
+		if (min_uV <= uV && uV <= max_uV)
+			break;
+	}
+
+	if (vsel == tps->info[ldo]->table_len)
+		return -EINVAL;
+
+	data = tps_65023_reg_read(tps, TPS65023_REG_LDO_CTRL);
+	if (data < 0)
+		return data;
+
+	data &= TPS65023_LDO_CTRL_LDOx_MASK(ldo - TPS65023_LDO_1);
+	data |= (vsel << (TPS65023_LDO_CTRL_LDOx_SHIFT(ldo - TPS65023_LDO_1)));
+	return tps_65023_reg_write(tps, TPS65023_REG_LDO_CTRL, data);
+}
+
+static int tps65023_dcdc_list_voltage(struct regulator_dev *dev,
+					unsigned selector)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int dcdc = rdev_get_id(dev);
+
+	if (dcdc < TPS65023_DCDC_1 || dcdc > TPS65023_DCDC_3)
+		return -EINVAL;
+
+	if (dcdc == TPS65023_DCDC_1) {
+		if (selector >= tps->info[dcdc]->table_len)
+			return -EINVAL;
+		else
+			return tps->info[dcdc]->table[selector] * 1000;
+	} else
+		return tps->info[dcdc]->min_uV;
+}
+
+static int tps65023_ldo_list_voltage(struct regulator_dev *dev,
+					unsigned selector)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev);
+
+	if (ldo < TPS65023_LDO_1 || ldo > TPS65023_LDO_2)
+		return -EINVAL;
+
+	if (selector >= tps->info[ldo]->table_len)
+		return -EINVAL;
+	else
+		return tps->info[ldo]->table[selector] * 1000;
+}
+
+/* Operations permitted on VDCDCx */
+static struct regulator_ops tps65023_dcdc_ops = {
+	.is_enabled = tps65023_dcdc_is_enabled,
+	.enable = tps65023_dcdc_enable,
+	.disable = tps65023_dcdc_disable,
+	.get_voltage = tps65023_dcdc_get_voltage,
+	.set_voltage = tps65023_dcdc_set_voltage,
+	.list_voltage = tps65023_dcdc_list_voltage,
+};
+
+/* Operations permitted on LDOx */
+static struct regulator_ops tps65023_ldo_ops = {
+	.is_enabled = tps65023_ldo_is_enabled,
+	.enable = tps65023_ldo_enable,
+	.disable = tps65023_ldo_disable,
+	.get_voltage = tps65023_ldo_get_voltage,
+	.set_voltage = tps65023_ldo_set_voltage,
+	.list_voltage = tps65023_ldo_list_voltage,
+};
+
+static
+int tps_65023_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	static int desc_id;
+	const struct tps_info *info = (void *)id->driver_data;
+	struct regulator_init_data *init_data;
+	struct regulator_dev *rdev;
+	struct tps_pmic *tps;
+	int i;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	/**
+	 * init_data points to array of regulator_init structures
+	 * coming from the board-evm file.
+	 */
+	init_data = client->dev.platform_data;
+
+	if (!init_data)
+		return -EIO;
+
+	tps = kzalloc(sizeof(*tps), GFP_KERNEL);
+	if (!tps)
+		return -ENOMEM;
+
+	mutex_init(&tps->io_lock);
+
+	/* common for all regulators */
+	tps->client = client;
+
+	for (i = 0; i < TPS65023_NUM_REGULATOR; i++, info++, init_data++) {
+		/* Store regulator specific information */
+		tps->info[i] = info;
+
+		tps->desc[i].name = info->name;
+		tps->desc[i].id = desc_id++;
+		tps->desc[i].n_voltages = num_voltages[i];
+		tps->desc[i].ops = (i > TPS65023_DCDC_3 ?
+					&tps65023_ldo_ops : &tps65023_dcdc_ops);
+		tps->desc[i].type = REGULATOR_VOLTAGE;
+		tps->desc[i].owner = THIS_MODULE;
+
+		/* Register the regulators */
+		rdev = regulator_register(&tps->desc[i], &client->dev,
+								init_data, tps);
+		if (IS_ERR(rdev)) {
+			dev_err(&client->dev, "failed to register %s\n",
+				id->name);
+
+			/* Unregister */
+			while (i)
+				regulator_unregister(tps->rdev[--i]);
+
+			tps->client = NULL;
+
+			/* clear the client data in i2c */
+			i2c_set_clientdata(client, NULL);
+			kfree(tps);
+			return PTR_ERR(rdev);
+		}
+
+		/* Save regulator for cleanup */
+		tps->rdev[i] = rdev;
+	}
+
+	i2c_set_clientdata(client, tps);
+
+	return 0;
+}
+
+/**
+ * tps_65023_remove - TPS65023 driver i2c remove handler
+ * @client: i2c driver client device structure
+ *
+ * Unregister TPS driver as an i2c client device driver
+ */
+static int __devexit tps_65023_remove(struct i2c_client *client)
+{
+	struct tps_pmic *tps = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < TPS65023_NUM_REGULATOR; i++)
+		regulator_unregister(tps->rdev[i]);
+
+	tps->client = NULL;
+
+	/* clear the client data in i2c */
+	i2c_set_clientdata(client, NULL);
+	kfree(tps);
+
+	return 0;
+}
+
+static const struct tps_info tps65023_regs[] = {
+	{
+		.name = "VDCDC1",
+		.min_uV =  800000,
+		.max_uV = 1600000,
+		.table_len = ARRAY_SIZE(VDCDC1_VSEL_table),
+		.table = VDCDC1_VSEL_table,
+	},
+	{
+		.name = "VDCDC2",
+		.min_uV =  3300000,
+		.max_uV = 3300000,
+		.fixed = 1,
+	},
+	{
+		.name = "VDCDC3",
+		.min_uV =  1800000,
+		.max_uV = 1800000,
+		.fixed = 1,
+	},
+	{
+		.name = "LDO1",
+		.min_uV = 1000000,
+		.max_uV = 3150000,
+		.table_len = ARRAY_SIZE(LDO1_VSEL_table),
+		.table = LDO1_VSEL_table,
+	},
+	{
+		.name = "LDO2",
+		.min_uV = 1050000,
+		.max_uV = 3300000,
+		.table_len = ARRAY_SIZE(LDO2_VSEL_table),
+		.table = LDO2_VSEL_table,
+	},
+};
+
+static const struct i2c_device_id tps_65023_id = {
+	.name = "tps65023",
+	.driver_data = (unsigned long) &tps65023_regs[0],
+};
+
+MODULE_DEVICE_TABLE(i2c, tps_65023_id);
+
+static struct i2c_driver tps_65023_i2c_driver = {
+	.driver = {
+		.name = "tps65023",
+		.owner = THIS_MODULE,
+	},
+	.probe = tps_65023_probe,
+	.remove = __devexit_p(tps_65023_remove),
+	.id_table = &tps_65023_id,
+};
+
+/**
+ * tps_65023_init
+ *
+ * Module init function
+ */
+static int __init tps_65023_init(void)
+{
+	return i2c_add_driver(&tps_65023_i2c_driver);
+}
+subsys_initcall(tps_65023_init);
+
+/**
+ * tps_65023_cleanup
+ *
+ * Module exit function
+ */
+static void __exit tps_65023_cleanup(void)
+{
+	i2c_del_driver(&tps_65023_i2c_driver);
+}
+module_exit(tps_65023_cleanup);
+
+MODULE_AUTHOR("Texas Instruments");
+MODULE_DESCRIPTION("TPS65023 voltage regulator driver");
+MODULE_LICENSE("GPLv2");
-- 
cgit v0.10.2


From 3fa5b8e08296b250088b1a6b8e3db500ab1b847d Mon Sep 17 00:00:00 2001
From: Anuj Aggarwal <anuj.aggarwal@ti.com>
Date: Fri, 21 Aug 2009 00:39:39 +0530
Subject: Regulator: Add TPS6507x regulator driver

Adding support for TI TPS6507x regulator driver

Signed-off-by: Anuj Aggarwal <anuj.aggarwal@ti.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c
new file mode 100644
index 0000000..1aa3636
--- /dev/null
+++ b/drivers/regulator/tps6507x-regulator.c
@@ -0,0 +1,713 @@
+/*
+ * tps6507x-regulator.c
+ *
+ * Regulator driver for TPS65073 PMIC
+ *
+ * Copyright (C) 2009 Texas Instrument Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+
+/* Register definitions */
+#define	TPS6507X_REG_PPATH1				0X01
+#define	TPS6507X_REG_INT				0X02
+#define	TPS6507X_REG_CHGCONFIG0				0X03
+#define	TPS6507X_REG_CHGCONFIG1				0X04
+#define	TPS6507X_REG_CHGCONFIG2				0X05
+#define	TPS6507X_REG_CHGCONFIG3				0X06
+#define	TPS6507X_REG_REG_ADCONFIG			0X07
+#define	TPS6507X_REG_TSCMODE				0X08
+#define	TPS6507X_REG_ADRESULT_1				0X09
+#define	TPS6507X_REG_ADRESULT_2				0X0A
+#define	TPS6507X_REG_PGOOD				0X0B
+#define	TPS6507X_REG_PGOODMASK				0X0C
+#define	TPS6507X_REG_CON_CTRL1				0X0D
+#define	TPS6507X_REG_CON_CTRL2				0X0E
+#define	TPS6507X_REG_CON_CTRL3				0X0F
+#define	TPS6507X_REG_DEFDCDC1				0X10
+#define	TPS6507X_REG_DEFDCDC2_LOW			0X11
+#define	TPS6507X_REG_DEFDCDC2_HIGH			0X12
+#define	TPS6507X_REG_DEFDCDC3_LOW			0X13
+#define	TPS6507X_REG_DEFDCDC3_HIGH			0X14
+#define	TPS6507X_REG_DEFSLEW				0X15
+#define	TPS6507X_REG_LDO_CTRL1				0X16
+#define	TPS6507X_REG_DEFLDO2				0X17
+#define	TPS6507X_REG_WLED_CTRL1				0X18
+#define	TPS6507X_REG_WLED_CTRL2				0X19
+
+/* CON_CTRL1 bitfields */
+#define	TPS6507X_CON_CTRL1_DCDC1_ENABLE		BIT(4)
+#define	TPS6507X_CON_CTRL1_DCDC2_ENABLE		BIT(3)
+#define	TPS6507X_CON_CTRL1_DCDC3_ENABLE		BIT(2)
+#define	TPS6507X_CON_CTRL1_LDO1_ENABLE		BIT(1)
+#define	TPS6507X_CON_CTRL1_LDO2_ENABLE		BIT(0)
+
+/* DEFDCDC1 bitfields */
+#define TPS6507X_DEFDCDC1_DCDC1_EXT_ADJ_EN	BIT(7)
+#define TPS6507X_DEFDCDC1_DCDC1_MASK		0X3F
+
+/* DEFDCDC2_LOW bitfields */
+#define TPS6507X_DEFDCDC2_LOW_DCDC2_MASK	0X3F
+
+/* DEFDCDC2_HIGH bitfields */
+#define TPS6507X_DEFDCDC2_HIGH_DCDC2_MASK	0X3F
+
+/* DEFDCDC3_LOW bitfields */
+#define TPS6507X_DEFDCDC3_LOW_DCDC3_MASK	0X3F
+
+/* DEFDCDC3_HIGH bitfields */
+#define TPS6507X_DEFDCDC3_HIGH_DCDC3_MASK	0X3F
+
+/* TPS6507X_REG_LDO_CTRL1 bitfields */
+#define TPS6507X_REG_LDO_CTRL1_LDO1_MASK	0X0F
+
+/* TPS6507X_REG_DEFLDO2 bitfields */
+#define TPS6507X_REG_DEFLDO2_LDO2_MASK		0X3F
+
+/* VDCDC MASK */
+#define TPS6507X_DEFDCDCX_DCDC_MASK		0X3F
+
+/* DCDC's */
+#define TPS6507X_DCDC_1				0
+#define TPS6507X_DCDC_2				1
+#define TPS6507X_DCDC_3				2
+/* LDOs */
+#define TPS6507X_LDO_1				3
+#define TPS6507X_LDO_2				4
+
+#define TPS6507X_MAX_REG_ID			TPS6507X_LDO_2
+
+/* Number of step-down converters available */
+#define TPS6507X_NUM_DCDC			3
+/* Number of LDO voltage regulators  available */
+#define TPS6507X_NUM_LDO			2
+/* Number of total regulators available */
+#define TPS6507X_NUM_REGULATOR		(TPS6507X_NUM_DCDC + TPS6507X_NUM_LDO)
+
+/* Supported voltage values for regulators (in milliVolts) */
+static const u16 VDCDCx_VSEL_table[] = {
+	725, 750, 775, 800,
+	825, 850, 875, 900,
+	925, 950, 975, 1000,
+	1025, 1050, 1075, 1100,
+	1125, 1150, 1175, 1200,
+	1225, 1250, 1275, 1300,
+	1325, 1350, 1375, 1400,
+	1425, 1450, 1475, 1500,
+	1550, 1600, 1650, 1700,
+	1750, 1800, 1850, 1900,
+	1950, 2000, 2050, 2100,
+	2150, 2200, 2250, 2300,
+	2350, 2400, 2450, 2500,
+	2550, 2600, 2650, 2700,
+	2750, 2800, 2850, 2900,
+	3000, 3100, 3200, 3300,
+};
+
+static const u16 LDO1_VSEL_table[] = {
+	1000, 1100, 1200, 1250,
+	1300, 1350, 1400, 1500,
+	1600, 1800, 2500, 2750,
+	2800, 3000, 3100, 3300,
+};
+
+static const u16 LDO2_VSEL_table[] = {
+	725, 750, 775, 800,
+	825, 850, 875, 900,
+	925, 950, 975, 1000,
+	1025, 1050, 1075, 1100,
+	1125, 1150, 1175, 1200,
+	1225, 1250, 1275, 1300,
+	1325, 1350, 1375, 1400,
+	1425, 1450, 1475, 1500,
+	1550, 1600, 1650, 1700,
+	1750, 1800, 1850, 1900,
+	1950, 2000, 2050, 2100,
+	2150, 2200, 2250, 2300,
+	2350, 2400, 2450, 2500,
+	2550, 2600, 2650, 2700,
+	2750, 2800, 2850, 2900,
+	3000, 3100, 3200, 3300,
+};
+
+static unsigned int num_voltages[] = {ARRAY_SIZE(VDCDCx_VSEL_table),
+				ARRAY_SIZE(VDCDCx_VSEL_table),
+				ARRAY_SIZE(VDCDCx_VSEL_table),
+				ARRAY_SIZE(LDO1_VSEL_table),
+				ARRAY_SIZE(LDO2_VSEL_table)};
+
+struct tps_info {
+	const char *name;
+	unsigned min_uV;
+	unsigned max_uV;
+	u8 table_len;
+	const u16 *table;
+};
+
+struct tps_pmic {
+	struct regulator_desc desc[TPS6507X_NUM_REGULATOR];
+	struct i2c_client *client;
+	struct regulator_dev *rdev[TPS6507X_NUM_REGULATOR];
+	const struct tps_info *info[TPS6507X_NUM_REGULATOR];
+	struct mutex io_lock;
+};
+
+static inline int tps_6507x_read(struct tps_pmic *tps, u8 reg)
+{
+	return i2c_smbus_read_byte_data(tps->client, reg);
+}
+
+static inline int tps_6507x_write(struct tps_pmic *tps, u8 reg, u8 val)
+{
+	return i2c_smbus_write_byte_data(tps->client, reg, val);
+}
+
+static int tps_6507x_set_bits(struct tps_pmic *tps, u8 reg, u8 mask)
+{
+	int err, data;
+
+	mutex_lock(&tps->io_lock);
+
+	data = tps_6507x_read(tps, reg);
+	if (data < 0) {
+		dev_err(&tps->client->dev, "Read from reg 0x%x failed\n", reg);
+		err = data;
+		goto out;
+	}
+
+	data |= mask;
+	err = tps_6507x_write(tps, reg, data);
+	if (err)
+		dev_err(&tps->client->dev, "Write for reg 0x%x failed\n", reg);
+
+out:
+	mutex_unlock(&tps->io_lock);
+	return err;
+}
+
+static int tps_6507x_clear_bits(struct tps_pmic *tps, u8 reg, u8 mask)
+{
+	int err, data;
+
+	mutex_lock(&tps->io_lock);
+
+	data = tps_6507x_read(tps, reg);
+	if (data < 0) {
+		dev_err(&tps->client->dev, "Read from reg 0x%x failed\n", reg);
+		err = data;
+		goto out;
+	}
+
+	data &= ~mask;
+	err = tps_6507x_write(tps, reg, data);
+	if (err)
+		dev_err(&tps->client->dev, "Write for reg 0x%x failed\n", reg);
+
+out:
+	mutex_unlock(&tps->io_lock);
+	return err;
+}
+
+static int tps_6507x_reg_read(struct tps_pmic *tps, u8 reg)
+{
+	int data;
+
+	mutex_lock(&tps->io_lock);
+
+	data = tps_6507x_read(tps, reg);
+	if (data < 0)
+		dev_err(&tps->client->dev, "Read from reg 0x%x failed\n", reg);
+
+	mutex_unlock(&tps->io_lock);
+	return data;
+}
+
+static int tps_6507x_reg_write(struct tps_pmic *tps, u8 reg, u8 val)
+{
+	int err;
+
+	mutex_lock(&tps->io_lock);
+
+	err = tps_6507x_write(tps, reg, val);
+	if (err < 0)
+		dev_err(&tps->client->dev, "Write for reg 0x%x failed\n", reg);
+
+	mutex_unlock(&tps->io_lock);
+	return err;
+}
+
+static int tps6507x_dcdc_is_enabled(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, dcdc = rdev_get_id(dev);
+	u8 shift;
+
+	if (dcdc < TPS6507X_DCDC_1 || dcdc > TPS6507X_DCDC_3)
+		return -EINVAL;
+
+	shift = TPS6507X_MAX_REG_ID - dcdc;
+	data = tps_6507x_reg_read(tps, TPS6507X_REG_CON_CTRL1);
+
+	if (data < 0)
+		return data;
+	else
+		return (data & 1<<shift) ? 1 : 0;
+}
+
+static int tps6507x_ldo_is_enabled(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, ldo = rdev_get_id(dev);
+	u8 shift;
+
+	if (ldo < TPS6507X_LDO_1 || ldo > TPS6507X_LDO_2)
+		return -EINVAL;
+
+	shift = TPS6507X_MAX_REG_ID - ldo;
+	data = tps_6507x_reg_read(tps, TPS6507X_REG_CON_CTRL1);
+
+	if (data < 0)
+		return data;
+	else
+		return (data & 1<<shift) ? 1 : 0;
+}
+
+static int tps6507x_dcdc_enable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int dcdc = rdev_get_id(dev);
+	u8 shift;
+
+	if (dcdc < TPS6507X_DCDC_1 || dcdc > TPS6507X_DCDC_3)
+		return -EINVAL;
+
+	shift = TPS6507X_MAX_REG_ID - dcdc;
+	return tps_6507x_set_bits(tps, TPS6507X_REG_CON_CTRL1, 1 << shift);
+}
+
+static int tps6507x_dcdc_disable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int dcdc = rdev_get_id(dev);
+	u8 shift;
+
+	if (dcdc < TPS6507X_DCDC_1 || dcdc > TPS6507X_DCDC_3)
+		return -EINVAL;
+
+	shift = TPS6507X_MAX_REG_ID - dcdc;
+	return tps_6507x_clear_bits(tps, TPS6507X_REG_CON_CTRL1, 1 << shift);
+}
+
+static int tps6507x_ldo_enable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev);
+	u8 shift;
+
+	if (ldo < TPS6507X_LDO_1 || ldo > TPS6507X_LDO_2)
+		return -EINVAL;
+
+	shift = TPS6507X_MAX_REG_ID - ldo;
+	return tps_6507x_set_bits(tps, TPS6507X_REG_CON_CTRL1, 1 << shift);
+}
+
+static int tps6507x_ldo_disable(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev);
+	u8 shift;
+
+	if (ldo < TPS6507X_LDO_1 || ldo > TPS6507X_LDO_2)
+		return -EINVAL;
+
+	shift = TPS6507X_MAX_REG_ID - ldo;
+	return tps_6507x_clear_bits(tps, TPS6507X_REG_CON_CTRL1, 1 << shift);
+}
+
+static int tps6507x_dcdc_get_voltage(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, dcdc = rdev_get_id(dev);
+	u8 reg;
+
+	switch (dcdc) {
+	case TPS6507X_DCDC_1:
+		reg = TPS6507X_REG_DEFDCDC1;
+		break;
+	case TPS6507X_DCDC_2:
+		reg = TPS6507X_REG_DEFDCDC2_LOW;
+		break;
+	case TPS6507X_DCDC_3:
+		reg = TPS6507X_REG_DEFDCDC3_LOW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	data = tps_6507x_reg_read(tps, reg);
+	if (data < 0)
+		return data;
+
+	data &= TPS6507X_DEFDCDCX_DCDC_MASK;
+	return tps->info[dcdc]->table[data] * 1000;
+}
+
+static int tps6507x_dcdc_set_voltage(struct regulator_dev *dev,
+				int min_uV, int max_uV)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, vsel, dcdc = rdev_get_id(dev);
+	u8 reg;
+
+	switch (dcdc) {
+	case TPS6507X_DCDC_1:
+		reg = TPS6507X_REG_DEFDCDC1;
+		break;
+	case TPS6507X_DCDC_2:
+		reg = TPS6507X_REG_DEFDCDC2_LOW;
+		break;
+	case TPS6507X_DCDC_3:
+		reg = TPS6507X_REG_DEFDCDC3_LOW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (min_uV < tps->info[dcdc]->min_uV
+		|| min_uV > tps->info[dcdc]->max_uV)
+		return -EINVAL;
+	if (max_uV < tps->info[dcdc]->min_uV
+		|| max_uV > tps->info[dcdc]->max_uV)
+		return -EINVAL;
+
+	for (vsel = 0; vsel < tps->info[dcdc]->table_len; vsel++) {
+		int mV = tps->info[dcdc]->table[vsel];
+		int uV = mV * 1000;
+
+		/* Break at the first in-range value */
+		if (min_uV <= uV && uV <= max_uV)
+			break;
+	}
+
+	/* write to the register in case we found a match */
+	if (vsel == tps->info[dcdc]->table_len)
+		return -EINVAL;
+
+	data = tps_6507x_reg_read(tps, reg);
+	if (data < 0)
+		return data;
+
+	data &= ~TPS6507X_DEFDCDCX_DCDC_MASK;
+	data |= vsel;
+
+	return tps_6507x_reg_write(tps, reg, data);
+}
+
+static int tps6507x_ldo_get_voltage(struct regulator_dev *dev)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, ldo = rdev_get_id(dev);
+	u8 reg, mask;
+
+	if (ldo < TPS6507X_LDO_1 || ldo > TPS6507X_LDO_2)
+		return -EINVAL;
+	else {
+		reg = (ldo == TPS6507X_LDO_1 ?
+			TPS6507X_REG_LDO_CTRL1 : TPS6507X_REG_DEFLDO2);
+		mask = (ldo == TPS6507X_LDO_1 ?
+			TPS6507X_REG_LDO_CTRL1_LDO1_MASK :
+				TPS6507X_REG_DEFLDO2_LDO2_MASK);
+	}
+
+	data = tps_6507x_reg_read(tps, reg);
+	if (data < 0)
+		return data;
+
+	data &= mask;
+	return tps->info[ldo]->table[data] * 1000;
+}
+
+static int tps6507x_ldo_set_voltage(struct regulator_dev *dev,
+				int min_uV, int max_uV)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int data, vsel, ldo = rdev_get_id(dev);
+	u8 reg, mask;
+
+	if (ldo < TPS6507X_LDO_1 || ldo > TPS6507X_LDO_2)
+		return -EINVAL;
+	else {
+		reg = (ldo == TPS6507X_LDO_1 ?
+			TPS6507X_REG_LDO_CTRL1 : TPS6507X_REG_DEFLDO2);
+		mask = (ldo == TPS6507X_LDO_1 ?
+			TPS6507X_REG_LDO_CTRL1_LDO1_MASK :
+				TPS6507X_REG_DEFLDO2_LDO2_MASK);
+	}
+
+	if (min_uV < tps->info[ldo]->min_uV || min_uV > tps->info[ldo]->max_uV)
+		return -EINVAL;
+	if (max_uV < tps->info[ldo]->min_uV || max_uV > tps->info[ldo]->max_uV)
+		return -EINVAL;
+
+	for (vsel = 0; vsel < tps->info[ldo]->table_len; vsel++) {
+		int mV = tps->info[ldo]->table[vsel];
+		int uV = mV * 1000;
+
+		/* Break at the first in-range value */
+		if (min_uV <= uV && uV <= max_uV)
+			break;
+	}
+
+	if (vsel == tps->info[ldo]->table_len)
+		return -EINVAL;
+
+	data = tps_6507x_reg_read(tps, reg);
+	if (data < 0)
+		return data;
+
+	data &= ~mask;
+	data |= vsel;
+
+	return tps_6507x_reg_write(tps, reg, data);
+}
+
+static int tps6507x_dcdc_list_voltage(struct regulator_dev *dev,
+					unsigned selector)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int dcdc = rdev_get_id(dev);
+
+	if (dcdc < TPS6507X_DCDC_1 || dcdc > TPS6507X_DCDC_3)
+		return -EINVAL;
+
+	if (selector >= tps->info[dcdc]->table_len)
+		return -EINVAL;
+	else
+		return tps->info[dcdc]->table[selector] * 1000;
+}
+
+static int tps6507x_ldo_list_voltage(struct regulator_dev *dev,
+					unsigned selector)
+{
+	struct tps_pmic *tps = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev);
+
+	if (ldo < TPS6507X_LDO_1 || ldo > TPS6507X_LDO_2)
+		return -EINVAL;
+
+	if (selector >= tps->info[ldo]->table_len)
+		return -EINVAL;
+	else
+		return tps->info[ldo]->table[selector] * 1000;
+}
+
+/* Operations permitted on VDCDCx */
+static struct regulator_ops tps6507x_dcdc_ops = {
+	.is_enabled = tps6507x_dcdc_is_enabled,
+	.enable = tps6507x_dcdc_enable,
+	.disable = tps6507x_dcdc_disable,
+	.get_voltage = tps6507x_dcdc_get_voltage,
+	.set_voltage = tps6507x_dcdc_set_voltage,
+	.list_voltage = tps6507x_dcdc_list_voltage,
+};
+
+/* Operations permitted on LDOx */
+static struct regulator_ops tps6507x_ldo_ops = {
+	.is_enabled = tps6507x_ldo_is_enabled,
+	.enable = tps6507x_ldo_enable,
+	.disable = tps6507x_ldo_disable,
+	.get_voltage = tps6507x_ldo_get_voltage,
+	.set_voltage = tps6507x_ldo_set_voltage,
+	.list_voltage = tps6507x_ldo_list_voltage,
+};
+
+static
+int tps_6507x_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	static int desc_id;
+	const struct tps_info *info = (void *)id->driver_data;
+	struct regulator_init_data *init_data;
+	struct regulator_dev *rdev;
+	struct tps_pmic *tps;
+	int i;
+
+	if (!i2c_check_functionality(client->adapter,
+				I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	/**
+	 * init_data points to array of regulator_init structures
+	 * coming from the board-evm file.
+	 */
+	init_data = client->dev.platform_data;
+
+	if (!init_data)
+		return -EIO;
+
+	tps = kzalloc(sizeof(*tps), GFP_KERNEL);
+	if (!tps)
+		return -ENOMEM;
+
+	mutex_init(&tps->io_lock);
+
+	/* common for all regulators */
+	tps->client = client;
+
+	for (i = 0; i < TPS6507X_NUM_REGULATOR; i++, info++, init_data++) {
+		/* Register the regulators */
+		tps->info[i] = info;
+		tps->desc[i].name = info->name;
+		tps->desc[i].id = desc_id++;
+		tps->desc[i].n_voltages = num_voltages[i];
+		tps->desc[i].ops = (i > TPS6507X_DCDC_3 ?
+				&tps6507x_ldo_ops : &tps6507x_dcdc_ops);
+		tps->desc[i].type = REGULATOR_VOLTAGE;
+		tps->desc[i].owner = THIS_MODULE;
+
+		rdev = regulator_register(&tps->desc[i],
+					&client->dev, init_data, tps);
+		if (IS_ERR(rdev)) {
+			dev_err(&client->dev, "failed to register %s\n",
+				id->name);
+
+			/* Unregister */
+			while (i)
+				regulator_unregister(tps->rdev[--i]);
+
+			tps->client = NULL;
+
+			/* clear the client data in i2c */
+			i2c_set_clientdata(client, NULL);
+
+			kfree(tps);
+			return PTR_ERR(rdev);
+		}
+
+		/* Save regulator for cleanup */
+		tps->rdev[i] = rdev;
+	}
+
+	i2c_set_clientdata(client, tps);
+
+	return 0;
+}
+
+/**
+ * tps_6507x_remove - TPS6507x driver i2c remove handler
+ * @client: i2c driver client device structure
+ *
+ * Unregister TPS driver as an i2c client device driver
+ */
+static int __devexit tps_6507x_remove(struct i2c_client *client)
+{
+	struct tps_pmic *tps = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < TPS6507X_NUM_REGULATOR; i++)
+		regulator_unregister(tps->rdev[i]);
+
+	tps->client = NULL;
+
+	/* clear the client data in i2c */
+	i2c_set_clientdata(client, NULL);
+	kfree(tps);
+
+	return 0;
+}
+
+static const struct tps_info tps6507x_regs[] = {
+	{
+		.name = "VDCDC1",
+		.min_uV = 725000,
+		.max_uV = 3300000,
+		.table_len = ARRAY_SIZE(VDCDCx_VSEL_table),
+		.table = VDCDCx_VSEL_table,
+	},
+	{
+		.name = "VDCDC2",
+		.min_uV = 725000,
+		.max_uV = 3300000,
+		.table_len = ARRAY_SIZE(VDCDCx_VSEL_table),
+		.table = VDCDCx_VSEL_table,
+	},
+	{
+		.name = "VDCDC3",
+		.min_uV = 725000,
+		.max_uV = 3300000,
+		.table_len = ARRAY_SIZE(VDCDCx_VSEL_table),
+		.table = VDCDCx_VSEL_table,
+	},
+	{
+		.name = "LDO1",
+		.min_uV = 1000000,
+		.max_uV = 3300000,
+		.table_len = ARRAY_SIZE(LDO1_VSEL_table),
+		.table = LDO1_VSEL_table,
+	},
+	{
+		.name = "LDO2",
+		.min_uV = 725000,
+		.max_uV = 3300000,
+		.table_len = ARRAY_SIZE(LDO2_VSEL_table),
+		.table = LDO2_VSEL_table,
+	},
+};
+
+static const struct i2c_device_id tps_6507x_id = {
+	.name = "tps6507x",
+	.driver_data = (unsigned long) &tps6507x_regs[0],
+};
+MODULE_DEVICE_TABLE(i2c, tps_6507x_id);
+
+static struct i2c_driver tps_6507x_i2c_driver = {
+	.driver = {
+		.name = "tps6507x",
+		.owner = THIS_MODULE,
+	},
+	.probe = tps_6507x_probe,
+	.remove = __devexit_p(tps_6507x_remove),
+	.id_table = &tps_6507x_id,
+};
+
+/**
+ * tps_6507x_init
+ *
+ * Module init function
+ */
+static int __init tps_6507x_init(void)
+{
+	return i2c_add_driver(&tps_6507x_i2c_driver);
+}
+subsys_initcall(tps_6507x_init);
+
+/**
+ * tps_6507x_cleanup
+ *
+ * Module exit function
+ */
+static void __exit tps_6507x_cleanup(void)
+{
+	i2c_del_driver(&tps_6507x_i2c_driver);
+}
+module_exit(tps_6507x_cleanup);
+
+MODULE_AUTHOR("Texas Instruments");
+MODULE_DESCRIPTION("TPS6507x voltage regulator driver");
+MODULE_LICENSE("GPLv2");
-- 
cgit v0.10.2


From 2de798506d7300830a102e18d3d299f740475bc8 Mon Sep 17 00:00:00 2001
From: Anuj Aggarwal <anuj.aggarwal@ti.com>
Date: Fri, 21 Aug 2009 00:39:44 +0530
Subject: Regulator: Adding TPS65023 and TPS6507x in Kconfig and Makefile

Adding TPS65023 and TPS6507x regulator driver support in
drivers/regulator/Makefile and drivers/regulator/Kconfig

Signed-off-by: Anuj Aggarwal <anuj.aggarwal@ti.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 2dc42bb..29910e3 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -147,5 +147,21 @@ config REGULATOR_AB3100
 	 AB3100 analog baseband dealing with power regulators
 	 for the system.
 
+config REGULATOR_TPS65023
+	tristate "TI TPS65023 Power regulators"
+	depends on I2C
+	help
+	  This driver supports TPS65023 voltage regulator chips. TPS65023 provides
+	  three step-down converters and two general-purpose LDO voltage regulators.
+	  It supports TI's software based Class-2 SmartReflex implementation.
+
+config REGULATOR_TPS6507X
+	tristate "TI TPS6507X Power regulators"
+	depends on I2C
+	help
+	  This driver supports TPS6507X voltage regulator chips. TPS6507X provides
+	  three step-down converters and two general-purpose LDO voltage regulators.
+	  It supports TI's software based Class-2 SmartReflex implementation.
+
 endif
 
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 768b331..4257a86 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -23,4 +23,7 @@ obj-$(CONFIG_REGULATOR_PCAP) += pcap-regulator.o
 obj-$(CONFIG_REGULATOR_MC13783) += mc13783.o
 obj-$(CONFIG_REGULATOR_AB3100) += ab3100.o
 
+obj-$(CONFIG_REGULATOR_TPS65023) += tps65023-regulator.o
+obj-$(CONFIG_REGULATOR_TPS6507X) += tps6507x-regulator.o
+
 ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG
-- 
cgit v0.10.2


From 040932cdcfca9b0ac55a4f74f194c2e2c8a2527b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Fri, 21 Aug 2009 14:00:57 +0200
Subject: Fix some regulator documentation

This fixes a spelling error and an API function signature mismatch
in the regulator documentation.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt
index 0cded69..50b8fa8 100644
--- a/Documentation/power/regulator/overview.txt
+++ b/Documentation/power/regulator/overview.txt
@@ -29,7 +29,7 @@ Some terms used in this document:-
 
 
   o PMIC         - Power Management IC. An IC that contains numerous regulators
-                   and often contains other susbsystems.
+                   and often contains other subsystems.
 
 
   o Consumer     - Electronic device that is supplied power by a regulator.
diff --git a/Documentation/power/regulator/regulator.txt b/Documentation/power/regulator/regulator.txt
index 4200acc..3f8b528 100644
--- a/Documentation/power/regulator/regulator.txt
+++ b/Documentation/power/regulator/regulator.txt
@@ -10,8 +10,9 @@ Registration
 
 Drivers can register a regulator by calling :-
 
-struct regulator_dev *regulator_register(struct device *dev,
-	struct regulator_desc *regulator_desc);
+struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
+	struct device *dev, struct regulator_init_data *init_data,
+	void *driver_data);
 
 This will register the regulators capabilities and operations to the regulator
 core.
-- 
cgit v0.10.2


From 9e108d33edcb88bac3db39ba1683fc2c0591d7d4 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <lrg@slimlogic.co.uk>
Date: Mon, 24 Aug 2009 10:31:34 +0100
Subject: regulator: tps650xx - build fixes for x86_64

Fixes the following errors on both tps650xx regulator drivers :-

drivers/regulator/tps65023-regulator: struct i2c_device_id is 32 bytes.  The last of 1 is:
0x74 0x70 0x73 0x36 0x35 0x30 0x32 0x33 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
FATAL: drivers/regulator/tps65023-regulator: struct i2c_device_id is not terminated with a NULL entry!

This patch also fixes the GPL v2 licence string for both drivers.

Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/tps65023-regulator.c b/drivers/regulator/tps65023-regulator.c
index 1e54f46..07fda0a 100644
--- a/drivers/regulator/tps65023-regulator.c
+++ b/drivers/regulator/tps65023-regulator.c
@@ -587,9 +587,10 @@ static const struct tps_info tps65023_regs[] = {
 	},
 };
 
-static const struct i2c_device_id tps_65023_id = {
-	.name = "tps65023",
-	.driver_data = (unsigned long) &tps65023_regs[0],
+static const struct i2c_device_id tps_65023_id[] = {
+	{.name = "tps65023",
+	.driver_data = (unsigned long) tps65023_regs,},
+	{ },
 };
 
 MODULE_DEVICE_TABLE(i2c, tps_65023_id);
@@ -601,7 +602,7 @@ static struct i2c_driver tps_65023_i2c_driver = {
 	},
 	.probe = tps_65023_probe,
 	.remove = __devexit_p(tps_65023_remove),
-	.id_table = &tps_65023_id,
+	.id_table = tps_65023_id,
 };
 
 /**
@@ -628,4 +629,4 @@ module_exit(tps_65023_cleanup);
 
 MODULE_AUTHOR("Texas Instruments");
 MODULE_DESCRIPTION("TPS65023 voltage regulator driver");
-MODULE_LICENSE("GPLv2");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c
index 1aa3636..f8a6dfb 100644
--- a/drivers/regulator/tps6507x-regulator.c
+++ b/drivers/regulator/tps6507x-regulator.c
@@ -670,9 +670,10 @@ static const struct tps_info tps6507x_regs[] = {
 	},
 };
 
-static const struct i2c_device_id tps_6507x_id = {
-	.name = "tps6507x",
-	.driver_data = (unsigned long) &tps6507x_regs[0],
+static const struct i2c_device_id tps_6507x_id[] = {
+	{.name = "tps6507x",
+	.driver_data = (unsigned long) tps6507x_regs,},
+	{ },
 };
 MODULE_DEVICE_TABLE(i2c, tps_6507x_id);
 
@@ -683,7 +684,7 @@ static struct i2c_driver tps_6507x_i2c_driver = {
 	},
 	.probe = tps_6507x_probe,
 	.remove = __devexit_p(tps_6507x_remove),
-	.id_table = &tps_6507x_id,
+	.id_table = tps_6507x_id,
 };
 
 /**
@@ -710,4 +711,4 @@ module_exit(tps_6507x_cleanup);
 
 MODULE_AUTHOR("Texas Instruments");
 MODULE_DESCRIPTION("TPS6507x voltage regulator driver");
-MODULE_LICENSE("GPLv2");
+MODULE_LICENSE("GPL v2");
-- 
cgit v0.10.2


From a7433cff9ed8e7982de8e0f210f0325d0f3d1949 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Wed, 26 Aug 2009 12:54:04 +0200
Subject: REGULATOR Handle positive returncode from enable

This makes _regulator_enable() properly handle the case where
a regulator is already on when you try to enable it. Currently
it will erroneously handle positive return values as an error.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index dbf27bf..744ea1d 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1236,11 +1236,12 @@ static int _regulator_enable(struct regulator_dev *rdev)
 			} else {
 				return -EINVAL;
 			}
-		} else {
+		} else if (ret < 0) {
 			printk(KERN_ERR "%s: is_enabled() failed for %s: %d\n",
 			       __func__, rdev->desc->name, ret);
 			return ret;
 		}
+		/* Fallthrough on positive return values - already enabled */
 	}
 
 	rdev->use_count++;
-- 
cgit v0.10.2


From 6f2653e63a4aedf877efbbcdbd4cea7db088bf29 Mon Sep 17 00:00:00 2001
From: Michael Prokop <mika@grml.org>
Date: Sat, 5 Sep 2009 02:59:14 +0200
Subject: drivers/regulator/Kconfig: fix typo (s/Usersapce/Userspace/) in
 REGULATOR_USERSPACE_CONSUMER description

Signed-off-by: Michael Prokop <mika@grml.org>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 29910e3..2b52a4f 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -52,7 +52,7 @@ config REGULATOR_USERSPACE_CONSUMER
 	default n
 	help
 	  There are some classes of devices that are controlled entirely
-	  from user space. Usersapce consumer driver provides ability to
+	  from user space. Userspace consumer driver provides ability to
 	  control power supplies for such devices.
 
           If unsure, say no.
-- 
cgit v0.10.2


From 77bb8ff968dddb42a773c7b32d1a6a07f96f3f79 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Sun, 6 Sep 2009 21:30:18 +0200
Subject: regulator: update a filename in documentation

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Liam Girdwood <lrg@slimlogic.co.uk>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt
index 50b8fa8..ffd185b 100644
--- a/Documentation/power/regulator/overview.txt
+++ b/Documentation/power/regulator/overview.txt
@@ -168,4 +168,4 @@ relevant to non SoC devices and is split into the following four interfaces:-
       userspace via sysfs. This could be used to help monitor device power
       consumption and status.
 
-        See Documentation/ABI/testing/regulator-sysfs.txt
+        See Documentation/ABI/testing/sysfs-class-regulator
-- 
cgit v0.10.2


From 656d0498ea14c51cd8ec00081b5e0662acc72614 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 18 Sep 2009 12:56:20 -0700
Subject: regulator: fix calculation of voltage range in
 da9034_set_ldo12_voltage()

For val to be greater than 7 or less than 20 is logically always true.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Liam Girdwood <lrg@slimlogic.co.uk>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index 7d9c250..c9de730 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -301,7 +301,7 @@ static int da9034_set_ldo12_voltage(struct regulator_dev *rdev,
 	}
 
 	val = (min_uV - info->min_uV + info->step_uV - 1) / info->step_uV;
-	val = (val > 7 || val < 20) ? 8 : val - 12;
+	val = (val > 7 && val < 20) ? 8 : val - 12;
 	val <<= info->vol_shift;
 	mask = ((1 << info->vol_nbits) - 1)  << info->vol_shift;
 
-- 
cgit v0.10.2


From 2e7e65ce55566fc81036960b00e5e15f5d9578ea Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:44:43 +0200
Subject: regulator: fix typos

Fix a couple of typos I found while working with this subsystem.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt
index ce3487d..63728fe 100644
--- a/Documentation/power/regulator/machine.txt
+++ b/Documentation/power/regulator/machine.txt
@@ -87,7 +87,7 @@ static struct platform_device regulator_devices[] = {
 },
 };
 /* register regulator 1 device */
-platform_device_register(&wm8350_regulator_devices[0]);
+platform_device_register(&regulator_devices[0]);
 
 /* register regulator 2 device */
-platform_device_register(&wm8350_regulator_devices[1]);
+platform_device_register(&regulator_devices[1]);
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 99a4e2e..87f5f17 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -41,7 +41,7 @@ struct regulator;
 #define REGULATOR_CHANGE_DRMS		0x10
 
 /**
- * struct regulator_state - regulator state during low power syatem states
+ * struct regulator_state - regulator state during low power system states
  *
  * This describes a regulators state during a system wide low power state.
  *
@@ -117,10 +117,10 @@ struct regulation_constraints {
 	/* mode to set on startup */
 	unsigned int initial_mode;
 
-	/* constriant flags */
+	/* constraint flags */
 	unsigned always_on:1;	/* regulator never off when system is on */
 	unsigned boot_on:1;	/* bootloader/firmware enabled regulator */
-	unsigned apply_uV:1;	/* apply uV constraint iff min == max */
+	unsigned apply_uV:1;	/* apply uV constraint if min == max */
 };
 
 /**
diff --git a/include/linux/regulator/max1586.h b/include/linux/regulator/max1586.h
index 4456319..de9a7fa 100644
--- a/include/linux/regulator/max1586.h
+++ b/include/linux/regulator/max1586.h
@@ -36,7 +36,7 @@
  * max1586_subdev_data - regulator data
  * @id: regulator Id (either MAX1586_V3 or MAX1586_V6)
  * @name: regulator cute name (example for V3: "vcc_core")
- * @platform_data: regulator init data (contraints, supplies, ...)
+ * @platform_data: regulator init data (constraints, supplies, ...)
  */
 struct max1586_subdev_data {
 	int				id;
@@ -46,7 +46,7 @@ struct max1586_subdev_data {
 
 /**
  * max1586_platform_data - platform data for max1586
- * @num_subdevs: number of regultors used (may be 1 or 2)
+ * @num_subdevs: number of regulators used (may be 1 or 2)
  * @subdevs: regulator used
  *           At most, there will be a regulator for V3 and one for V6 voltages.
  * @v3_gain: gain on the V3 voltage output multiplied by 1e6.
-- 
cgit v0.10.2


From a954c487b95e7061d9546f6897edbc20a73454d3 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:44:44 +0200
Subject: regulator: drop 'default n'

Specifying 'default n' is superfluous.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 2b52a4f..bcbb161 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1,6 +1,5 @@
 menuconfig REGULATOR
 	bool "Voltage and Current Regulator Support"
-	default n
 	help
 	  Generic Voltage and Current Regulator support.
 
@@ -30,7 +29,6 @@ config REGULATOR_DEBUG
 
 config REGULATOR_FIXED_VOLTAGE
 	tristate "Fixed voltage regulator support"
-	default n
 	help
 	  This driver provides support for fixed voltage regulators,
 	  useful for systems which use a combination of software
@@ -38,7 +36,6 @@ config REGULATOR_FIXED_VOLTAGE
 
 config REGULATOR_VIRTUAL_CONSUMER
 	tristate "Virtual regulator consumer support"
-	default n
 	help
 	  This driver provides a virtual consumer for the voltage and
           current regulator API which provides sysfs controls for
@@ -49,7 +46,6 @@ config REGULATOR_VIRTUAL_CONSUMER
 
 config REGULATOR_USERSPACE_CONSUMER
 	tristate "Userspace regulator consumer support"
-	default n
 	help
 	  There are some classes of devices that are controlled entirely
 	  from user space. Userspace consumer driver provides ability to
@@ -59,7 +55,6 @@ config REGULATOR_USERSPACE_CONSUMER
 
 config REGULATOR_BQ24022
 	tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC"
-	default n
 	help
 	  This driver controls a TI bq24022 Charger attached via
 	  GPIOs. The provided current regulator can enable/disable
@@ -69,7 +64,6 @@ config REGULATOR_BQ24022
 config REGULATOR_MAX1586
 	tristate "Maxim 1586/1587 voltage regulator"
 	depends on I2C
-	default n
 	help
 	  This driver controls a Maxim 1586 or 1587 voltage output
 	  regulator via I2C bus. The provided regulator is suitable
-- 
cgit v0.10.2


From 12a1d933a99e1a2901575390dceea3819f2a575a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:44:45 +0200
Subject: regulator/lp3971: drop unnecessary initialization

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c
index a61018a..7803a32 100644
--- a/drivers/regulator/lp3971.c
+++ b/drivers/regulator/lp3971.c
@@ -541,7 +541,7 @@ static struct i2c_driver lp3971_i2c_driver = {
 
 static int __init lp3971_module_init(void)
 {
-	int ret = -ENODEV;
+	int ret;
 
 	ret = i2c_add_driver(&lp3971_i2c_driver);
 	if (ret != 0)
-- 
cgit v0.10.2


From d87b969d15a084503870da598c97278fb4877753 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 18 Sep 2009 22:44:46 +0200
Subject: regulator/driver: be more specific in nanodoc for is_enabled

Document the possibility that is_enabled may also return with negative
errorcodes.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 73c9cd6..31f2055 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -37,7 +37,8 @@ enum regulator_status {
  *
  * @enable: Configure the regulator as enabled.
  * @disable: Configure the regulator as disabled.
- * @is_enabled: Return 1 if the regulator is enabled, 0 otherwise.
+ * @is_enabled: Return 1 if the regulator is enabled, 0 if not.
+ *		May also return negative errno.
  *
  * @set_voltage: Set the voltage for the regulator within the range specified.
  *               The driver should select the voltage closest to min_uV.
-- 
cgit v0.10.2


From 55c1d7c60d9b269551cd7cc31e6be8323e1d94ec Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Mon, 21 Sep 2009 12:14:12 -0400
Subject: regulator: fix voltage range in da9034 ldo12

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index c9de730..aa224d9 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -301,7 +301,7 @@ static int da9034_set_ldo12_voltage(struct regulator_dev *rdev,
 	}
 
 	val = (min_uV - info->min_uV + info->step_uV - 1) / info->step_uV;
-	val = (val > 7 && val < 20) ? 8 : val - 12;
+	val = (val >= 20) ? val - 12 : ((val > 7) ? 8 : val);
 	val <<= info->vol_shift;
 	mask = ((1 << info->vol_nbits) - 1)  << info->vol_shift;
 
-- 
cgit v0.10.2


From 2a20b05f8178801c0a07324d92fe53c32177de97 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 21 Sep 2009 17:00:58 -0700
Subject: drivers/media/dvb/pt1/pt1.c needs vmalloc.h

alpha:

drivers/media/dvb/pt1/pt1.c: In function 'pt1_cleanup_tables':
drivers/media/dvb/pt1/pt1.c:422: error: implicit declaration of function 'vfree'
drivers/media/dvb/pt1/pt1.c: In function 'pt1_init_tables':
drivers/media/dvb/pt1/pt1.c:431: error: implicit declaration of function 'vmalloc'
drivers/media/dvb/pt1/pt1.c:431: warning: assignment makes pointer from integer without a cast

Acked-by: Mauro Carvalho Chehab <mchehab@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/media/dvb/pt1/pt1.c b/drivers/media/dvb/pt1/pt1.c
index 8ffbcec..81e623a 100644
--- a/drivers/media/dvb/pt1/pt1.c
+++ b/drivers/media/dvb/pt1/pt1.c
@@ -23,6 +23,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/vmalloc.h>
 #include <linux/pci.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
-- 
cgit v0.10.2


From 5be461657be65460ad92be3527e3bb1dd11c49ea Mon Sep 17 00:00:00 2001
From: Pierre Ossman <pierre@ossman.eu>
Date: Mon, 21 Sep 2009 17:00:59 -0700
Subject: sdhci: orphan driver and list

Remove myself as maintainer from the sdhci driver and steer people
towards the new MMC list for discussing it.

Signed-off-by: Pierre Ossman <pierre@ossman.eu>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 751a307..b212f55 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4533,20 +4533,20 @@ S:	Maintained
 F:	drivers/mmc/host/sdricoh_cs.c
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
-M:	Pierre Ossman <pierre@ossman.eu>
-L:	sdhci-devel@lists.ossman.eu
-S:	Maintained
+S:     Orphan
+L:     linux-mmc@vger.kernel.org
+F:     drivers/mmc/host/sdhci.*
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
 M:	Anton Vorontsov <avorontsov@ru.mvista.com>
 L:	linuxppc-dev@ozlabs.org
-L:	sdhci-devel@lists.ossman.eu
+L:     linux-mmc@vger.kernel.org
 S:	Maintained
-F:	drivers/mmc/host/sdhci.*
+F:     drivers/mmc/host/sdhci-of.*
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) SAMSUNG DRIVER
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	sdhci-devel@lists.ossman.eu
+L:     linux-mmc@vger.kernel.org
 S:	Maintained
 F:	drivers/mmc/host/sdhci-s3c.c
 
-- 
cgit v0.10.2


From 580be0837a7a59b207c3d5c661d044d8dd0a6a30 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 21 Sep 2009 17:01:06 -0700
Subject: fs: make sure data stored into inode is properly seen before
 unlocking new inode

In theory it could happen that on one CPU we initialize a new inode but
clearing of I_NEW | I_LOCK gets reordered before some of the
initialization.  Thus on another CPU we return not fully uptodate inode
from iget_locked().

This seems to fix a corruption issue on ext3 mounted over NFS.

[akpm@linux-foundation.org: add some commentary]
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/inode.c b/fs/inode.c
index b2ba83d..798052f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -695,13 +695,15 @@ void unlock_new_inode(struct inode *inode)
 	}
 #endif
 	/*
-	 * This is special!  We do not need the spinlock
-	 * when clearing I_LOCK, because we're guaranteed
-	 * that nobody else tries to do anything about the
-	 * state of the inode when it is locked, as we
-	 * just created it (so there can be no old holders
-	 * that haven't tested I_LOCK).
+	 * This is special!  We do not need the spinlock when clearing I_LOCK,
+	 * because we're guaranteed that nobody else tries to do anything about
+	 * the state of the inode when it is locked, as we just created it (so
+	 * there can be no old holders that haven't tested I_LOCK).
+	 * However we must emit the memory barrier so that other CPUs reliably
+	 * see the clearing of I_LOCK after the other inode initialisation has
+	 * completed.
 	 */
+	smp_mb();
 	WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
 	inode->i_state &= ~(I_LOCK|I_NEW);
 	wake_up_inode(inode);
-- 
cgit v0.10.2


From c574358e8b48adf646f9d5ef70dc76c5d4ad9387 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Sep 2009 17:01:06 -0700
Subject: proc: document `guest' column in /proc/stat

We added a new column in cpuX lines of /proc/stat, to show the amount of
time spent by a cpu servicing a guest, without updating
Documentation/filesystems/proc.txt

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ffead13..1c96cb6 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1032,9 +1032,9 @@ Various pieces   of  information about  kernel activity  are  available in the
 since the system first booted.  For a quick look, simply cat the file:
 
   > cat /proc/stat
-  cpu  2255 34 2290 22625563 6290 127 456 0
-  cpu0 1132 34 1441 11311718 3675 127 438 0
-  cpu1 1123 0 849 11313845 2614 0 18 0
+  cpu  2255 34 2290 22625563 6290 127 456 0 0
+  cpu0 1132 34 1441 11311718 3675 127 438 0 0
+  cpu1 1123 0 849 11313845 2614 0 18 0 0
   intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...]
   ctxt 1990473
   btime 1062191376
@@ -1056,6 +1056,7 @@ second).  The meanings of the columns are as follows, from left to right:
 - irq: servicing interrupts
 - softirq: servicing softirqs
 - steal: involuntary wait
+- guest: running a guest
 
 The "intr" line gives counts of interrupts  serviced since boot time, for each
 of the  possible system interrupts.   The first  column  is the  total of  all
-- 
cgit v0.10.2


From 00d3803b656a5f0935518d746f6bb27d5181d29d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 21 Sep 2009 17:01:07 -0700
Subject: drivers/mfd/ab3100-core.c: fix powerpc build error

drivers/mfd/ab3100-core.c:647: error: ab3100_init_settings causes a section type conflict

Cc: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c
index c533f86..5447da1 100644
--- a/drivers/mfd/ab3100-core.c
+++ b/drivers/mfd/ab3100-core.c
@@ -647,7 +647,7 @@ struct ab3100_init_setting {
 	u8 setting;
 };
 
-static const struct ab3100_init_setting __initdata
+static const struct ab3100_init_setting __initconst
 ab3100_init_settings[] = {
 	{
 		.abreg = AB3100_MCA,
-- 
cgit v0.10.2


From 61e225dc341107be304fd1088146c2a5e88ff9e0 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:08 -0700
Subject: const: make struct super_block::dq_op const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index a8d80a7..e7a4e11 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -720,7 +720,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
 static ssize_t ext3_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 
-static struct dquot_operations ext3_quota_operations = {
+static const struct dquot_operations ext3_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a6b1ab7..7ffb62e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -964,7 +964,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 static ssize_t ext4_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 
-static struct dquot_operations ext4_quota_operations = {
+static const struct dquot_operations ext4_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 3fb96fcd..e5df9d1 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -109,7 +109,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
 int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
 			   struct buffer_head **bh);
 
-extern struct dquot_operations ocfs2_quota_operations;
+extern const struct dquot_operations ocfs2_quota_operations;
 extern struct quota_format_type ocfs2_quota_format;
 
 int ocfs2_quota_setup(void);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 44f2a5e..3af4954 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -849,7 +849,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 	kmem_cache_free(ocfs2_dquot_cachep, dquot);
 }
 
-struct dquot_operations ocfs2_quota_operations = {
+const struct dquot_operations ocfs2_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 38f7bd5..635ae2e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1839,7 +1839,7 @@ EXPORT_SYMBOL(dquot_commit_info);
 /*
  * Definitions of diskquota operations.
  */
-struct dquot_operations dquot_operations = {
+const struct dquot_operations dquot_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7adea74..09c93c1 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -612,7 +612,7 @@ static int reiserfs_mark_dquot_dirty(struct dquot *);
 static int reiserfs_write_info(struct super_block *, int);
 static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 
-static struct dquot_operations reiserfs_quota_operations = {
+static const struct dquot_operations reiserfs_quota_operations = {
 	.initialize = dquot_initialize,
 	.drop = dquot_drop,
 	.alloc_space = dquot_alloc_space,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 90162fb..83e1a0c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1318,7 +1318,7 @@ struct super_block {
 	unsigned long long	s_maxbytes;	/* Max file size */
 	struct file_system_type	*s_type;
 	const struct super_operations	*s_op;
-	struct dquot_operations	*dq_op;
+	const struct dquot_operations	*dq_op;
  	struct quotactl_ops	*s_qcop;
 	const struct export_operations *s_export_op;
 	unsigned long		s_flags;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 26361c4..8dcbdb6 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -135,7 +135,7 @@ static inline int sb_any_quota_active(struct super_block *sb)
 /*
  * Operations supported for diskquotas.
  */
-extern struct dquot_operations dquot_operations;
+extern const struct dquot_operations dquot_operations;
 extern struct quotactl_ops vfs_quotactl_ops;
 
 #define sb_dquot_ops (&dquot_operations)
-- 
cgit v0.10.2


From 0d54b217a247f39605361f867fefbb9e099a5432 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:09 -0700
Subject: const: make struct super_block::s_qcop const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3610e99..d79ce2e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -50,7 +50,7 @@
 #define CIFS_MAGIC_NUMBER 0xFF534D42	/* the first four bytes of SMB PDUs */
 
 #ifdef CONFIG_CIFS_QUOTA
-static struct quotactl_ops cifs_quotactl_ops;
+static const struct quotactl_ops cifs_quotactl_ops;
 #endif /* QUOTA */
 
 int cifsFYI = 0;
@@ -517,7 +517,7 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
 	return rc;
 }
 
-static struct quotactl_ops cifs_quotactl_ops = {
+static const struct quotactl_ops cifs_quotactl_ops = {
 	.set_xquota	= cifs_xquota_set,
 	.get_xquota	= cifs_xquota_get,
 	.set_xstate	= cifs_xstate_set,
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index e7a4e11..72743d3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -737,7 +737,7 @@ static const struct dquot_operations ext3_quota_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
-static struct quotactl_ops ext3_qctl_operations = {
+static const struct quotactl_ops ext3_qctl_operations = {
 	.quota_on	= ext3_quota_on,
 	.quota_off	= vfs_quota_off,
 	.quota_sync	= vfs_quota_sync,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7ffb62e..df539ba 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -985,7 +985,7 @@ static const struct dquot_operations ext4_quota_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
-static struct quotactl_ops ext4_qctl_operations = {
+static const struct quotactl_ops ext4_qctl_operations = {
 	.quota_on	= ext4_quota_on,
 	.quota_off	= vfs_quota_off,
 	.quota_sync	= vfs_quota_sync,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a3f8871..faca472 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -965,7 +965,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
 	return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
 }
 
-static struct quotactl_ops ocfs2_quotactl_ops = {
+static const struct quotactl_ops ocfs2_quotactl_ops = {
 	.quota_on	= ocfs2_quota_on,
 	.quota_off	= ocfs2_quota_off,
 	.quota_sync	= vfs_quota_sync,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 635ae2e..39b49c4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2461,7 +2461,7 @@ out:
 }
 EXPORT_SYMBOL(vfs_set_dqinfo);
 
-struct quotactl_ops vfs_quotactl_ops = {
+const struct quotactl_ops vfs_quotactl_ops = {
 	.quota_on	= vfs_quota_on,
 	.quota_off	= vfs_quota_off,
 	.quota_sync	= vfs_quota_sync,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 09c93c1..f0ad05f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -629,7 +629,7 @@ static const struct dquot_operations reiserfs_quota_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
-static struct quotactl_ops reiserfs_qctl_operations = {
+static const struct quotactl_ops reiserfs_qctl_operations = {
 	.quota_on = reiserfs_quota_on,
 	.quota_off = vfs_quota_off,
 	.quota_sync = vfs_quota_sync,
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index cb6e2cc..9e41f91 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -150,7 +150,7 @@ xfs_fs_set_xquota(
 	return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
 }
 
-struct quotactl_ops xfs_quotactl_operations = {
+const struct quotactl_ops xfs_quotactl_operations = {
 	.quota_sync		= xfs_fs_quota_sync,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 5a2ea3a..18175eb 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -93,7 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern const struct export_operations xfs_export_operations;
 extern struct xattr_handler *xfs_xattr_handlers[];
-extern struct quotactl_ops xfs_quotactl_operations;
+extern const struct quotactl_ops xfs_quotactl_operations;
 
 #define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 83e1a0c..13c030e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1319,7 +1319,7 @@ struct super_block {
 	struct file_system_type	*s_type;
 	const struct super_operations	*s_op;
 	const struct dquot_operations	*dq_op;
- 	struct quotactl_ops	*s_qcop;
+	const struct quotactl_ops	*s_qcop;
 	const struct export_operations *s_export_op;
 	unsigned long		s_flags;
 	unsigned long		s_magic;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 8dcbdb6..3ebb231 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -136,7 +136,7 @@ static inline int sb_any_quota_active(struct super_block *sb)
  * Operations supported for diskquotas.
  */
 extern const struct dquot_operations dquot_operations;
-extern struct quotactl_ops vfs_quotactl_ops;
+extern const struct quotactl_ops vfs_quotactl_ops;
 
 #define sb_dquot_ops (&dquot_operations)
 #define sb_quotactl_ops (&vfs_quotactl_ops)
-- 
cgit v0.10.2


From b87221de6a4934eda856475a0065688d12973a04 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:09 -0700
Subject: const: mark remaining super_operations const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 24b30b6..78f2a52 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -773,7 +773,7 @@ static int
 spufs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct spufs_sb_info *info;
-	static struct super_operations s_ops = {
+	static const struct super_operations s_ops = {
 		.alloc_inode = spufs_alloc_inode,
 		.destroy_inode = spufs_destroy_inode,
 		.statfs = simple_statfs,
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index bd9914b..dc2d666 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -41,7 +41,7 @@ struct hypfs_sb_info {
 
 static const struct file_operations hypfs_file_ops;
 static struct file_system_type hypfs_type;
-static struct super_operations hypfs_s_ops;
+static const struct super_operations hypfs_s_ops;
 
 /* start of list of all dentries, which have to be deleted on update */
 static struct dentry *hypfs_last_dentry;
@@ -472,7 +472,7 @@ static struct file_system_type hypfs_type = {
 	.kill_sb	= hypfs_kill_super
 };
 
-static struct super_operations hypfs_s_ops = {
+static const struct super_operations hypfs_s_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= hypfs_drop_inode,
 	.show_options	= hypfs_show_options,
diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c
index bff72d8..9f8f67b 100644
--- a/drivers/isdn/capi/capifs.c
+++ b/drivers/isdn/capi/capifs.c
@@ -89,7 +89,7 @@ static int capifs_remount(struct super_block *s, int *flags, char *data)
 	return 0;
 }
 
-static struct super_operations capifs_sops =
+static const struct super_operations capifs_sops =
 {
 	.statfs		= simple_statfs,
 	.remount_fs	= capifs_remount,
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index de966a6..aecf40e 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -97,7 +97,7 @@ static int ibmasmfs_get_super(struct file_system_type *fst,
 	return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt);
 }
 
-static struct super_operations ibmasmfs_s_ops = {
+static const struct super_operations ibmasmfs_s_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
 };
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index b7e4cee..2766a6d 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -35,7 +35,7 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode)
 }
 
 
-static struct super_operations s_ops = {
+static const struct super_operations s_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode 	= generic_delete_inode,
 };
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index ffe75e8..97b40ce 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -48,7 +48,6 @@
 #define USBFS_DEFAULT_BUSMODE (S_IXUGO | S_IRUGO)
 #define USBFS_DEFAULT_LISTMODE S_IRUGO
 
-static struct super_operations usbfs_ops;
 static const struct file_operations default_file_operations;
 static struct vfsmount *usbfs_mount;
 static int usbfs_mount_count;	/* = 0 */
@@ -449,7 +448,7 @@ static const struct file_operations default_file_operations = {
 	.llseek =	default_file_lseek,
 };
 
-static struct super_operations usbfs_ops = {
+static const struct super_operations usbfs_ops = {
 	.statfs =	simple_statfs,
 	.drop_inode =	generic_delete_inode,
 	.remount_fs =	remount,
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 7d33f50..c44367f 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -2033,7 +2033,7 @@ gadgetfs_create_file (struct super_block *sb, char const *name,
 	return inode;
 }
 
-static struct super_operations gadget_fs_operations = {
+static const struct super_operations gadget_fs_operations = {
 	.statfs =	simple_statfs,
 	.drop_inode =	generic_delete_inode,
 };
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 615d549..dd376c1 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -842,7 +842,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_magic = BEFS_SUPER_MAGIC;
 	/* Set real blocksize of fs */
 	sb_set_blocksize(sb, (ulong) befs_sb->block_size);
-	sb->s_op = (struct super_operations *) &befs_sops;
+	sb->s_op = &befs_sops;
 	root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir)));
 	if (IS_ERR(root)) {
 		ret = PTR_ERR(root);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6d6d06c..2db17cd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -51,7 +51,7 @@
 #include "export.h"
 #include "compression.h"
 
-static struct super_operations btrfs_super_ops;
+static const struct super_operations btrfs_super_ops;
 
 static void btrfs_put_super(struct super_block *sb)
 {
@@ -675,7 +675,7 @@ static int btrfs_unfreeze(struct super_block *sb)
 	return 0;
 }
 
-static struct super_operations btrfs_super_ops = {
+static const struct super_operations btrfs_super_ops = {
 	.delete_inode	= btrfs_delete_inode,
 	.put_super	= btrfs_put_super,
 	.sync_fs	= btrfs_sync_fs,
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 55f3d6b..1c83f44 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -504,7 +504,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	return 0;
 }
 
-static struct super_operations nilfs_sops = {
+static const struct super_operations nilfs_sops = {
 	.alloc_inode    = nilfs_alloc_inode,
 	.destroy_inode  = nilfs_destroy_inode,
 	.dirty_inode    = nilfs_dirty_inode,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 379ae5f..f3b7c15 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -278,7 +278,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static struct super_operations omfs_sops = {
+static const struct super_operations omfs_sops = {
 	.write_inode	= omfs_write_inode,
 	.delete_inode	= omfs_delete_inode,
 	.put_super	= omfs_put_super,
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index cb5fc57..6c197ef 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -44,7 +44,7 @@
 #include "squashfs.h"
 
 static struct file_system_type squashfs_fs_type;
-static struct super_operations squashfs_super_ops;
+static const struct super_operations squashfs_super_ops;
 
 static int supported_squashfs_filesystem(short major, short minor, short comp)
 {
@@ -444,7 +444,7 @@ static struct file_system_type squashfs_fs_type = {
 	.fs_flags = FS_REQUIRES_DEV
 };
 
-static struct super_operations squashfs_super_ops = {
+static const struct super_operations squashfs_super_ops = {
 	.alloc_inode = squashfs_alloc_inode,
 	.destroy_inode = squashfs_destroy_inode,
 	.statfs = squashfs_statfs,
diff --git a/fs/super.c b/fs/super.c
index b03fea8..0e7207b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,7 @@ DEFINE_SPINLOCK(sb_lock);
 static struct super_block *alloc_super(struct file_system_type *type)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
-	static struct super_operations default_op;
+	static const struct super_operations default_op;
 
 	if (s) {
 		if (security_sb_alloc(s)) {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 5d7c60a..bdd41c8 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -67,7 +67,7 @@
 #include <linux/freezer.h>
 #include <linux/parser.h>
 
-static struct super_operations xfs_super_operations;
+static const struct super_operations xfs_super_operations;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
 
@@ -1536,7 +1536,7 @@ xfs_fs_get_sb(
 			   mnt);
 }
 
-static struct super_operations xfs_super_operations = {
+static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
 	.write_inode		= xfs_fs_write_inode,
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c5e68ad..ee9d697 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -77,7 +77,7 @@ struct mqueue_inode_info {
 
 static const struct inode_operations mqueue_dir_inode_operations;
 static const struct file_operations mqueue_file_operations;
-static struct super_operations mqueue_super_ops;
+static const struct super_operations mqueue_super_ops;
 static void remove_notification(struct mqueue_inode_info *info);
 
 static struct kmem_cache *mqueue_inode_cachep;
@@ -1224,7 +1224,7 @@ static const struct file_operations mqueue_file_operations = {
 	.read = mqueue_read_file,
 };
 
-static struct super_operations mqueue_super_ops = {
+static const struct super_operations mqueue_super_ops = {
 	.alloc_inode = mqueue_alloc_inode,
 	.destroy_inode = mqueue_destroy_inode,
 	.statfs = simple_statfs,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c7ece8f..39dde29 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -961,7 +961,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	return ret;
 }
 
-static struct super_operations cgroup_ops = {
+static const struct super_operations cgroup_ops = {
 	.statfs = simple_statfs,
 	.drop_inode = generic_delete_inode,
 	.show_options = cgroup_show_options,
diff --git a/net/socket.c b/net/socket.c
index 2a022c0..0ad02ae 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -285,7 +285,7 @@ static int init_inodecache(void)
 	return 0;
 }
 
-static struct super_operations sockfs_ops = {
+static const struct super_operations sockfs_ops = {
 	.alloc_inode =	sock_alloc_inode,
 	.destroy_inode =sock_destroy_inode,
 	.statfs =	simple_statfs,
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7f676bd..858a443 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -930,7 +930,7 @@ void rpc_remove_cache_dir(struct dentry *dentry)
 /*
  * populate the filesystem
  */
-static struct super_operations s_ops = {
+static const struct super_operations s_ops = {
 	.alloc_inode	= rpc_alloc_inode,
 	.destroy_inode	= rpc_destroy_inode,
 	.statfs		= simple_statfs,
-- 
cgit v0.10.2


From ac4cfdd6d141c319a7af8655f750ed504c187a74 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:10 -0700
Subject: const: mark remaining export_operations const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 0035c02..9a80e8e 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -123,7 +123,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child)
 	return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino));
 }
 
-static struct export_operations jffs2_export_ops = {
+static const struct export_operations jffs2_export_ops = {
 	.get_parent = jffs2_get_parent,
 	.fh_to_dentry = jffs2_fh_to_dentry,
 	.fh_to_parent = jffs2_fh_to_parent,
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1c83f44..644e667 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -560,7 +560,7 @@ nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len,
 				    nilfs_nfs_get_inode);
 }
 
-static struct export_operations nilfs_export_ops = {
+static const struct export_operations nilfs_export_ops = {
 	.fh_to_dentry = nilfs_fh_to_dentry,
 	.fh_to_parent = nilfs_fh_to_parent,
 	.get_parent = nilfs_get_parent,
-- 
cgit v0.10.2


From 7f09410bbc4306f592cfb43812389ea1c7905a20 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:10 -0700
Subject: const: mark remaining address_space_operations const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8b81927..6c41731 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -772,7 +772,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
 	}
 }
 
-static struct address_space_operations btree_aops = {
+static const struct address_space_operations btree_aops = {
 	.readpage	= btree_readpage,
 	.writepage	= btree_writepage,
 	.writepages	= btree_writepages,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 59cba18..3e4d679 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -60,8 +60,8 @@ static struct inode_operations btrfs_symlink_inode_operations;
 static struct inode_operations btrfs_dir_ro_inode_operations;
 static struct inode_operations btrfs_special_inode_operations;
 static struct inode_operations btrfs_file_inode_operations;
-static struct address_space_operations btrfs_aops;
-static struct address_space_operations btrfs_symlink_aops;
+static const struct address_space_operations btrfs_aops;
+static const struct address_space_operations btrfs_symlink_aops;
 static struct file_operations btrfs_dir_file_operations;
 static struct extent_io_ops btrfs_extent_io_ops;
 
@@ -5259,7 +5259,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
  *
  * For now we're avoiding this by dropping bmap.
  */
-static struct address_space_operations btrfs_aops = {
+static const struct address_space_operations btrfs_aops = {
 	.readpage	= btrfs_readpage,
 	.writepage	= btrfs_writepage,
 	.writepages	= btrfs_writepages,
@@ -5271,7 +5271,7 @@ static struct address_space_operations btrfs_aops = {
 	.set_page_dirty	= btrfs_set_page_dirty,
 };
 
-static struct address_space_operations btrfs_symlink_aops = {
+static const struct address_space_operations btrfs_symlink_aops = {
 	.readpage	= btrfs_readpage,
 	.writepage	= btrfs_writepage,
 	.invalidatepage = btrfs_invalidatepage,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 00b30a2..542f625 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -582,7 +582,7 @@ extern const struct inode_operations ecryptfs_dir_iops;
 extern const struct inode_operations ecryptfs_symlink_iops;
 extern const struct super_operations ecryptfs_sops;
 extern const struct dentry_operations ecryptfs_dops;
-extern struct address_space_operations ecryptfs_aops;
+extern const struct address_space_operations ecryptfs_aops;
 extern int ecryptfs_verbosity;
 extern unsigned int ecryptfs_message_buf_len;
 extern signed long ecryptfs_message_wait_timeout;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 5c6bab9..05772ae 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -545,7 +545,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
 	return rc;
 }
 
-struct address_space_operations ecryptfs_aops = {
+const struct address_space_operations ecryptfs_aops = {
 	.writepage = ecryptfs_writepage,
 	.readpage = ecryptfs_readpage,
 	.write_begin = ecryptfs_write_begin,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index c668bca..6a2711f 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -46,7 +46,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc)
 	INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
 }
 
-static struct address_space_operations def_btnode_aops = {
+static const struct address_space_operations def_btnode_aops = {
 	.sync_page		= block_sync_page,
 };
 
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1b3c2bb..e6de0a2 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -52,7 +52,7 @@
 #include "dat.h"
 #include "ifile.h"
 
-static struct address_space_operations def_gcinode_aops = {
+static const struct address_space_operations def_gcinode_aops = {
 	.sync_page		= block_sync_page,
 };
 
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 807e584..2d2c501 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -238,7 +238,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	return size;
 }
 
-struct address_space_operations nilfs_aops = {
+const struct address_space_operations nilfs_aops = {
 	.writepage		= nilfs_writepage,
 	.readpage		= nilfs_readpage,
 	.sync_page		= block_sync_page,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 156bf60..680e86c 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -427,7 +427,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
 }
 
 
-static struct address_space_operations def_mdt_aops = {
+static const struct address_space_operations def_mdt_aops = {
 	.writepage		= nilfs_mdt_write_page,
 	.sync_page		= block_sync_page,
 };
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 724c637..c0cf52a 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -297,7 +297,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *);
 extern struct file_operations nilfs_dir_operations;
 extern struct inode_operations nilfs_file_inode_operations;
 extern struct file_operations nilfs_file_operations;
-extern struct address_space_operations nilfs_aops;
+extern const struct address_space_operations nilfs_aops;
 extern struct inode_operations nilfs_dir_inode_operations;
 extern struct inode_operations nilfs_special_inode_operations;
 extern struct inode_operations nilfs_symlink_inode_operations;
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index d17e774..b707fa5 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -337,7 +337,7 @@ struct inode_operations omfs_file_inops = {
 	.truncate = omfs_truncate
 };
 
-struct address_space_operations omfs_aops = {
+const struct address_space_operations omfs_aops = {
 	.readpage = omfs_readpage,
 	.readpages = omfs_readpages,
 	.writepage = omfs_writepage,
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 2bc0f06..16d72ab 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -53,7 +53,7 @@ extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
 /* file.c */
 extern struct file_operations omfs_file_operations;
 extern struct inode_operations omfs_file_inops;
-extern struct address_space_operations omfs_aops;
+extern const struct address_space_operations omfs_aops;
 extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
 extern int omfs_shrink_inode(struct inode *inode);
 
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 7998cc3..195830f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -79,7 +79,7 @@ enum {
 };
 
 static const struct inode_operations none_inode_operations;
-static struct address_space_operations none_address_operations;
+static const struct address_space_operations none_address_operations;
 static const struct file_operations none_file_operations;
 
 /**
-- 
cgit v0.10.2


From 6e1d5dcc2bbbe71dbf010c747e15739bef6b7218 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:11 -0700
Subject: const: mark remaining inode_operations as const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 78f2a52..fc1b1c4 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -119,7 +119,7 @@ spufs_new_file(struct super_block *sb, struct dentry *dentry,
 		const struct file_operations *fops, int mode,
 		size_t size, struct spu_context *ctx)
 {
-	static struct inode_operations spufs_file_iops = {
+	static const struct inode_operations spufs_file_iops = {
 		.setattr = spufs_setattr,
 	};
 	struct inode *inode;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3e4d679..9096fd0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -55,11 +55,11 @@ struct btrfs_iget_args {
 	struct btrfs_root *root;
 };
 
-static struct inode_operations btrfs_dir_inode_operations;
-static struct inode_operations btrfs_symlink_inode_operations;
-static struct inode_operations btrfs_dir_ro_inode_operations;
-static struct inode_operations btrfs_special_inode_operations;
-static struct inode_operations btrfs_file_inode_operations;
+static const struct inode_operations btrfs_dir_inode_operations;
+static const struct inode_operations btrfs_symlink_inode_operations;
+static const struct inode_operations btrfs_dir_ro_inode_operations;
+static const struct inode_operations btrfs_special_inode_operations;
+static const struct inode_operations btrfs_file_inode_operations;
 static const struct address_space_operations btrfs_aops;
 static const struct address_space_operations btrfs_symlink_aops;
 static struct file_operations btrfs_dir_file_operations;
@@ -5201,7 +5201,7 @@ static int btrfs_permission(struct inode *inode, int mask)
 	return generic_permission(inode, mask, btrfs_check_acl);
 }
 
-static struct inode_operations btrfs_dir_inode_operations = {
+static const struct inode_operations btrfs_dir_inode_operations = {
 	.getattr	= btrfs_getattr,
 	.lookup		= btrfs_lookup,
 	.create		= btrfs_create,
@@ -5219,7 +5219,7 @@ static struct inode_operations btrfs_dir_inode_operations = {
 	.removexattr	= btrfs_removexattr,
 	.permission	= btrfs_permission,
 };
-static struct inode_operations btrfs_dir_ro_inode_operations = {
+static const struct inode_operations btrfs_dir_ro_inode_operations = {
 	.lookup		= btrfs_lookup,
 	.permission	= btrfs_permission,
 };
@@ -5278,7 +5278,7 @@ static const struct address_space_operations btrfs_symlink_aops = {
 	.releasepage	= btrfs_releasepage,
 };
 
-static struct inode_operations btrfs_file_inode_operations = {
+static const struct inode_operations btrfs_file_inode_operations = {
 	.truncate	= btrfs_truncate,
 	.getattr	= btrfs_getattr,
 	.setattr	= btrfs_setattr,
@@ -5290,7 +5290,7 @@ static struct inode_operations btrfs_file_inode_operations = {
 	.fallocate	= btrfs_fallocate,
 	.fiemap		= btrfs_fiemap,
 };
-static struct inode_operations btrfs_special_inode_operations = {
+static const struct inode_operations btrfs_special_inode_operations = {
 	.getattr	= btrfs_getattr,
 	.setattr	= btrfs_setattr,
 	.permission	= btrfs_permission,
@@ -5299,7 +5299,7 @@ static struct inode_operations btrfs_special_inode_operations = {
 	.listxattr	= btrfs_listxattr,
 	.removexattr	= btrfs_removexattr,
 };
-static struct inode_operations btrfs_symlink_inode_operations = {
+static const struct inode_operations btrfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 606912d..6889c0d 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -385,7 +385,7 @@ out_err:
 	goto out;
 }
 
-struct inode_operations cifs_dfs_referral_inode_operations = {
+const struct inode_operations cifs_dfs_referral_inode_operations = {
 	.follow_link = cifs_dfs_follow_mountpoint,
 };
 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 094325e..ac2b24c 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -67,7 +67,7 @@ extern int cifs_setattr(struct dentry *, struct iattr *);
 
 extern const struct inode_operations cifs_file_inode_ops;
 extern const struct inode_operations cifs_symlink_inode_ops;
-extern struct inode_operations cifs_dfs_referral_inode_operations;
+extern const struct inode_operations cifs_dfs_referral_inode_operations;
 
 
 /* Functions related to files and directories */
diff --git a/fs/inode.c b/fs/inode.c
index 798052f..f5ff71c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -123,7 +123,7 @@ static void wake_up_inode(struct inode *inode)
 int inode_init_always(struct super_block *sb, struct inode *inode)
 {
 	static const struct address_space_operations empty_aops;
-	static struct inode_operations empty_iops;
+	static const struct inode_operations empty_iops;
 	static const struct file_operations empty_fops;
 	struct address_space *const mapping = &inode->i_data;
 
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 6bd84a0..fc8278c 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -151,7 +151,7 @@ struct file_operations nilfs_file_operations = {
 	.splice_read	= generic_file_splice_read,
 };
 
-struct inode_operations nilfs_file_inode_operations = {
+const struct inode_operations nilfs_file_inode_operations = {
 	.truncate	= nilfs_truncate,
 	.setattr	= nilfs_setattr,
 	.permission     = nilfs_permission,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 680e86c..b18c499 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -432,7 +432,7 @@ static const struct address_space_operations def_mdt_aops = {
 	.sync_page		= block_sync_page,
 };
 
-static struct inode_operations def_mdt_iops;
+static const struct inode_operations def_mdt_iops;
 static struct file_operations def_mdt_fops;
 
 /*
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index df70dad..ed02e88 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -448,7 +448,7 @@ out:
 	return err;
 }
 
-struct inode_operations nilfs_dir_inode_operations = {
+const struct inode_operations nilfs_dir_inode_operations = {
 	.create		= nilfs_create,
 	.lookup		= nilfs_lookup,
 	.link		= nilfs_link,
@@ -462,12 +462,12 @@ struct inode_operations nilfs_dir_inode_operations = {
 	.permission	= nilfs_permission,
 };
 
-struct inode_operations nilfs_special_inode_operations = {
+const struct inode_operations nilfs_special_inode_operations = {
 	.setattr	= nilfs_setattr,
 	.permission	= nilfs_permission,
 };
 
-struct inode_operations nilfs_symlink_inode_operations = {
+const struct inode_operations nilfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index c0cf52a..bad7368 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -295,12 +295,12 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *);
  * Inodes and files operations
  */
 extern struct file_operations nilfs_dir_operations;
-extern struct inode_operations nilfs_file_inode_operations;
+extern const struct inode_operations nilfs_file_inode_operations;
 extern struct file_operations nilfs_file_operations;
 extern const struct address_space_operations nilfs_aops;
-extern struct inode_operations nilfs_dir_inode_operations;
-extern struct inode_operations nilfs_special_inode_operations;
-extern struct inode_operations nilfs_symlink_inode_operations;
+extern const struct inode_operations nilfs_dir_inode_operations;
+extern const struct inode_operations nilfs_special_inode_operations;
+extern const struct inode_operations nilfs_symlink_inode_operations;
 
 /*
  * filesystem type
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index c7275cf..3680bae 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -489,7 +489,7 @@ out:
 	return ret;
 }
 
-struct inode_operations omfs_dir_inops = {
+const struct inode_operations omfs_dir_inops = {
 	.lookup = omfs_lookup,
 	.mkdir = omfs_mkdir,
 	.rename = omfs_rename,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index b707fa5..4845fbb 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -333,7 +333,7 @@ struct file_operations omfs_file_operations = {
 	.splice_read = generic_file_splice_read,
 };
 
-struct inode_operations omfs_file_inops = {
+const struct inode_operations omfs_file_inops = {
 	.truncate = omfs_truncate
 };
 
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 16d72ab..df71039 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -45,14 +45,14 @@ extern int omfs_clear_range(struct super_block *sb, u64 block, int count);
 
 /* dir.c */
 extern struct file_operations omfs_dir_operations;
-extern struct inode_operations omfs_dir_inops;
+extern const struct inode_operations omfs_dir_inops;
 extern int omfs_make_empty(struct inode *inode, struct super_block *sb);
 extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
 			u64 fsblock);
 
 /* file.c */
 extern struct file_operations omfs_file_operations;
-extern struct inode_operations omfs_file_inops;
+extern const struct inode_operations omfs_file_inops;
 extern const struct address_space_operations omfs_aops;
 extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
 extern int omfs_shrink_inode(struct inode *inode);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 4ab3c03..47f132d 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -284,7 +284,7 @@ static const struct file_operations romfs_dir_operations = {
 	.readdir	= romfs_readdir,
 };
 
-static struct inode_operations romfs_dir_inode_operations = {
+static const struct inode_operations romfs_dir_inode_operations = {
 	.lookup		= romfs_lookup,
 };
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 39dde29..213b7f9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -596,7 +596,7 @@ void cgroup_unlock(void)
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
 static int cgroup_populate_dir(struct cgroup *cgrp);
-static struct inode_operations cgroup_dir_inode_operations;
+static const struct inode_operations cgroup_dir_inode_operations;
 static struct file_operations proc_cgroupstats_operations;
 
 static struct backing_dev_info cgroup_backing_dev_info = {
@@ -1711,7 +1711,7 @@ static struct file_operations cgroup_file_operations = {
 	.release = cgroup_file_release,
 };
 
-static struct inode_operations cgroup_dir_inode_operations = {
+static const struct inode_operations cgroup_dir_inode_operations = {
 	.lookup = simple_lookup,
 	.mkdir = cgroup_mkdir,
 	.rmdir = cgroup_rmdir,
-- 
cgit v0.10.2


From 6aed62853c72e29f2c97bbac7712cb398e8c9437 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:11 -0700
Subject: const: make file_lock_operations const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3ff8bdd..0931bc1 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -21,7 +21,7 @@ static void afs_fl_release_private(struct file_lock *fl);
 static struct workqueue_struct *afs_lock_manager;
 static DEFINE_MUTEX(afs_lock_manager_mutex);
 
-static struct file_lock_operations afs_lock_ops = {
+static const struct file_lock_operations afs_lock_ops = {
 	.fl_copy_lock		= afs_fl_copy_lock,
 	.fl_release_private	= afs_fl_release_private,
 };
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 4336adb..c81249f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -458,7 +458,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
 	nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
 }
 
-static struct file_lock_operations nlmclnt_lock_ops = {
+static const struct file_lock_operations nlmclnt_lock_ops = {
 	.fl_copy_lock = nlmclnt_locks_copy_lock,
 	.fl_release_private = nlmclnt_locks_release_private,
 };
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1434080..2ef4fec 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -638,7 +638,7 @@ static void nfs4_fl_release_lock(struct file_lock *fl)
 	nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
 }
 
-static struct file_lock_operations nfs4_fl_lock_ops = {
+static const struct file_lock_operations nfs4_fl_lock_ops = {
 	.fl_copy_lock = nfs4_fl_copy_lock,
 	.fl_release_private = nfs4_fl_release_lock,
 };
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 13c030e..6146dec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1066,7 +1066,7 @@ struct file_lock {
 	struct fasync_struct *	fl_fasync; /* for lease break notifications */
 	unsigned long fl_break_time;	/* for nonblocking lease breaks */
 
-	struct file_lock_operations *fl_ops;	/* Callbacks for filesystems */
+	const struct file_lock_operations *fl_ops;	/* Callbacks for filesystems */
 	struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
 	union {
 		struct nfs_lock_info	nfs_fl;
-- 
cgit v0.10.2


From 7b021967c5e1463936042c8da72b550d3cabe9ac Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:12 -0700
Subject: const: make lock_manager_operations const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e577a78..d100179 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -705,7 +705,7 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 	return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
 }
 
-struct lock_manager_operations nlmsvc_lock_operations = {
+const struct lock_manager_operations nlmsvc_lock_operations = {
 	.fl_compare_owner = nlmsvc_same_owner,
 	.fl_notify = nlmsvc_notify_blocked,
 	.fl_grant = nlmsvc_grant_deferred,
diff --git a/fs/locks.c b/fs/locks.c
index 19ee18a..a8794f2 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -434,7 +434,7 @@ static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
 	return fl->fl_file == try->fl_file;
 }
 
-static struct lock_manager_operations lease_manager_ops = {
+static const struct lock_manager_operations lease_manager_ops = {
 	.fl_break = lease_break_callback,
 	.fl_release_private = lease_release_private_callback,
 	.fl_mylease = lease_mylease_callback,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 980a216..766d3d5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2163,7 +2163,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
 		return -EAGAIN;
 }
 
-static struct lock_manager_operations nfsd_lease_mng_ops = {
+static const struct lock_manager_operations nfsd_lease_mng_ops = {
 	.fl_break = nfsd_break_deleg_cb,
 	.fl_release_private = nfsd_release_deleg_cb,
 	.fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -3368,7 +3368,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
 
 /* Hack!: For now, we're defining this just so we can use a pointer to it
  * as a unique cookie to identify our (NFSv4's) posix locks. */
-static struct lock_manager_operations nfsd_posix_mng_ops  = {
+static const struct lock_manager_operations nfsd_posix_mng_ops  = {
 };
 
 static inline void
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6146dec..5180352 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1067,7 +1067,7 @@ struct file_lock {
 	unsigned long fl_break_time;	/* for nonblocking lease breaks */
 
 	const struct file_lock_operations *fl_ops;	/* Callbacks for filesystems */
-	struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
+	const struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
 	union {
 		struct nfs_lock_info	nfs_fl;
 		struct nfs4_lock_info	nfs4_fl;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c325b18..ccf2e0d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -395,7 +395,7 @@ static inline int nlm_compare_locks(const struct file_lock *fl1,
 	     &&(fl1->fl_type  == fl2->fl_type || fl2->fl_type == F_UNLCK);
 }
 
-extern struct lock_manager_operations nlmsvc_lock_operations;
+extern const struct lock_manager_operations nlmsvc_lock_operations;
 
 #endif /* __KERNEL__ */
 
-- 
cgit v0.10.2


From 83d5cde47dedf01b6a4a4331882cbc0a7eea3c2e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:01:13 -0700
Subject: const: make block_device_operations const

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index a477991..88f4ae7 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -165,7 +165,7 @@ axon_ram_direct_access(struct block_device *device, sector_t sector,
 	return 0;
 }
 
-static struct block_device_operations axon_ram_devops = {
+static const struct block_device_operations axon_ram_devops = {
 	.owner		= THIS_MODULE,
 	.direct_access	= axon_ram_direct_access
 };
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 8f05d4d..635d16d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -106,7 +106,7 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 #define MAX_DEV (16)
 
-static struct block_device_operations ubd_blops = {
+static const struct block_device_operations ubd_blops = {
         .owner		= THIS_MODULE,
         .open		= ubd_open,
         .release	= ubd_release,
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 1e6b7c1..8b50af3 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -152,7 +152,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk)
 	return 0;
 }
 
-static struct block_device_operations DAC960_BlockDeviceOperations = {
+static const struct block_device_operations DAC960_BlockDeviceOperations = {
 	.owner			= THIS_MODULE,
 	.open			= DAC960_open,
 	.getgeo			= DAC960_getgeo,
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 2f07b7c..0552258 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1632,7 +1632,7 @@ static int amiga_floppy_change(struct gendisk *disk)
 	return 0;
 }
 
-static struct block_device_operations floppy_fops = {
+static const struct block_device_operations floppy_fops = {
 	.owner		= THIS_MODULE,
 	.open		= floppy_open,
 	.release	= floppy_release,
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b6cd571..3af97d4 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -237,7 +237,7 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static struct block_device_operations aoe_bdops = {
+static const struct block_device_operations aoe_bdops = {
 	.open = aoeblk_open,
 	.release = aoeblk_release,
 	.getgeo = aoeblk_getgeo,
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 3ff0294..847a9e5 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1856,7 +1856,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
 	return 0;
 }
 
-static struct block_device_operations floppy_fops = {
+static const struct block_device_operations floppy_fops = {
 	.owner		= THIS_MODULE,
 	.open		= floppy_open,
 	.release	= floppy_release,
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 4bf8705..4f68843 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -375,7 +375,7 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode,
 	return error;
 }
 
-static struct block_device_operations brd_fops = {
+static const struct block_device_operations brd_fops = {
 	.owner =		THIS_MODULE,
 	.locked_ioctl =		brd_ioctl,
 #ifdef CONFIG_BLK_DEV_XIP
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d8372b4..4f19105 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -205,7 +205,7 @@ static int cciss_compat_ioctl(struct block_device *, fmode_t,
 			      unsigned, unsigned long);
 #endif
 
-static struct block_device_operations cciss_fops = {
+static const struct block_device_operations cciss_fops = {
 	.owner = THIS_MODULE,
 	.open = cciss_open,
 	.release = cciss_release,
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 44fa201..b82d438 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -193,7 +193,7 @@ static inline ctlr_info_t *get_host(struct gendisk *disk)
 }
 
 
-static struct block_device_operations ida_fops  = {
+static const struct block_device_operations ida_fops  = {
 	.owner		= THIS_MODULE,
 	.open		= ida_open,
 	.release	= ida_release,
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 2b387c2..5c01f74 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3907,7 +3907,7 @@ static int floppy_revalidate(struct gendisk *disk)
 	return res;
 }
 
-static struct block_device_operations floppy_fops = {
+static const struct block_device_operations floppy_fops = {
 	.owner			= THIS_MODULE,
 	.open			= floppy_open,
 	.release		= floppy_release,
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index f9d0160..d5cdce0 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -692,7 +692,7 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static struct block_device_operations hd_fops = {
+static const struct block_device_operations hd_fops = {
 	.getgeo =	hd_getgeo,
 };
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bbb7944..edda9ea 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1438,7 +1438,7 @@ out_unlocked:
 	return 0;
 }
 
-static struct block_device_operations lo_fops = {
+static const struct block_device_operations lo_fops = {
 	.owner =	THIS_MODULE,
 	.open =		lo_open,
 	.release =	lo_release,
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 6d7fbaa..e0339aa 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -775,7 +775,7 @@ static int mg_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static struct block_device_operations mg_disk_ops = {
+static const struct block_device_operations mg_disk_ops = {
 	.getgeo = mg_getgeo
 };
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5d23ffa..cc923a5 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -722,7 +722,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	return error;
 }
 
-static struct block_device_operations nbd_fops =
+static const struct block_device_operations nbd_fops =
 {
 	.owner =	THIS_MODULE,
 	.locked_ioctl =	nbd_ioctl,
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 13c1aee..a808b15 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -125,7 +125,7 @@ static struct class *class_osdblk;		/* /sys/class/osdblk */
 static DEFINE_MUTEX(ctl_mutex);	/* Serialize open/close/setup/teardown */
 static LIST_HEAD(osdblkdev_list);
 
-static struct block_device_operations osdblk_bd_ops = {
+static const struct block_device_operations osdblk_bd_ops = {
 	.owner		= THIS_MODULE,
 };
 
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 9f3518c..8866ca3 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -247,7 +247,7 @@ static int pcd_block_media_changed(struct gendisk *disk)
 	return cdrom_media_changed(&cd->info);
 }
 
-static struct block_device_operations pcd_bdops = {
+static const struct block_device_operations pcd_bdops = {
 	.owner		= THIS_MODULE,
 	.open		= pcd_block_open,
 	.release	= pcd_block_release,
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index bf5955b..569e39e 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -807,7 +807,7 @@ static int pd_revalidate(struct gendisk *p)
 	return 0;
 }
 
-static struct block_device_operations pd_fops = {
+static const struct block_device_operations pd_fops = {
 	.owner		= THIS_MODULE,
 	.open		= pd_open,
 	.release	= pd_release,
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 68a9083..ea54ea3 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -262,7 +262,7 @@ static char *pf_buf;		/* buffer for request in progress */
 
 /* kernel glue structures */
 
-static struct block_device_operations pf_fops = {
+static const struct block_device_operations pf_fops = {
 	.owner		= THIS_MODULE,
 	.open		= pf_open,
 	.release	= pf_release,
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index fd5bb8a..2ddf03a 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2849,7 +2849,7 @@ static int pkt_media_changed(struct gendisk *disk)
 	return attached_disk->fops->media_changed(attached_disk);
 }
 
-static struct block_device_operations pktcdvd_ops = {
+static const struct block_device_operations pktcdvd_ops = {
 	.owner =		THIS_MODULE,
 	.open =			pkt_open,
 	.release =		pkt_close,
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 34cbb7f..03a130d 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -82,7 +82,7 @@ enum lv1_ata_in_out {
 static int ps3disk_major;
 
 
-static struct block_device_operations ps3disk_fops = {
+static const struct block_device_operations ps3disk_fops = {
 	.owner		= THIS_MODULE,
 };
 
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index c8753a9..3bb7c47 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -88,7 +88,7 @@ struct ps3vram_priv {
 static int ps3vram_major;
 
 
-static struct block_device_operations ps3vram_fops = {
+static const struct block_device_operations ps3vram_fops = {
 	.owner		= THIS_MODULE,
 };
 
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index cbfd9c0..411f064 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -103,7 +103,7 @@ static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static struct block_device_operations vdc_fops = {
+static const struct block_device_operations vdc_fops = {
 	.owner		= THIS_MODULE,
 	.getgeo		= vdc_getgeo,
 };
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index cf7877f..8f569e3 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -748,7 +748,7 @@ static int floppy_revalidate(struct gendisk *disk)
 	return !fs->disk_in;
 }
 
-static struct block_device_operations floppy_fops = {
+static const struct block_device_operations floppy_fops = {
 	.owner		 = THIS_MODULE,
 	.open		 = floppy_open,
 	.release	 = floppy_release,
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 80df93e..e39e382 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -998,7 +998,7 @@ static int floppy_revalidate(struct gendisk *disk)
 	return ret;
 }
 
-static struct block_device_operations floppy_fops = {
+static const struct block_device_operations floppy_fops = {
 	.open		= floppy_open,
 	.release	= floppy_release,
 	.locked_ioctl	= floppy_ioctl,
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index f5cd2e8..a7c4184 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -423,7 +423,7 @@ static struct pci_driver carm_driver = {
 	.remove		= carm_remove_one,
 };
 
-static struct block_device_operations carm_bd_ops = {
+static const struct block_device_operations carm_bd_ops = {
 	.owner		= THIS_MODULE,
 	.getgeo		= carm_bdev_getgeo,
 };
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index cc54473..c739b20 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -1789,7 +1789,7 @@ static int ub_bd_media_changed(struct gendisk *disk)
 	return lun->changed;
 }
 
-static struct block_device_operations ub_bd_fops = {
+static const struct block_device_operations ub_bd_fops = {
 	.owner		= THIS_MODULE,
 	.open		= ub_bd_open,
 	.release	= ub_bd_release,
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 858c34d..ad1ba39 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -140,7 +140,6 @@ struct cardinfo {
 };
 
 static struct cardinfo cards[MM_MAXCARDS];
-static struct block_device_operations mm_fops;
 static struct timer_list battery_timer;
 
 static int num_cards;
@@ -789,7 +788,7 @@ static int mm_check_change(struct gendisk *disk)
 	return 0;
 }
 
-static struct block_device_operations mm_fops = {
+static const struct block_device_operations mm_fops = {
 	.owner		= THIS_MODULE,
 	.getgeo		= mm_getgeo,
 	.revalidate_disk = mm_revalidate,
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index b441ce3..a8c8b56 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -219,7 +219,7 @@ static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 /*
  * Our file operations table
  */
-static struct block_device_operations viodasd_fops = {
+static const struct block_device_operations viodasd_fops = {
 	.owner = THIS_MODULE,
 	.open = viodasd_open,
 	.release = viodasd_release,
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index aa1a3d5..aa89fe4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -243,7 +243,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 	return 0;
 }
 
-static struct block_device_operations virtblk_fops = {
+static const struct block_device_operations virtblk_fops = {
 	.locked_ioctl = virtblk_ioctl,
 	.owner  = THIS_MODULE,
 	.getgeo = virtblk_getgeo,
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index ce24292..0877d36 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -130,7 +130,7 @@ static struct gendisk *xd_gendisk[2];
 
 static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
-static struct block_device_operations xd_fops = {
+static const struct block_device_operations xd_fops = {
 	.owner	= THIS_MODULE,
 	.locked_ioctl	= xd_ioctl,
 	.getgeo = xd_getgeo,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index e532847..b8578bb 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -65,7 +65,7 @@ struct blk_shadow {
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
-static struct block_device_operations xlvbd_block_fops;
+static const struct block_device_operations xlvbd_block_fops;
 
 #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
 
@@ -1039,7 +1039,7 @@ static int blkif_release(struct gendisk *disk, fmode_t mode)
 	return 0;
 }
 
-static struct block_device_operations xlvbd_block_fops =
+static const struct block_device_operations xlvbd_block_fops =
 {
 	.owner = THIS_MODULE,
 	.open = blkif_open,
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index b20abe10..e5c5415 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -941,7 +941,7 @@ static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static struct block_device_operations ace_fops = {
+static const struct block_device_operations ace_fops = {
 	.owner = THIS_MODULE,
 	.open = ace_open,
 	.release = ace_release,
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index b259040..64f941e 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -64,7 +64,6 @@ static int current_device   = -1;
 
 static DEFINE_SPINLOCK(z2ram_lock);
 
-static struct block_device_operations z2_fops;
 static struct gendisk *z2ram_gendisk;
 
 static void do_z2_request(struct request_queue *q)
@@ -315,7 +314,7 @@ z2_release(struct gendisk *disk, fmode_t mode)
     return 0;
 }
 
-static struct block_device_operations z2_fops =
+static const struct block_device_operations z2_fops =
 {
 	.owner		= THIS_MODULE,
 	.open		= z2_open,
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index b5621f2..a762283 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -512,7 +512,7 @@ static int gdrom_bdops_ioctl(struct block_device *bdev, fmode_t mode,
 	return cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg);
 }
 
-static struct block_device_operations gdrom_bdops = {
+static const struct block_device_operations gdrom_bdops = {
 	.owner			= THIS_MODULE,
 	.open			= gdrom_bdops_open,
 	.release		= gdrom_bdops_release,
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 0fff646..57ca69e 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -177,7 +177,7 @@ static int viocd_blk_media_changed(struct gendisk *disk)
 	return cdrom_media_changed(&di->viocd_info);
 }
 
-struct block_device_operations viocd_fops = {
+static const struct block_device_operations viocd_fops = {
 	.owner =		THIS_MODULE,
 	.open =			viocd_blk_open,
 	.release =		viocd_blk_release,
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index b79ca41..64207df 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1686,7 +1686,7 @@ static int idecd_revalidate_disk(struct gendisk *disk)
 	return  0;
 }
 
-static struct block_device_operations idecd_ops = {
+static const struct block_device_operations idecd_ops = {
 	.owner			= THIS_MODULE,
 	.open			= idecd_open,
 	.release		= idecd_release,
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 2141190..7532414 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -321,7 +321,7 @@ static int ide_gd_ioctl(struct block_device *bdev, fmode_t mode,
 	return drive->disk_ops->ioctl(drive, bdev, mode, cmd, arg);
 }
 
-static struct block_device_operations ide_gd_ops = {
+static const struct block_device_operations ide_gd_ops = {
 	.owner			= THIS_MODULE,
 	.open			= ide_gd_open,
 	.release		= ide_gd_release,
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 9d6f62b..58fc920 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -1913,7 +1913,7 @@ static int idetape_ioctl(struct block_device *bdev, fmode_t mode,
 	return err;
 }
 
-static struct block_device_operations idetape_block_ops = {
+static const struct block_device_operations idetape_block_ops = {
 	.owner		= THIS_MODULE,
 	.open		= idetape_open,
 	.release	= idetape_release,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eee28fa..376f1ab 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1716,7 +1716,7 @@ out:
 	return r;
 }
 
-static struct block_device_operations dm_blk_dops;
+static const struct block_device_operations dm_blk_dops;
 
 static void dm_wq_work(struct work_struct *work);
 
@@ -2663,7 +2663,7 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
 	kfree(pools);
 }
 
-static struct block_device_operations dm_blk_dops = {
+static const struct block_device_operations dm_blk_dops = {
 	.open = dm_blk_open,
 	.release = dm_blk_close,
 	.ioctl = dm_blk_ioctl,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9dd8720..6aa497e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -138,7 +138,7 @@ static ctl_table raid_root_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static struct block_device_operations md_fops;
+static const struct block_device_operations md_fops;
 
 static int start_readonly;
 
@@ -5556,7 +5556,7 @@ static int md_revalidate(struct gendisk *disk)
 	mddev->changed = 0;
 	return 0;
 }
-static struct block_device_operations md_fops =
+static const struct block_device_operations md_fops =
 {
 	.owner		= THIS_MODULE,
 	.open		= md_open,
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 7847bbc..bd83fa0 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -235,7 +235,7 @@ static int mspro_block_bd_getgeo(struct block_device *bdev,
 	return 0;
 }
 
-static struct block_device_operations ms_block_bdops = {
+static const struct block_device_operations ms_block_bdops = {
 	.open    = mspro_block_bd_open,
 	.release = mspro_block_bd_release,
 	.getgeo  = mspro_block_bd_getgeo,
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 335d4c7..d505b68 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -925,7 +925,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 };
 
 /* I2O Block device operations definition */
-static struct block_device_operations i2o_block_fops = {
+static const struct block_device_operations i2o_block_fops = {
 	.owner = THIS_MODULE,
 	.open = i2o_block_open,
 	.release = i2o_block_release,
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index adc205c..85f0e8c 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -130,7 +130,7 @@ mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static struct block_device_operations mmc_bdops = {
+static const struct block_device_operations mmc_bdops = {
 	.open			= mmc_blk_open,
 	.release		= mmc_blk_release,
 	.getgeo			= mmc_blk_getgeo,
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 7baba40..0acbf4f 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -210,7 +210,7 @@ static int blktrans_ioctl(struct block_device *bdev, fmode_t mode,
 	}
 }
 
-static struct block_device_operations mtd_blktrans_ops = {
+static const struct block_device_operations mtd_blktrans_ops = {
 	.owner		= THIS_MODULE,
 	.open		= blktrans_open,
 	.release	= blktrans_release,
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index e109da4..dad0449 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2146,7 +2146,7 @@ static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-struct block_device_operations
+const struct block_device_operations
 dasd_device_operations = {
 	.owner		= THIS_MODULE,
 	.open		= dasd_open,
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 5e47a1e..8afd9fa 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -540,7 +540,7 @@ dasd_check_blocksize(int bsize)
 extern debug_info_t *dasd_debug_area;
 extern struct dasd_profile_info_t dasd_global_profile;
 extern unsigned int dasd_profile_level;
-extern struct block_device_operations dasd_device_operations;
+extern const struct block_device_operations dasd_device_operations;
 
 extern struct kmem_cache *dasd_page_cache;
 
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index d346176..f76f4bd 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -34,7 +34,7 @@ static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
 
 static int dcssblk_major;
-static struct block_device_operations dcssblk_devops = {
+static const struct block_device_operations dcssblk_devops = {
 	.owner   	= THIS_MODULE,
 	.open    	= dcssblk_open,
 	.release 	= dcssblk_release,
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index ee604e9..116d1b3 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -244,7 +244,7 @@ static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static struct block_device_operations xpram_devops =
+static const struct block_device_operations xpram_devops =
 {
 	.owner	= THIS_MODULE,
 	.getgeo	= xpram_getgeo,
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 4cb9e70..64f57ef 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -50,7 +50,7 @@ static int tapeblock_ioctl(struct block_device *, fmode_t, unsigned int,
 static int tapeblock_medium_changed(struct gendisk *);
 static int tapeblock_revalidate_disk(struct gendisk *);
 
-static struct block_device_operations tapeblock_fops = {
+static const struct block_device_operations tapeblock_fops = {
 	.owner		 = THIS_MODULE,
 	.open		 = tapeblock_open,
 	.release	 = tapeblock_release,
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 6d46516..869a30b 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -452,7 +452,7 @@ static const struct file_operations jsf_fops = {
 
 static struct miscdevice jsf_dev = { JSF_MINOR, "jsflash", &jsf_fops };
 
-static struct block_device_operations jsfd_fops = {
+static const struct block_device_operations jsfd_fops = {
 	.owner =	THIS_MODULE,
 };
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a89c421..8dd96dc 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -956,7 +956,7 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 }
 #endif
 
-static struct block_device_operations sd_fops = {
+static const struct block_device_operations sd_fops = {
 	.owner			= THIS_MODULE,
 	.open			= sd_open,
 	.release		= sd_release,
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index cce0fe4..eb61f7a 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -525,7 +525,7 @@ static int sr_block_media_changed(struct gendisk *disk)
 	return cdrom_media_changed(&cd->cdi);
 }
 
-static struct block_device_operations sr_bdops =
+static const struct block_device_operations sr_bdops =
 {
 	.owner		= THIS_MODULE,
 	.open		= sr_block_open,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 71e7e03..5d1ed50 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1114,7 +1114,7 @@ EXPORT_SYMBOL(revalidate_disk);
 int check_disk_change(struct block_device *bdev)
 {
 	struct gendisk *disk = bdev->bd_disk;
-	struct block_device_operations * bdops = disk->fops;
+	const struct block_device_operations *bdops = disk->fops;
 
 	if (!bdops->media_changed)
 		return 0;
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index b72b858..c18fbf3 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -20,7 +20,7 @@ __inode_direct_access(struct inode *inode, sector_t block,
 		      void **kaddr, unsigned long *pfn)
 {
 	struct block_device *bdev = inode->i_sb->s_bdev;
-	struct block_device_operations *ops = bdev->bd_disk->fops;
+	const struct block_device_operations *ops = bdev->bd_disk->fops;
 	sector_t sector;
 
 	sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index fbeaddf..7b685e1 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -581,7 +581,7 @@ try_scan:
 		}
 
 		if (from + size > get_capacity(disk)) {
-			struct block_device_operations *bdops = disk->fops;
+			const struct block_device_operations *bdops = disk->fops;
 			unsigned long long capacity;
 
 			printk(KERN_WARNING
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 109d179..297df45 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -151,7 +151,7 @@ struct gendisk {
 	struct disk_part_tbl *part_tbl;
 	struct hd_struct part0;
 
-	struct block_device_operations *fops;
+	const struct block_device_operations *fops;
 	struct request_queue *queue;
 	void *private_data;
 
-- 
cgit v0.10.2


From a5abeeacc44bbef2935a7a8e939264c28962def2 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 21 Sep 2009 17:01:13 -0700
Subject: mm: make swap token dummies static inlines

Make use of the compiler's typechecking on !CONFIG_SWAP as well.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7c15334..6c990e6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -419,10 +419,22 @@ static inline swp_entry_t get_swap_page(void)
 }
 
 /* linux/mm/thrash.c */
-#define put_swap_token(mm)	do { } while (0)
-#define grab_swap_token(mm)	do { } while (0)
-#define has_swap_token(mm)	0
-#define disable_swap_token()	do { } while (0)
+static inline void put_swap_token(struct mm_struct *mm)
+{
+}
+
+static inline void grab_swap_token(struct mm_struct *mm)
+{
+}
+
+static inline int has_swap_token(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void disable_swap_token(void)
+{
+}
 
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
-- 
cgit v0.10.2


From 478b81fd84a299adb401dbbae296f3767e552999 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 21 Sep 2009 17:01:15 -0700
Subject: mm: remove obsoleted alloc_pages cpuset comment

When a cpuset's nodemask is updated, all attached tasks have their cached
task->mems_allowed updated by a heap instead of requiring an explicit call
to cpuset_update_task_memory_state(), which has since been removed in
58568d2a8215cb6f55caf2332017d7bdff954e1c ("cpuset,mm: update tasks'
mems_allowed in time").

Remove the obsoleted comment from the page allocator.

Cc: Paul Menage <menage@google.com>
Acked-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0de15f..f4e929e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1627,10 +1627,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-
-	/*
-	 * The task's cpuset might have expanded its set of allowable nodes
-	 */
 	p->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
-- 
cgit v0.10.2


From 112067f0905b2de862c607ee62411cf47d2fe5c4 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 21 Sep 2009 17:01:16 -0700
Subject: memory hotplug: update zone pcp at memory online

In my test, 128M memory is hot added, but zone's pcp batch is 0, which is
an obvious error.  When pages are onlined, zone pcp should be updated
accordingly.

[akpm@linux-foundation.org: fix warnings]
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Yakui Zhao <yakui.zhao@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9a72cc7..d3c8ae7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1058,6 +1058,8 @@ extern void setup_per_cpu_pageset(void);
 static inline void setup_per_cpu_pageset(void) {}
 #endif
 
+extern void zone_pcp_update(struct zone *zone);
+
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e4412a6..616236e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -422,6 +422,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
 
+	zone_pcp_update(zone);
 	setup_per_zone_wmarks();
 	calculate_zone_inactive_ratio(zone);
 	if (onlined_pages) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f4e929e..1a3a893 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3142,6 +3142,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 	return 0;
 }
 
+static int __zone_pcp_update(void *data)
+{
+	struct zone *zone = data;
+	int cpu;
+	unsigned long batch = zone_batchsize(zone), flags;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;
+
+		pset = zone_pcp(zone, cpu);
+		pcp = &pset->pcp;
+
+		local_irq_save(flags);
+		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		setup_pageset(pset, batch);
+		local_irq_restore(flags);
+	}
+	return 0;
+}
+
+void zone_pcp_update(struct zone *zone)
+{
+	stop_machine(__zone_pcp_update, zone, NULL);
+}
+
 static __meminit void zone_pcp_init(struct zone *zone)
 {
 	int cpu;
-- 
cgit v0.10.2


From 6fb332fabd7288af9dbe7992394aa6ba97c1a537 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 21 Sep 2009 17:01:17 -0700
Subject: memory hotplug: exclude isolated page from pco page alloc

Pages marked as isolated should not be allocated again.  If such pages
reside in pcp list, they can be allocated too, so there is a ping-pong
memory offline frees some pages to pcp list and the pages get allocated
and then memory offline frees them again, this loop will happen again and
again.

This should have no impact in normal code path, because in normal code
path, pages in pcp list aren't isolated, and below loop will break in the
first entry.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Yakui Zhao <yakui.zhao@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1a3a893..8a83027 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1143,10 +1143,20 @@ again:
 
 		/* Allocate more to the pcp list if necessary */
 		if (unlikely(&page->lru == &pcp->list)) {
+			int get_one_page = 0;
+
 			pcp->count += rmqueue_bulk(zone, 0,
 					pcp->batch, &pcp->list,
 					migratetype, cold);
-			page = list_entry(pcp->list.next, struct page, lru);
+			list_for_each_entry(page, &pcp->list, lru) {
+				if (get_pageblock_migratetype(page) !=
+					    MIGRATE_ISOLATE) {
+					get_one_page = 1;
+					break;
+				}
+			}
+			if (!get_one_page)
+				goto failed;
 		}
 
 		list_del(&page->lru);
-- 
cgit v0.10.2


From 8e7e40d9658cf7b2ae2b76484e235799b3ddaa97 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 21 Sep 2009 17:01:18 -0700
Subject: memory hotplug: make pages from movable zone always isolatable

Pages on movable zone have two types, MIGRATE_MOVABLE and MIGRATE_RESERVE,
both them can be movable, because only movable memory allocation can get
pages from movable zone.  This makes pages in movable zone always be able
to migrate.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Yakui Zhao <yakui.zhao@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8a83027..7b4d4e4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4906,13 +4906,16 @@ int set_migratetype_isolate(struct page *page)
 	struct zone *zone;
 	unsigned long flags;
 	int ret = -EBUSY;
+	int zone_idx;
 
 	zone = page_zone(page);
+	zone_idx = zone_idx(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	/*
 	 * In future, more migrate types will be able to be isolation target.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
+	    zone_idx != ZONE_MOVABLE)
 		goto out;
 	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-- 
cgit v0.10.2


From f52407ce2deac76c87abc8211a63ea152ba72d54 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 21 Sep 2009 17:01:19 -0700
Subject: memory hotplug: alloc page from other node in memory online

To initialize hotadded node, some pages are allocated.  At that time, the
node hasn't memory, this makes the allocation always fail.  In such case,
let's allocate pages from other nodes.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Yakui Zhao <yakui.zhao@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index f22b4eb..3d535d5 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -116,10 +116,16 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 		nid = page_to_nid(pfn_to_page(pfn));
 		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
 		VM_BUG_ON(!slab_is_available());
-		base = kmalloc_node(table_size,
+		if (node_state(nid, N_HIGH_MEMORY)) {
+			base = kmalloc_node(table_size,
 				GFP_KERNEL | __GFP_NOWARN, nid);
-		if (!base)
-			base = vmalloc_node(table_size, nid);
+			if (!base)
+				base = vmalloc_node(table_size, nid);
+		} else {
+			base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
+			if (!base)
+				base = vmalloc(table_size);
+		}
 	} else {
 		/*
  		 * We don't have to allocate page_cgroup again, but
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a13ea64..d9714bd 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -48,8 +48,14 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 {
 	/* If the main allocator is up use that, fallback to bootmem. */
 	if (slab_is_available()) {
-		struct page *page = alloc_pages_node(node,
+		struct page *page;
+
+		if (node_state(node, N_HIGH_MEMORY))
+			page = alloc_pages_node(node,
 				GFP_KERNEL | __GFP_ZERO, get_order(size));
+		else
+			page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+				get_order(size));
 		if (page)
 			return page_address(page);
 		return NULL;
diff --git a/mm/sparse.c b/mm/sparse.c
index da432d9..6ce4aab 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -62,9 +62,12 @@ static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
 	unsigned long array_size = SECTIONS_PER_ROOT *
 				   sizeof(struct mem_section);
 
-	if (slab_is_available())
-		section = kmalloc_node(array_size, GFP_KERNEL, nid);
-	else
+	if (slab_is_available()) {
+		if (node_state(nid, N_HIGH_MEMORY))
+			section = kmalloc_node(array_size, GFP_KERNEL, nid);
+		else
+			section = kmalloc(array_size, GFP_KERNEL);
+	} else
 		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
 
 	if (section)
-- 
cgit v0.10.2


From abfc3488118d48a2b8cce5a2345901aac6b03fee Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 21 Sep 2009 17:01:19 -0700
Subject: memory hotplug: migrate swap cache page

In test, some pages in swap-cache can't be migrated, as they aren't rmap.

unmap_and_move() ignores swap-cache page which is just read in and hasn't
rmap (see the comments in the code), but swap_aops provides .migratepage.
Better to migrate such pages instead of ignore them.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Yakui Zhao <yakui.zhao@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/migrate.c b/mm/migrate.c
index 939888f..0edeac9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -147,7 +147,7 @@ out:
 static void remove_file_migration_ptes(struct page *old, struct page *new)
 {
 	struct vm_area_struct *vma;
-	struct address_space *mapping = page_mapping(new);
+	struct address_space *mapping = new->mapping;
 	struct prio_tree_iter iter;
 	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
@@ -664,13 +664,15 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 			 *    needs to be effective.
 			 */
 			try_to_free_buffers(page);
+			goto rcu_unlock;
 		}
-		goto rcu_unlock;
+		goto skip_unmap;
 	}
 
 	/* Establish migration ptes or remove ptes */
 	try_to_unmap(page, 1);
 
+skip_unmap:
 	if (!page_mapped(page))
 		rc = move_to_new_page(newpage, page);
 
-- 
cgit v0.10.2


From 55a4462af5722d2814858bc51ee8d58ca29544ab Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 21 Sep 2009 17:01:20 -0700
Subject: page_alloc: fix kernel-doc warning

Ummark function as having kernel-doc notation, fixing the kernel-doc
warning.

Warning(mm/page_alloc.c:4519): No description found for parameter 'zone'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7b4d4e4..91ae36c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4541,7 +4541,7 @@ void setup_per_zone_wmarks(void)
 	calculate_totalreserve_pages();
 }
 
-/**
+/*
  * The inactive anon list should be small enough that the VM never has to
  * do too much work, but large enough that each inactive page has a chance
  * to be referenced again before it is swapped out.
-- 
cgit v0.10.2


From e8c5c8249878fb6564125680a1d15e06adbd5639 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 21 Sep 2009 17:01:22 -0700
Subject: hugetlb: balance freeing of huge pages across nodes

Free huges pages from nodes in round robin fashion in an attempt to keep
[persistent a.k.a static] hugepages balanced across nodes

New function free_pool_huge_page() is modeled on and performs roughly the
inverse of alloc_fresh_huge_page().  Replaces dequeue_huge_page() which
now has no callers, so this patch removes it.

Helper function hstate_next_node_to_free() uses new hstate member
next_to_free_nid to distribute "frees" across all nodes with huge pages.

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5cbc620..16cdb75 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -185,7 +185,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 #define HSTATE_NAME_LEN 32
 /* Defines one hugetlb page size */
 struct hstate {
-	int hugetlb_next_nid;
+	int next_nid_to_alloc;
+	int next_nid_to_free;
 	unsigned int order;
 	unsigned long mask;
 	unsigned long max_huge_pages;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b16d636..38dab55 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -456,24 +456,6 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(struct hstate *h)
-{
-	int nid;
-	struct page *page = NULL;
-
-	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
-		if (!list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-			break;
-		}
-	}
-	return page;
-}
-
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
@@ -641,7 +623,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 
 /*
  * Use a helper variable to find the next node and then
- * copy it back to hugetlb_next_nid afterwards:
+ * copy it back to next_nid_to_alloc afterwards:
  * otherwise there's a window in which a racer might
  * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
  * But we don't need to use a spin_lock here: it really
@@ -650,13 +632,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
  * if we just successfully allocated a hugepage so that
  * the next caller gets hugepages on the next node.
  */
-static int hstate_next_node(struct hstate *h)
+static int hstate_next_node_to_alloc(struct hstate *h)
 {
 	int next_nid;
-	next_nid = next_node(h->hugetlb_next_nid, node_online_map);
+	next_nid = next_node(h->next_nid_to_alloc, node_online_map);
 	if (next_nid == MAX_NUMNODES)
 		next_nid = first_node(node_online_map);
-	h->hugetlb_next_nid = next_nid;
+	h->next_nid_to_alloc = next_nid;
 	return next_nid;
 }
 
@@ -667,14 +649,15 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	int next_nid;
 	int ret = 0;
 
-	start_nid = h->hugetlb_next_nid;
+	start_nid = h->next_nid_to_alloc;
+	next_nid = start_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
+		page = alloc_fresh_huge_page_node(h, next_nid);
 		if (page)
 			ret = 1;
-		next_nid = hstate_next_node(h);
-	} while (!page && h->hugetlb_next_nid != start_nid);
+		next_nid = hstate_next_node_to_alloc(h);
+	} while (!page && next_nid != start_nid);
 
 	if (ret)
 		count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -684,6 +667,52 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	return ret;
 }
 
+/*
+ * helper for free_pool_huge_page() - find next node
+ * from which to free a huge page
+ */
+static int hstate_next_node_to_free(struct hstate *h)
+{
+	int next_nid;
+	next_nid = next_node(h->next_nid_to_free, node_online_map);
+	if (next_nid == MAX_NUMNODES)
+		next_nid = first_node(node_online_map);
+	h->next_nid_to_free = next_nid;
+	return next_nid;
+}
+
+/*
+ * Free huge page from pool from next node to free.
+ * Attempt to keep persistent huge pages more or less
+ * balanced over allowed nodes.
+ * Called with hugetlb_lock locked.
+ */
+static int free_pool_huge_page(struct hstate *h)
+{
+	int start_nid;
+	int next_nid;
+	int ret = 0;
+
+	start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		if (!list_empty(&h->hugepage_freelists[next_nid])) {
+			struct page *page =
+				list_entry(h->hugepage_freelists[next_nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			h->free_huge_pages--;
+			h->free_huge_pages_node[next_nid]--;
+			update_and_free_page(h, page);
+			ret = 1;
+		}
+		next_nid = hstate_next_node_to_free(h);
+	} while (!ret && next_nid != start_nid);
+
+	return ret;
+}
+
 static struct page *alloc_buddy_huge_page(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long address)
 {
@@ -1008,7 +1037,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 		void *addr;
 
 		addr = __alloc_bootmem_node_nopanic(
-				NODE_DATA(h->hugetlb_next_nid),
+				NODE_DATA(h->next_nid_to_alloc),
 				huge_page_size(h), huge_page_size(h), 0);
 
 		if (addr) {
@@ -1020,7 +1049,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 			m = addr;
 			goto found;
 		}
-		hstate_next_node(h);
+		hstate_next_node_to_alloc(h);
 		nr_nodes--;
 	}
 	return 0;
@@ -1141,31 +1170,43 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
  */
 static int adjust_pool_surplus(struct hstate *h, int delta)
 {
-	static int prev_nid;
-	int nid = prev_nid;
+	int start_nid, next_nid;
 	int ret = 0;
 
 	VM_BUG_ON(delta != -1 && delta != 1);
-	do {
-		nid = next_node(nid, node_online_map);
-		if (nid == MAX_NUMNODES)
-			nid = first_node(node_online_map);
 
-		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !h->surplus_huge_pages_node[nid])
-			continue;
-		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+	if (delta < 0)
+		start_nid = h->next_nid_to_alloc;
+	else
+		start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		int nid = next_nid;
+		if (delta < 0)  {
+			next_nid = hstate_next_node_to_alloc(h);
+			/*
+			 * To shrink on this node, there must be a surplus page
+			 */
+			if (!h->surplus_huge_pages_node[nid])
+				continue;
+		}
+		if (delta > 0) {
+			next_nid = hstate_next_node_to_free(h);
+			/*
+			 * Surplus cannot exceed the total number of pages
+			 */
+			if (h->surplus_huge_pages_node[nid] >=
 						h->nr_huge_pages_node[nid])
-			continue;
+				continue;
+		}
 
 		h->surplus_huge_pages += delta;
 		h->surplus_huge_pages_node[nid] += delta;
 		ret = 1;
 		break;
-	} while (nid != prev_nid);
+	} while (next_nid != start_nid);
 
-	prev_nid = nid;
 	return ret;
 }
 
@@ -1227,10 +1268,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	min_count = max(count, min_count);
 	try_to_free_low(h, min_count);
 	while (min_count < persistent_huge_pages(h)) {
-		struct page *page = dequeue_huge_page(h);
-		if (!page)
+		if (!free_pool_huge_page(h))
 			break;
-		update_and_free_page(h, page);
 	}
 	while (count < persistent_huge_pages(h)) {
 		if (!adjust_pool_surplus(h, 1))
@@ -1442,7 +1481,8 @@ void __init hugetlb_add_hstate(unsigned order)
 	h->free_huge_pages = 0;
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
-	h->hugetlb_next_nid = first_node(node_online_map);
+	h->next_nid_to_alloc = first_node(node_online_map);
+	h->next_nid_to_free = first_node(node_online_map);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 					huge_page_size(h)/1024);
 
-- 
cgit v0.10.2


From 685f345708096ed21078aa44a6f4a6e6d1d1b580 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 21 Sep 2009 17:01:23 -0700
Subject: hugetlb: use free_pool_huge_page() to return unused surplus pages

Use the [modified] free_pool_huge_page() function to return unused
surplus pages.  This will help keep huge pages balanced across nodes
between freeing of unused surplus pages and freeing of persistent huge
pages [from set_max_huge_pages] by using the same node id "cursor". It
also eliminates some code duplication.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38dab55..f10cc27 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -687,7 +687,7 @@ static int hstate_next_node_to_free(struct hstate *h)
  * balanced over allowed nodes.
  * Called with hugetlb_lock locked.
  */
-static int free_pool_huge_page(struct hstate *h)
+static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
 {
 	int start_nid;
 	int next_nid;
@@ -697,13 +697,22 @@ static int free_pool_huge_page(struct hstate *h)
 	next_nid = start_nid;
 
 	do {
-		if (!list_empty(&h->hugepage_freelists[next_nid])) {
+		/*
+		 * If we're returning unused surplus pages, only examine
+		 * nodes with surplus pages.
+		 */
+		if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
+		    !list_empty(&h->hugepage_freelists[next_nid])) {
 			struct page *page =
 				list_entry(h->hugepage_freelists[next_nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
 			h->free_huge_pages--;
 			h->free_huge_pages_node[next_nid]--;
+			if (acct_surplus) {
+				h->surplus_huge_pages--;
+				h->surplus_huge_pages_node[next_nid]--;
+			}
 			update_and_free_page(h, page);
 			ret = 1;
 		}
@@ -884,22 +893,13 @@ free:
  * When releasing a hugetlb pool reservation, any surplus pages that were
  * allocated to satisfy the reservation must be explicitly freed if they were
  * never used.
+ * Called with hugetlb_lock held.
  */
 static void return_unused_surplus_pages(struct hstate *h,
 					unsigned long unused_resv_pages)
 {
-	static int nid = -1;
-	struct page *page;
 	unsigned long nr_pages;
 
-	/*
-	 * We want to release as many surplus pages as possible, spread
-	 * evenly across all nodes. Iterate across all nodes until we
-	 * can no longer free unreserved surplus pages. This occurs when
-	 * the nodes with surplus pages have no free pages.
-	 */
-	unsigned long remaining_iterations = nr_online_nodes;
-
 	/* Uncommit the reservation */
 	h->resv_huge_pages -= unused_resv_pages;
 
@@ -909,26 +909,17 @@ static void return_unused_surplus_pages(struct hstate *h,
 
 	nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
 
-	while (remaining_iterations-- && nr_pages) {
-		nid = next_node(nid, node_online_map);
-		if (nid == MAX_NUMNODES)
-			nid = first_node(node_online_map);
-
-		if (!h->surplus_huge_pages_node[nid])
-			continue;
-
-		if (!list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			update_and_free_page(h, page);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-			h->surplus_huge_pages--;
-			h->surplus_huge_pages_node[nid]--;
-			nr_pages--;
-			remaining_iterations = nr_online_nodes;
-		}
+	/*
+	 * We want to release as many surplus pages as possible, spread
+	 * evenly across all nodes. Iterate across all nodes until we
+	 * can no longer free unreserved surplus pages. This occurs when
+	 * the nodes with surplus pages have no free pages.
+	 * free_pool_huge_page() will balance the the frees across the
+	 * on-line nodes for us and will handle the hstate accounting.
+	 */
+	while (nr_pages--) {
+		if (!free_pool_huge_page(h, 1))
+			break;
 	}
 }
 
@@ -1268,7 +1259,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	min_count = max(count, min_count);
 	try_to_free_low(h, min_count);
 	while (min_count < persistent_huge_pages(h)) {
-		if (!free_pool_huge_page(h))
+		if (!free_pool_huge_page(h, 0))
 			break;
 	}
 	while (count < persistent_huge_pages(h)) {
-- 
cgit v0.10.2


From 41a25e7e67b8be33d7598ff7968b9a8b405b6567 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Mon, 21 Sep 2009 17:01:24 -0700
Subject: hugetlb: clean up and update huge pages documentation

Attempt to clarify huge page administration and usage, and updates the
doucmentation to mention the balancing of huge pages across nodes when
allocating and freeing.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index ea8714f..3a167be7 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -18,13 +18,13 @@ First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
 automatically when CONFIG_HUGETLBFS is selected) configuration
 options.
 
-The kernel built with hugepage support should show the number of configured
-hugepages in the system by running the "cat /proc/meminfo" command.
+The kernel built with huge page support should show the number of configured
+huge pages in the system by running the "cat /proc/meminfo" command.
 
 /proc/meminfo also provides information about the total number of hugetlb
 pages configured in the kernel.  It also displays information about the
 number of free hugetlb pages at any time.  It also displays information about
-the configured hugepage size - this is needed for generating the proper
+the configured huge page size - this is needed for generating the proper
 alignment and size of the arguments to the above system calls.
 
 The output of "cat /proc/meminfo" will have lines like:
@@ -37,25 +37,27 @@ HugePages_Surp:  yyy
 Hugepagesize:    zzz kB
 
 where:
-HugePages_Total is the size of the pool of hugepages.
-HugePages_Free is the number of hugepages in the pool that are not yet
-allocated.
-HugePages_Rsvd is short for "reserved," and is the number of hugepages
-for which a commitment to allocate from the pool has been made, but no
-allocation has yet been made. It's vaguely analogous to overcommit.
-HugePages_Surp is short for "surplus," and is the number of hugepages in
-the pool above the value in /proc/sys/vm/nr_hugepages. The maximum
-number of surplus hugepages is controlled by
-/proc/sys/vm/nr_overcommit_hugepages.
+HugePages_Total is the size of the pool of huge pages.
+HugePages_Free  is the number of huge pages in the pool that are not yet
+                allocated.
+HugePages_Rsvd  is short for "reserved," and is the number of huge pages for
+                which a commitment to allocate from the pool has been made,
+                but no allocation has yet been made.  Reserved huge pages
+                guarantee that an application will be able to allocate a
+                huge page from the pool of huge pages at fault time.
+HugePages_Surp  is short for "surplus," and is the number of huge pages in
+                the pool above the value in /proc/sys/vm/nr_hugepages. The
+                maximum number of surplus huge pages is controlled by
+                /proc/sys/vm/nr_overcommit_hugepages.
 
 /proc/filesystems should also show a filesystem of type "hugetlbfs" configured
 in the kernel.
 
 /proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb
 pages in the kernel.  Super user can dynamically request more (or free some
-pre-configured) hugepages.
+pre-configured) huge pages.
 The allocation (or deallocation) of hugetlb pages is possible only if there are
-enough physically contiguous free pages in system (freeing of hugepages is
+enough physically contiguous free pages in system (freeing of huge pages is
 possible only if there are enough hugetlb pages free that can be transferred
 back to regular memory pool).
 
@@ -67,43 +69,82 @@ use either the mmap system call or shared memory system calls to start using
 the huge pages.  It is required that the system administrator preallocate
 enough memory for huge page purposes.
 
-Use the following command to dynamically allocate/deallocate hugepages:
+The administrator can preallocate huge pages on the kernel boot command line by
+specifying the "hugepages=N" parameter, where 'N' = the number of huge pages
+requested.  This is the most reliable method for preallocating huge pages as
+memory has not yet become fragmented.
+
+Some platforms support multiple huge page sizes.  To preallocate huge pages
+of a specific size, one must preceed the huge pages boot command parameters
+with a huge page size selection parameter "hugepagesz=<size>".  <size> must
+be specified in bytes with optional scale suffix [kKmMgG].  The default huge
+page size may be selected with the "default_hugepagesz=<size>" boot parameter.
+
+/proc/sys/vm/nr_hugepages indicates the current number of configured [default
+size] hugetlb pages in the kernel.  Super user can dynamically request more
+(or free some pre-configured) huge pages.
+
+Use the following command to dynamically allocate/deallocate default sized
+huge pages:
 
 	echo 20 > /proc/sys/vm/nr_hugepages
 
-This command will try to configure 20 hugepages in the system.  The success
-or failure of allocation depends on the amount of physically contiguous
-memory that is preset in system at this time.  System administrators may want
-to put this command in one of the local rc init files.  This will enable the
-kernel to request huge pages early in the boot process (when the possibility
-of getting physical contiguous pages is still very high). In either
-case, administrators will want to verify the number of hugepages actually
-allocated by checking the sysctl or meminfo.
-
-/proc/sys/vm/nr_overcommit_hugepages indicates how large the pool of
-hugepages can grow, if more hugepages than /proc/sys/vm/nr_hugepages are
-requested by applications. echo'ing any non-zero value into this file
-indicates that the hugetlb subsystem is allowed to try to obtain
-hugepages from the buddy allocator, if the normal pool is exhausted. As
-these surplus hugepages go out of use, they are freed back to the buddy
+This command will try to configure 20 default sized huge pages in the system.
+On a NUMA platform, the kernel will attempt to distribute the huge page pool
+over the all on-line nodes.  These huge pages, allocated when nr_hugepages
+is increased, are called "persistent huge pages".
+
+The success or failure of huge page allocation depends on the amount of
+physically contiguous memory that is preset in system at the time of the
+allocation attempt.  If the kernel is unable to allocate huge pages from
+some nodes in a NUMA system, it will attempt to make up the difference by
+allocating extra pages on other nodes with sufficient available contiguous
+memory, if any.
+
+System administrators may want to put this command in one of the local rc init
+files.  This will enable the kernel to request huge pages early in the boot
+process when the possibility of getting physical contiguous pages is still
+very high.  Administrators can verify the number of huge pages actually
+allocated by checking the sysctl or meminfo.  To check the per node
+distribution of huge pages in a NUMA system, use:
+
+	cat /sys/devices/system/node/node*/meminfo | fgrep Huge
+
+/proc/sys/vm/nr_overcommit_hugepages specifies how large the pool of
+huge pages can grow, if more huge pages than /proc/sys/vm/nr_hugepages are
+requested by applications.  Writing any non-zero value into this file
+indicates that the hugetlb subsystem is allowed to try to obtain "surplus"
+huge pages from the buddy allocator, when the normal pool is exhausted. As
+these surplus huge pages go out of use, they are freed back to the buddy
 allocator.
 
+When increasing the huge page pool size via nr_hugepages, any surplus
+pages will first be promoted to persistent huge pages.  Then, additional
+huge pages will be allocated, if necessary and if possible, to fulfill
+the new huge page pool size.
+
+The administrator may shrink the pool of preallocated huge pages for
+the default huge page size by setting the nr_hugepages sysctl to a
+smaller value.  The kernel will attempt to balance the freeing of huge pages
+across all on-line nodes.  Any free huge pages on the selected nodes will
+be freed back to the buddy allocator.
+
 Caveat: Shrinking the pool via nr_hugepages such that it becomes less
-than the number of hugepages in use will convert the balance to surplus
+than the number of huge pages in use will convert the balance to surplus
 huge pages even if it would exceed the overcommit value.  As long as
 this condition holds, however, no more surplus huge pages will be
 allowed on the system until one of the two sysctls are increased
 sufficiently, or the surplus huge pages go out of use and are freed.
 
-With support for multiple hugepage pools at run-time available, much of
-the hugepage userspace interface has been duplicated in sysfs. The above
-information applies to the default hugepage size (which will be
-controlled by the proc interfaces for backwards compatibility). The root
-hugepage control directory is
+With support for multiple huge page pools at run-time available, much of
+the huge page userspace interface has been duplicated in sysfs. The above
+information applies to the default huge page size which will be
+controlled by the /proc interfaces for backwards compatibility. The root
+huge page control directory in sysfs is:
 
 	/sys/kernel/mm/hugepages
 
-For each hugepage size supported by the running kernel, a subdirectory
+For each huge page size supported by the running kernel, a subdirectory
 will exist, of the form
 
 	hugepages-${size}kB
@@ -116,9 +157,9 @@ Inside each of these directories, the same set of files will exist:
 	resv_hugepages
 	surplus_hugepages
 
-which function as described above for the default hugepage-sized case.
+which function as described above for the default huge page-sized case.
 
-If the user applications are going to request hugepages using mmap system
+If the user applications are going to request huge pages using mmap system
 call, then it is required that system administrator mount a file system of
 type hugetlbfs:
 
@@ -127,7 +168,7 @@ type hugetlbfs:
 	none /mnt/huge
 
 This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
-/mnt/huge.  Any files created on /mnt/huge uses hugepages.  The uid and gid
+/mnt/huge.  Any files created on /mnt/huge uses huge pages.  The uid and gid
 options sets the owner and group of the root of the file system.  By default
 the uid and gid of the current process are taken.  The mode option sets the
 mode of root of file system to value & 0777.  This value is given in octal.
@@ -156,14 +197,14 @@ mount of filesystem will be required for using mmap calls.
 *******************************************************************
 
 /*
- * Example of using hugepage memory in a user application using Sys V shared
+ * Example of using huge page memory in a user application using Sys V shared
  * memory system calls.  In this example the app is requesting 256MB of
  * memory that is backed by huge pages.  The application uses the flag
  * SHM_HUGETLB in the shmget system call to inform the kernel that it is
- * requesting hugepages.
+ * requesting huge pages.
  *
  * For the ia64 architecture, the Linux kernel reserves Region number 4 for
- * hugepages.  That means the addresses starting with 0x800000... will need
+ * huge pages.  That means the addresses starting with 0x800000... will need
  * to be specified.  Specifying a fixed address is not required on ppc64,
  * i386 or x86_64.
  *
@@ -252,14 +293,14 @@ int main(void)
 *******************************************************************
 
 /*
- * Example of using hugepage memory in a user application using the mmap
+ * Example of using huge page memory in a user application using the mmap
  * system call.  Before running this application, make sure that the
  * administrator has mounted the hugetlbfs filesystem (on some directory
  * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
  * example, the app is requesting memory of size 256MB that is backed by
  * huge pages.
  *
- * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * For ia64 architecture, Linux kernel reserves Region number 4 for huge pages.
  * That means the addresses starting with 0x800000... will need to be
  * specified.  Specifying a fixed address is not required on ppc64, i386
  * or x86_64.
-- 
cgit v0.10.2


From 57dd28fb0513d2f772bb215f27925165e7b9ce5f Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Date: Mon, 21 Sep 2009 17:01:25 -0700
Subject: hugetlb: restore interleaving of bootmem huge pages

I noticed that alloc_bootmem_huge_page() will only advance to the next
node on failure to allocate a huge page, potentially filling nodes with
huge-pages.  I asked about this on linux-mm and linux-numa, cc'ing the
usual huge page suspects.

Mel Gorman responded:

	I strongly suspect that the same node being used until allocation
	failure instead of round-robin is an oversight and not deliberate
	at all. It appears to be a side-effect of a fix made way back in
	commit 63b4613c3f0d4b724ba259dc6c201bb68b884e1a ["hugetlb: fix
	hugepage allocation with memoryless nodes"]. Prior to that patch
	it looked like allocations would always round-robin even when
	allocation was successful.

This patch--factored out of my "hugetlb mempolicy" series--moves the
advance of the hstate next node from which to allocate up before the test
for success of the attempted allocation.

Note that alloc_bootmem_huge_page() is only used for order > MAX_ORDER
huge pages.

I'll post a separate patch for mainline/stable, as the above mentioned
"balance freeing" series renamed the next node to alloc function.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andy Whitcroft <apw@canonical.com>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f10cc27..c001f84 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1031,6 +1031,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 				NODE_DATA(h->next_nid_to_alloc),
 				huge_page_size(h), huge_page_size(h), 0);
 
+		hstate_next_node_to_alloc(h);
 		if (addr) {
 			/*
 			 * Use the beginning of the huge page to store the
@@ -1040,7 +1041,6 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 			m = addr;
 			goto found;
 		}
-		hstate_next_node_to_alloc(h);
 		nr_nodes--;
 	}
 	return 0;
-- 
cgit v0.10.2


From b904dcfed6967e9cfc8a54778498f6d289420309 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:28 -0700
Subject: mm: clean up page_remove_rmap()

page_remove_rmap() has multiple PageAnon() tests and it has deep nesting.
Clean this up.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/rmap.c b/mm/rmap.c
index 0895b5c..1406e67 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -739,34 +739,37 @@ void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long
  */
 void page_remove_rmap(struct page *page)
 {
-	if (atomic_add_negative(-1, &page->_mapcount)) {
-		/*
-		 * Now that the last pte has gone, s390 must transfer dirty
-		 * flag from storage key to struct page.  We can usually skip
-		 * this if the page is anon, so about to be freed; but perhaps
-		 * not if it's in swapcache - there might be another pte slot
-		 * containing the swap entry, but page not yet written to swap.
-		 */
-		if ((!PageAnon(page) || PageSwapCache(page)) &&
-		    page_test_dirty(page)) {
-			page_clear_dirty(page);
-			set_page_dirty(page);
-		}
-		if (PageAnon(page))
-			mem_cgroup_uncharge_page(page);
-		__dec_zone_page_state(page,
-			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
-		mem_cgroup_update_mapped_file_stat(page, -1);
-		/*
-		 * It would be tidy to reset the PageAnon mapping here,
-		 * but that might overwrite a racing page_add_anon_rmap
-		 * which increments mapcount after us but sets mapping
-		 * before us: so leave the reset to free_hot_cold_page,
-		 * and remember that it's only reliable while mapped.
-		 * Leaving it set also helps swapoff to reinstate ptes
-		 * faster for those pages still in swapcache.
-		 */
+	/* page still mapped by someone else? */
+	if (!atomic_add_negative(-1, &page->_mapcount))
+		return;
+
+	/*
+	 * Now that the last pte has gone, s390 must transfer dirty
+	 * flag from storage key to struct page.  We can usually skip
+	 * this if the page is anon, so about to be freed; but perhaps
+	 * not if it's in swapcache - there might be another pte slot
+	 * containing the swap entry, but page not yet written to swap.
+	 */
+	if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) {
+		page_clear_dirty(page);
+		set_page_dirty(page);
 	}
+	if (PageAnon(page)) {
+		mem_cgroup_uncharge_page(page);
+		__dec_zone_page_state(page, NR_ANON_PAGES);
+	} else {
+		__dec_zone_page_state(page, NR_FILE_MAPPED);
+	}
+	mem_cgroup_update_mapped_file_stat(page, -1);
+	/*
+	 * It would be tidy to reset the PageAnon mapping here,
+	 * but that might overwrite a racing page_add_anon_rmap
+	 * which increments mapcount after us but sets mapping
+	 * before us: so leave the reset to free_hot_cold_page,
+	 * and remember that it's only reliable while mapped.
+	 * Leaving it set also helps swapoff to reinstate ptes
+	 * faster for those pages still in swapcache.
+	 */
 }
 
 /*
-- 
cgit v0.10.2


From 3701b0332330ca1add3e5d56513ef201ff7efdbb Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:29 -0700
Subject: mm: show_free_areas(): display slab pages in two separate fields

If an OOM happens, we really want to know the number of remaining
reclaimable pages.  So the reclaimable slab and unreclaimable slab fields
should not be combined for display.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 91ae36c..bdf12f3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2138,7 +2138,8 @@ void show_free_areas(void)
 		" inactive_file:%lu"
 		" unevictable:%lu"
 		" dirty:%lu writeback:%lu unstable:%lu\n"
-		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+		" mapped:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
@@ -2148,8 +2149,8 @@ void show_free_areas(void)
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
 		global_page_state(NR_FREE_PAGES),
-		global_page_state(NR_SLAB_RECLAIMABLE) +
-			global_page_state(NR_SLAB_UNRECLAIMABLE),
+		global_page_state(NR_SLAB_RECLAIMABLE),
+		global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE));
-- 
cgit v0.10.2


From 3b2b9a875ddcbf9fcd667db9f961a6a163bd083f Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Mon, 21 Sep 2009 17:01:29 -0700
Subject: Documentation/memory.txt: remove some very outdated recommendations

Remove some very outdated recommendations in Documentation/memory.txt

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/memory.txt b/Documentation/memory.txt
index 2b3dedd..802efe5 100644
--- a/Documentation/memory.txt
+++ b/Documentation/memory.txt
@@ -1,18 +1,7 @@
 There are several classic problems related to memory on Linux
 systems.
 
-	1) There are some buggy motherboards which cannot properly 
-	   deal with the memory above 16MB.  Consider exchanging
-	   your motherboard.
-
-	2) You cannot do DMA on the ISA bus to addresses above
-	   16M.  Most device drivers under Linux allow the use
-           of bounce buffers which work around this problem.  Drivers
-	   that don't use bounce buffers will be unstable with
-	   more than 16M installed.  Drivers that use bounce buffers
-	   will be OK, but may have slightly higher overhead.
-	
-	3) There are some motherboards that will not cache above
+	1) There are some motherboards that will not cache above
 	   a certain quantity of memory.  If you have one of these
 	   motherboards, your system will be SLOWER, not faster
 	   as you add more memory.  Consider exchanging your 
@@ -24,7 +13,7 @@ It can also tell Linux to use less memory than is actually installed.
 If you use "mem=" on a machine with PCI, consider using "memmap=" to avoid
 physical address space collisions.
 
-See the documentation of your boot loader (LILO, loadlin, etc.) about
+See the documentation of your boot loader (LILO, grub, loadlin, etc.) about
 how to pass options to the kernel.
 
 There are other memory problems which Linux cannot deal with.  Random
@@ -42,19 +31,3 @@ Try:
 	  with the vendor. Consider testing it with memtest86 yourself.
 	
 	* Exchanging your CPU, cache, or motherboard for one that works.
-
-	* Disabling the cache from the BIOS.
-
-	* Try passing the "mem=4M" option to the kernel to limit
-	  Linux to using a very small amount of memory. Use "memmap="-option
-	  together with "mem=" on systems with PCI to avoid physical address
-	  space collisions.
-
-
-Other tricks:
-
-	* Try passing the "no-387" option to the kernel to ignore
-	  a buggy FPU.
-
-	* Try passing the "no-hlt" option to disable the potentially
-          buggy HLT instruction in your CPU.
-- 
cgit v0.10.2


From 4a0aa73f1d613bf19bc8610bf090c941ef49d720 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:30 -0700
Subject: mm: oom analysis: add per-zone statistics to show_free_areas()

show_free_areas() displays only a limited amount of zone counters.  This
patch includes additional counters in the display to allow easier
debugging.  This may be especially useful if an OOM is due to running out
of DMA memory.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bdf12f3..8fbf5a4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2170,6 +2170,16 @@ void show_free_areas(void)
 			" inactive_file:%lukB"
 			" unevictable:%lukB"
 			" present:%lukB"
+			" mlocked:%lukB"
+			" dirty:%lukB"
+			" writeback:%lukB"
+			" mapped:%lukB"
+			" slab_reclaimable:%lukB"
+			" slab_unreclaimable:%lukB"
+			" pagetables:%lukB"
+			" unstable:%lukB"
+			" bounce:%lukB"
+			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
 			"\n",
@@ -2184,6 +2194,16 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_INACTIVE_FILE)),
 			K(zone_page_state(zone, NR_UNEVICTABLE)),
 			K(zone->present_pages),
+			K(zone_page_state(zone, NR_MLOCK)),
+			K(zone_page_state(zone, NR_FILE_DIRTY)),
+			K(zone_page_state(zone, NR_WRITEBACK)),
+			K(zone_page_state(zone, NR_FILE_MAPPED)),
+			K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
+			K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
+			K(zone_page_state(zone, NR_PAGETABLE)),
+			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
+			K(zone_page_state(zone, NR_BOUNCE)),
+			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
 			);
-- 
cgit v0.10.2


From 71de1ccbe1fb40203edd3beb473f8580d917d2ca Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:31 -0700
Subject: mm: oom analysis: add buffer cache information to show_free_areas()

It is often useful to know the statistics for all pages that are handled
like page cache pages when looking at OOM log output.

Therefore show_free_areas() should also display buffer cache statistics.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8fbf5a4..494c091 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2137,7 +2137,7 @@ void show_free_areas(void)
 	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
 		" inactive_file:%lu"
 		" unevictable:%lu"
-		" dirty:%lu writeback:%lu unstable:%lu\n"
+		" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
 		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
 		" mapped:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
@@ -2148,6 +2148,7 @@ void show_free_areas(void)
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
+		nr_blockdev_pages(),
 		global_page_state(NR_FREE_PAGES),
 		global_page_state(NR_SLAB_RECLAIMABLE),
 		global_page_state(NR_SLAB_UNRECLAIMABLE),
-- 
cgit v0.10.2


From c6a7f5728a1db45d30df55a01adc130b4ab0327c Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:32 -0700
Subject: mm: oom analysis: Show kernel stack usage in /proc/meminfo and OOM
 log output

The amount of memory allocated to kernel stacks can become significant and
cause OOM conditions.  However, we do not display the amount of memory
consumed by stacks.

Add code to display the amount of memory used for stacks in /proc/meminfo.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 91d4087..b560c17 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -85,6 +85,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       "Node %d FilePages:      %8lu kB\n"
 		       "Node %d Mapped:         %8lu kB\n"
 		       "Node %d AnonPages:      %8lu kB\n"
+		       "Node %d KernelStack:    %8lu kB\n"
 		       "Node %d PageTables:     %8lu kB\n"
 		       "Node %d NFS_Unstable:   %8lu kB\n"
 		       "Node %d Bounce:         %8lu kB\n"
@@ -116,6 +117,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       nid, K(node_page_state(nid, NR_FILE_PAGES)),
 		       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
 		       nid, K(node_page_state(nid, NR_ANON_PAGES)),
+		       nid, node_page_state(nid, NR_KERNEL_STACK) *
+				THREAD_SIZE / 1024,
 		       nid, K(node_page_state(nid, NR_PAGETABLE)),
 		       nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
 		       nid, K(node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index d5c410d..1fc588f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -84,6 +84,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		"Slab:           %8lu kB\n"
 		"SReclaimable:   %8lu kB\n"
 		"SUnreclaim:     %8lu kB\n"
+		"KernelStack:    %8lu kB\n"
 		"PageTables:     %8lu kB\n"
 #ifdef CONFIG_QUICKLIST
 		"Quicklists:     %8lu kB\n"
@@ -128,6 +129,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 				global_page_state(NR_SLAB_UNRECLAIMABLE)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE)),
 		K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
+		global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
 		K(global_page_state(NR_PAGETABLE)),
 #ifdef CONFIG_QUICKLIST
 		K(quicklist_total_size()),
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8895985..d9335b8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -94,10 +94,11 @@ enum zone_stat_item {
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,		/* used for pagetables */
+	NR_KERNEL_STACK,
+	/* Second 128 byte cacheline */
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
 	NR_VMSCAN_WRITE,
-	/* Second 128 byte cacheline */
 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
diff --git a/kernel/fork.c b/kernel/fork.c
index 2cebfb2..d4638c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -136,9 +136,17 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+static void account_kernel_stack(struct thread_info *ti, int account)
+{
+	struct zone *zone = page_zone(virt_to_page(ti));
+
+	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
+}
+
 void free_task(struct task_struct *tsk)
 {
 	prop_local_destroy_single(&tsk->dirties);
+	account_kernel_stack(tsk->stack, -1);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
@@ -253,6 +261,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	tsk->btrace_seq = 0;
 #endif
 	tsk->splice_pipe = NULL;
+
+	account_kernel_stack(ti, 1);
+
 	return tsk;
 
 out:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 494c091..4e050f3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2177,6 +2177,7 @@ void show_free_areas(void)
 			" mapped:%lukB"
 			" slab_reclaimable:%lukB"
 			" slab_unreclaimable:%lukB"
+			" kernel_stack:%lukB"
 			" pagetables:%lukB"
 			" unstable:%lukB"
 			" bounce:%lukB"
@@ -2201,6 +2202,8 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_FILE_MAPPED)),
 			K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
 			K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
+			zone_page_state(zone, NR_KERNEL_STACK) *
+				THREAD_SIZE / 1024,
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
 			K(zone_page_state(zone, NR_BOUNCE)),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 138bed5..ceda39b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -639,6 +639,7 @@ static const char * const vmstat_text[] = {
 	"nr_slab_reclaimable",
 	"nr_slab_unreclaimable",
 	"nr_page_table_pages",
+	"nr_kernel_stack",
 	"nr_unstable",
 	"nr_bounce",
 	"nr_vmscan_write",
-- 
cgit v0.10.2


From 4b02108ac1b3354a22b0d83c684797692efdc395 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:33 -0700
Subject: mm: oom analysis: add shmem vmstat

Recently we encountered OOM problems due to memory use of the GEM cache.
Generally a large amuont of Shmem/Tmpfs pages tend to create a memory
shortage problem.

We often use the following calculation to determine the amount of shmem
pages:

shmem = NR_ACTIVE_ANON + NR_INACTIVE_ANON - NR_ANON_PAGES

however the expression does not consider isolated and mlocked pages.

This patch adds explicit accounting for pages used by shmem and tmpfs.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/base/node.c b/drivers/base/node.c
index b560c17..1fe5536 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -85,6 +85,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       "Node %d FilePages:      %8lu kB\n"
 		       "Node %d Mapped:         %8lu kB\n"
 		       "Node %d AnonPages:      %8lu kB\n"
+		       "Node %d Shmem:          %8lu kB\n"
 		       "Node %d KernelStack:    %8lu kB\n"
 		       "Node %d PageTables:     %8lu kB\n"
 		       "Node %d NFS_Unstable:   %8lu kB\n"
@@ -117,6 +118,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       nid, K(node_page_state(nid, NR_FILE_PAGES)),
 		       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
 		       nid, K(node_page_state(nid, NR_ANON_PAGES)),
+		       nid, K(node_page_state(nid, NR_SHMEM)),
 		       nid, node_page_state(nid, NR_KERNEL_STACK) *
 				THREAD_SIZE / 1024,
 		       nid, K(node_page_state(nid, NR_PAGETABLE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 1fc588f..171e052 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -81,6 +81,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		"Writeback:      %8lu kB\n"
 		"AnonPages:      %8lu kB\n"
 		"Mapped:         %8lu kB\n"
+		"Shmem:          %8lu kB\n"
 		"Slab:           %8lu kB\n"
 		"SReclaimable:   %8lu kB\n"
 		"SUnreclaim:     %8lu kB\n"
@@ -125,6 +126,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(global_page_state(NR_WRITEBACK)),
 		K(global_page_state(NR_ANON_PAGES)),
 		K(global_page_state(NR_FILE_MAPPED)),
+		K(global_page_state(NR_SHMEM)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE) +
 				global_page_state(NR_SLAB_UNRECLAIMABLE)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE)),
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d9335b8..b3583b9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -100,6 +100,7 @@ enum zone_stat_item {
 	NR_BOUNCE,
 	NR_VMSCAN_WRITE,
 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
+	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
 	NUMA_MISS,		/* allocated in non intended node */
diff --git a/mm/filemap.c b/mm/filemap.c
index dd51c68e..bcc7372 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -119,6 +119,8 @@ void __remove_from_page_cache(struct page *page)
 	page->mapping = NULL;
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
+	if (PageSwapBacked(page))
+		__dec_zone_page_state(page, NR_SHMEM);
 	BUG_ON(page_mapped(page));
 
 	/*
@@ -431,6 +433,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		if (likely(!error)) {
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
+			if (PageSwapBacked(page))
+				__inc_zone_page_state(page, NR_SHMEM);
 			spin_unlock_irq(&mapping->tree_lock);
 		} else {
 			page->mapping = NULL;
diff --git a/mm/migrate.c b/mm/migrate.c
index 0edeac9..37143b9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -312,7 +312,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
-
+	if (PageSwapBacked(page)) {
+		__dec_zone_page_state(page, NR_SHMEM);
+		__inc_zone_page_state(newpage, NR_SHMEM);
+	}
 	spin_unlock_irq(&mapping->tree_lock);
 
 	return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e050f3..e50c225 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2139,7 +2139,7 @@ void show_free_areas(void)
 		" unevictable:%lu"
 		" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
 		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
-		" mapped:%lu pagetables:%lu bounce:%lu\n",
+		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
@@ -2153,6 +2153,7 @@ void show_free_areas(void)
 		global_page_state(NR_SLAB_RECLAIMABLE),
 		global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
+		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE));
 
@@ -2175,6 +2176,7 @@ void show_free_areas(void)
 			" dirty:%lukB"
 			" writeback:%lukB"
 			" mapped:%lukB"
+			" shmem:%lukB"
 			" slab_reclaimable:%lukB"
 			" slab_unreclaimable:%lukB"
 			" kernel_stack:%lukB"
@@ -2200,6 +2202,7 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_FILE_DIRTY)),
 			K(zone_page_state(zone, NR_WRITEBACK)),
 			K(zone_page_state(zone, NR_FILE_MAPPED)),
+			K(zone_page_state(zone, NR_SHMEM)),
 			K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
 			K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
 			zone_page_state(zone, NR_KERNEL_STACK) *
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ceda39b..7214a45 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -644,7 +644,7 @@ static const char * const vmstat_text[] = {
 	"nr_bounce",
 	"nr_vmscan_write",
 	"nr_writeback_temp",
-
+	"nr_shmem",
 #ifdef CONFIG_NUMA
 	"numa_hit",
 	"numa_miss",
-- 
cgit v0.10.2


From b259fbde0a86085264c89aa2ce9c6e35792a1aad Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 21 Sep 2009 17:01:34 -0700
Subject: mm: update alloc_flags after oom killer has been called

It is possible for the oom killer to select current as the task to kill.
When this happens, alloc_flags needs to be updated accordingly to set
ALLOC_NO_WATERMARKS so the subsequent allocation attempt may use memory
reserves as the result of its thread having TIF_MEMDIE set if the
allocation is not __GFP_NOMEMALLOC.

Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e50c225..b6d0d09 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1771,6 +1771,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 	wake_all_kswapd(order, zonelist, high_zoneidx);
 
+restart:
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
 	 * reclaim. Now things get more complex, so set up alloc_flags according
@@ -1778,7 +1779,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
-restart:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
-- 
cgit v0.10.2


From 44c241f166b31999482c3c40448f4bbb2157a804 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:35 -0700
Subject: mm: rename pgmoved variable in shrink_active_list()

Currently the pgmoved variable has two meanings.  It causes harder
reviewing.  This patch separates it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index ba8228e..45a150a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1244,7 +1244,7 @@ static void move_active_pages_to_lru(struct zone *zone,
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			struct scan_control *sc, int priority, int file)
 {
-	unsigned long pgmoved;
+	unsigned long nr_taken;
 	unsigned long pgscanned;
 	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
@@ -1252,10 +1252,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	LIST_HEAD(l_inactive);
 	struct page *page;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	unsigned long nr_rotated = 0;
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
-	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
+	nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
 					ISOLATE_ACTIVE, zone,
 					sc->mem_cgroup, 1, file);
 	/*
@@ -1265,16 +1266,15 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	if (scanning_global_lru(sc)) {
 		zone->pages_scanned += pgscanned;
 	}
-	reclaim_stat->recent_scanned[!!file] += pgmoved;
+	reclaim_stat->recent_scanned[!!file] += nr_taken;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	if (file)
-		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
+		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
 	else
-		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
+		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
-	pgmoved = 0;  /* count referenced (mapping) mapped pages */
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1288,7 +1288,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
 		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
-			pgmoved++;
+			nr_rotated++;
 			/*
 			 * Identify referenced, file-backed active pages and
 			 * give them one more trip around the active list. So
@@ -1317,7 +1317,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * helps balance scan pressure between file and anonymous pages in
 	 * get_scan_ratio.
 	 */
-	reclaim_stat->recent_rotated[!!file] += pgmoved;
+	reclaim_stat->recent_rotated[!!file] += nr_rotated;
 
 	move_active_pages_to_lru(zone, &l_active,
 						LRU_ACTIVE + file * LRU_FILE);
-- 
cgit v0.10.2


From b35ea17b7bbf5dea35faa0de11030acc620c3197 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:36 -0700
Subject: mm: shrink_inactive_list() nr_scan accounting fix fix

If sc->isolate_pages() return 0, we don't need to call shrink_page_list().
In past days, shrink_inactive_list() handled it properly.

But commit fb8d14e1 (three years ago commit!) breaked it.  current
shrink_inactive_list() always call shrink_page_list() although
isolate_pages() return 0.

This patch restore proper return value check.

Requirements:
  o "nr_taken == 0" condition should stay before calling shrink_page_list().
  o "nr_taken == 0" condition should stay after nr_scan related statistics
     modification.

Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 45a150a..d86a91f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1076,6 +1076,20 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
 				zone, sc->mem_cgroup, 0, file);
+
+		if (scanning_global_lru(sc)) {
+			zone->pages_scanned += nr_scan;
+			if (current_is_kswapd())
+				__count_zone_vm_events(PGSCAN_KSWAPD, zone,
+						       nr_scan);
+			else
+				__count_zone_vm_events(PGSCAN_DIRECT, zone,
+						       nr_scan);
+		}
+
+		if (nr_taken == 0)
+			goto done;
+
 		nr_active = clear_active_flags(&page_list, count);
 		__count_vm_events(PGDEACTIVATE, nr_active);
 
@@ -1088,8 +1102,6 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
 						-count[LRU_INACTIVE_ANON]);
 
-		if (scanning_global_lru(sc))
-			zone->pages_scanned += nr_scan;
 
 		reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
 		reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
@@ -1123,18 +1135,12 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		}
 
 		nr_reclaimed += nr_freed;
+
 		local_irq_disable();
-		if (current_is_kswapd()) {
-			__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
+		if (current_is_kswapd())
 			__count_vm_events(KSWAPD_STEAL, nr_freed);
-		} else if (scanning_global_lru(sc))
-			__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
-
 		__count_zone_vm_events(PGSTEAL, zone, nr_freed);
 
-		if (nr_taken == 0)
-			goto done;
-
 		spin_lock(&zone->lru_lock);
 		/*
 		 * Put back any unfreeable pages.
@@ -1164,9 +1170,9 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			}
 		}
   	} while (nr_scanned < max_scan);
-	spin_unlock(&zone->lru_lock);
+
 done:
-	local_irq_enable();
+	spin_unlock_irq(&zone->lru_lock);
 	pagevec_release(&pvec);
 	return nr_reclaimed;
 }
-- 
cgit v0.10.2


From a731286de62294b63d8ceb3c5914ac52cc17e690 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:37 -0700
Subject: mm: vmstat: add isolate pages

If the system is running a heavy load of processes then concurrent reclaim
can isolate a large number of pages from the LRU. /proc/vmstat and the
output generated for an OOM do not show how many pages were isolated.

This has been observed during process fork bomb testing (mstctl11 in LTP).

This patch shows the information about isolated pages.

Reproduced via:

-----------------------
% ./hackbench 140 process 1000
   => OOM occur

active_anon:146 inactive_anon:0 isolated_anon:49245
 active_file:79 inactive_file:18 isolated_file:113
 unevictable:0 dirty:0 writeback:0 unstable:0 buffer:39
 free:370 slab_reclaimable:309 slab_unreclaimable:5492
 mapped:53 shmem:15 pagetables:28140 bounce:0

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b3583b9..9c50309 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -100,6 +100,8 @@ enum zone_stat_item {
 	NR_BOUNCE,
 	NR_VMSCAN_WRITE,
 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
+	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
+	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
 	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
diff --git a/mm/migrate.c b/mm/migrate.c
index 37143b9..b535a2c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -67,6 +67,8 @@ int putback_lru_pages(struct list_head *l)
 
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				    !!page_is_file_cache(page));
 		putback_lru_page(page);
 		count++;
 	}
@@ -698,6 +700,8 @@ unlock:
  		 * restored.
  		 */
  		list_del(&page->lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				    !!page_is_file_cache(page));
 		putback_lru_page(page);
 	}
 
@@ -742,6 +746,13 @@ int migrate_pages(struct list_head *from,
 	struct page *page2;
 	int swapwrite = current->flags & PF_SWAPWRITE;
 	int rc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	list_for_each_entry(page, from, lru)
+		__inc_zone_page_state(page, NR_ISOLATED_ANON +
+				      !!page_is_file_cache(page));
+	local_irq_restore(flags);
 
 	if (!swapwrite)
 		current->flags |= PF_SWAPWRITE;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b6d0d09..afda8fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2134,16 +2134,18 @@ void show_free_areas(void)
 		}
 	}
 
-	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
-		" inactive_file:%lu"
+	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
 		" unevictable:%lu"
 		" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
 		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
 		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
-		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
+		global_page_state(NR_ISOLATED_ANON),
+		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_FILE),
+		global_page_state(NR_ISOLATED_FILE),
 		global_page_state(NR_UNEVICTABLE),
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
@@ -2171,6 +2173,8 @@ void show_free_areas(void)
 			" active_file:%lukB"
 			" inactive_file:%lukB"
 			" unevictable:%lukB"
+			" isolated(anon):%lukB"
+			" isolated(file):%lukB"
 			" present:%lukB"
 			" mlocked:%lukB"
 			" dirty:%lukB"
@@ -2197,6 +2201,8 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
 			K(zone_page_state(zone, NR_INACTIVE_FILE)),
 			K(zone_page_state(zone, NR_UNEVICTABLE)),
+			K(zone_page_state(zone, NR_ISOLATED_ANON)),
+			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
 			K(zone_page_state(zone, NR_FILE_DIRTY)),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d86a91f..75c2997 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1072,6 +1072,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
 		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
+		unsigned long nr_anon;
+		unsigned long nr_file;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
@@ -1102,6 +1104,10 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
 						-count[LRU_INACTIVE_ANON]);
 
+		nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
+		nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
+		__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+		__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
 
 		reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
 		reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
@@ -1169,6 +1175,9 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 				spin_lock_irq(&zone->lru_lock);
 			}
 		}
+		__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+		__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
   	} while (nr_scanned < max_scan);
 
 done:
@@ -1279,6 +1288,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
 	else
 		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
 	while (!list_empty(&l_hold)) {
@@ -1329,7 +1339,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 						LRU_ACTIVE + file * LRU_FILE);
 	move_active_pages_to_lru(zone, &l_inactive,
 						LRU_BASE   + file * LRU_FILE);
-
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7214a45..c81321f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -644,6 +644,8 @@ static const char * const vmstat_text[] = {
 	"nr_bounce",
 	"nr_vmscan_write",
 	"nr_writeback_temp",
+	"nr_isolated_anon",
+	"nr_isolated_file",
 	"nr_shmem",
 #ifdef CONFIG_NUMA
 	"numa_hit",
-- 
cgit v0.10.2


From 35cd78156c499ef83f60605e4643d5a98fef14fd Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 21 Sep 2009 17:01:38 -0700
Subject: vmscan: throttle direct reclaim when too many pages are isolated
 already

When way too many processes go into direct reclaim, it is possible for all
of the pages to be taken off the LRU.  One result of this is that the next
process in the page reclaim code thinks there are no reclaimable pages
left and triggers an out of memory kill.

One solution to this problem is to never let so many processes into the
page reclaim path that the entire LRU is emptied.  Limiting the system to
only having half of each inactive list isolated for reclaim should be
safe.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 75c2997..f90b760 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1034,6 +1034,31 @@ int isolate_lru_page(struct page *page)
 }
 
 /*
+ * Are there way too many processes in the direct reclaim path already?
+ */
+static int too_many_isolated(struct zone *zone, int file,
+		struct scan_control *sc)
+{
+	unsigned long inactive, isolated;
+
+	if (current_is_kswapd())
+		return 0;
+
+	if (!scanning_global_lru(sc))
+		return 0;
+
+	if (file) {
+		inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+		isolated = zone_page_state(zone, NR_ISOLATED_FILE);
+	} else {
+		inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+		isolated = zone_page_state(zone, NR_ISOLATED_ANON);
+	}
+
+	return isolated > inactive;
+}
+
+/*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
@@ -1048,6 +1073,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	int lumpy_reclaim = 0;
 
+	while (unlikely(too_many_isolated(zone, file, sc))) {
+		congestion_wait(WRITE, HZ/10);
+
+		/* We are about to die and free our memory. Return now. */
+		if (fatal_signal_pending(current))
+			return SWAP_CLUSTER_MAX;
+	}
+
 	/*
 	 * If we need a large contiguous chunk of memory, or have
 	 * trouble getting a small set of contiguous pages, we
-- 
cgit v0.10.2


From 5a2ae913f5229d6e1d4a666f0477350789d5128e Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:39 -0700
Subject: mm: remove __{add,sub}_zone_page_state()

__add_zone_page_state() and __sub_zone_page_state() are unused.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 81a97cf..d7f577f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -210,11 +210,6 @@ extern void zone_statistics(struct zone *, struct zone *);
 
 #endif /* CONFIG_NUMA */
 
-#define __add_zone_page_state(__z, __i, __d)	\
-		__mod_zone_page_state(__z, __i, __d)
-#define __sub_zone_page_state(__z, __i, __d)	\
-		__mod_zone_page_state(__z, __i,-(__d))
-
 #define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
 #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
 
-- 
cgit v0.10.2


From 55c37a840d9ec0ebed5c944355156d490b1ad5d1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 21 Sep 2009 17:01:40 -0700
Subject: vm: document that setting vfs_cache_pressure to 0 isn't a good idea

Reported-by: Christian Thaeter <ct@pipapo.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index c4de635..e6fb1ec 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -585,7 +585,9 @@ caching of directory and inode objects.
 At the default value of vfs_cache_pressure=100 the kernel will attempt to
 reclaim dentries and inodes at a "fair" rate with respect to pagecache and
 swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
-to retain dentry and inode caches.  Increasing vfs_cache_pressure beyond 100
+to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
+never reclaim dentries and inodes due to memory pressure and this can easily
+lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
 causes the kernel to prefer to reclaim dentries and inodes.
 
 ==============================================================
-- 
cgit v0.10.2


From adea02a1bea71a508da32c04d715485a1fe62029 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 21 Sep 2009 17:01:42 -0700
Subject: mm: count only reclaimable lru pages

global_lru_pages() / zone_lru_pages() can be used in two ways:
- to estimate max reclaimable pages in determine_dirtyable_memory()
- to calculate the slab scan ratio

When swap is full or not present, the anon lru lists are not reclaimable
and also won't be scanned.  So the anon pages shall not be counted in both
usage scenarios.  Also rename to _reclaimable_pages: now they are counting
the possibly reclaimable lru pages.

It can greatly (and correctly) increase the slab scan rate under high
memory pressure (when most file pages have been reclaimed and swap is
full/absent), thus reduce false OOM kills.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: David Howells <dhowells@redhat.com>
Cc: "Li, Ming Chun" <macli@brc.ubc.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d7f577f..2d0f222 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -166,15 +166,8 @@ static inline unsigned long zone_page_state(struct zone *zone,
 	return x;
 }
 
-extern unsigned long global_lru_pages(void);
-
-static inline unsigned long zone_lru_pages(struct zone *zone)
-{
-	return (zone_page_state(zone, NR_ACTIVE_ANON)
-		+ zone_page_state(zone, NR_ACTIVE_FILE)
-		+ zone_page_state(zone, NR_INACTIVE_ANON)
-		+ zone_page_state(zone, NR_INACTIVE_FILE));
-}
+extern unsigned long global_reclaimable_pages(void);
+extern unsigned long zone_reclaimable_pages(struct zone *zone);
 
 #ifdef CONFIG_NUMA
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d1ba464..5f378dd5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -380,7 +380,8 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 		struct zone *z =
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
+		x += zone_page_state(z, NR_FREE_PAGES) +
+		     zone_reclaimable_pages(z);
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -404,7 +405,7 @@ unsigned long determine_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f90b760..208071c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1734,7 +1734,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 	}
 
@@ -1951,7 +1951,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 
 		/*
@@ -1995,7 +1995,7 @@ loop_again:
 			if (zone_is_all_unreclaimable(zone))
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-						(zone_lru_pages(zone) * 6))
+					(zone_reclaimable_pages(zone) * 6))
 					zone_set_flag(zone,
 						      ZONE_ALL_UNRECLAIMABLE);
 			/*
@@ -2162,12 +2162,39 @@ void wakeup_kswapd(struct zone *zone, int order)
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-unsigned long global_lru_pages(void)
+/*
+ * The reclaimable count would be mostly accurate.
+ * The less reclaimable pages may be
+ * - mlocked pages, which will be moved to unevictable list when encountered
+ * - mapped pages, which may require several travels to be reclaimed
+ * - dirty pages, which is not "instantly" reclaimable
+ */
+unsigned long global_reclaimable_pages(void)
 {
-	return global_page_state(NR_ACTIVE_ANON)
-		+ global_page_state(NR_ACTIVE_FILE)
-		+ global_page_state(NR_INACTIVE_ANON)
-		+ global_page_state(NR_INACTIVE_FILE);
+	int nr;
+
+	nr = global_page_state(NR_ACTIVE_FILE) +
+	     global_page_state(NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += global_page_state(NR_ACTIVE_ANON) +
+		      global_page_state(NR_INACTIVE_ANON);
+
+	return nr;
+}
+
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+	int nr;
+
+	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+	     zone_page_state(zone, NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+		      zone_page_state(zone, NR_INACTIVE_ANON);
+
+	return nr;
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -2239,7 +2266,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = global_lru_pages();
+	lru_pages = global_reclaimable_pages();
 	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
@@ -2281,7 +2308,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, sc.gfp_mask,
-					global_lru_pages());
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 			if (sc.nr_reclaimed >= nr_pages)
 				goto out;
@@ -2298,7 +2325,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	if (!sc.nr_reclaimed) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
+			shrink_slab(nr_pages, sc.gfp_mask,
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 		} while (sc.nr_reclaimed < nr_pages &&
 				reclaim_state.reclaimed_slab > 0);
-- 
cgit v0.10.2


From de2e7567c7ddf24f0ca80010163ed10da66a14e2 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan.kim@gmail.com>
Date: Mon, 21 Sep 2009 17:01:43 -0700
Subject: vmscan: don't attempt to reclaim anon page in lumpy reclaim when no
 swap space is available

The VM already avoids attempting to reclaim anon pages in various places,
But it doesn't avoid it for lumpy reclaim.

It shuffles lru list unnecessary so that it is pointless.

[akpm@linux-foundation.org: cleanup]
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 208071c..ece2ecb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -935,6 +935,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
+
+			/*
+			 * If we don't have enough swap space, reclaiming of
+			 * anon page which don't already have a swap slot is
+			 * pointless.
+			 */
+			if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
+					!PageSwapCache(cursor_page))
+				continue;
+
 			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
 				list_move(&cursor_page->lru, dst);
 				mem_cgroup_del_lru(cursor_page);
-- 
cgit v0.10.2


From 5205e56eeab04ce02f8bb6b47d1569b216bc0b6a Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:44 -0700
Subject: vmscan: move ClearPageActive from move_active_pages() to
 shrink_active_list()

The move_active_pages_to_lru() function is called under irq disabled and
ClearPageActive() doesn't need irq disabling.

Then, this patch move it into shrink_active_list().

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index ece2ecb..ca81d0d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1278,10 +1278,6 @@ static void move_active_pages_to_lru(struct zone *zone,
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		VM_BUG_ON(!PageActive(page));
-		if (!is_active_lru(lru))
-			ClearPageActive(page);	/* we are de-activating */
-
 		list_move(&page->lru, &zone->lru[lru].list);
 		mem_cgroup_add_lru_list(page, lru);
 		pgmoved++;
@@ -1363,6 +1359,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			}
 		}
 
+		ClearPageActive(page);	/* we are de-activating */
 		list_add(&page->lru, &l_inactive);
 	}
 
-- 
cgit v0.10.2


From 74a1c48fb4e9f10e3c83dcd39af73487968e35bf Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:45 -0700
Subject: vmscan: kill unnecessary page flag test

The page_lru() already evaluate PageActive() and PageSwapBacked().  We
don't need to re-evaluate it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index ca81d0d..20c5fa8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1208,8 +1208,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			SetPageLRU(page);
 			lru = page_lru(page);
 			add_page_to_lru_list(zone, page, lru);
-			if (PageActive(page)) {
-				int file = !!page_is_file_cache(page);
+			if (is_active_lru(lru)) {
+				int file = !!is_file_lru(lru);
 				reclaim_stat->recent_rotated[file]++;
 			}
 			if (!pagevec_add(&pvec, page)) {
-- 
cgit v0.10.2


From a26f5320c4ee3d46a0da48fa0f3ac6a00b575793 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:01:46 -0700
Subject: vmscan: kill unnecessary prefetch

The pages in the list passed move_active_pages_to_lru() are already
touched by shrink_active_list().  IOW the prefetch in
move_active_pages_to_lru() don't populate any cache.  it's pointless.

This patch remove it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 20c5fa8..e219b47 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1273,7 +1273,6 @@ static void move_active_pages_to_lru(struct zone *zone,
 
 	while (!list_empty(list)) {
 		page = lru_to_page(list);
-		prefetchw_prev_lru_page(page, list, flags);
 
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-- 
cgit v0.10.2


From 945a11136ebdfa7fcce319ee6215958e84cb85f6 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Mon, 21 Sep 2009 17:01:47 -0700
Subject: mm: add gfp mask checking for __get_free_pages()

__get_free_pages() with __GFP_HIGHMEM is not safe because the return
address cannot represent a highmem page.  get_zeroed_page() already has
such a debug checking.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index afda8fd..81926c7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1922,31 +1922,25 @@ EXPORT_SYMBOL(__alloc_pages_nodemask);
  */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
-	struct page * page;
+	struct page *page;
+
+	/*
+	 * __get_free_pages() returns a 32-bit address, which cannot represent
+	 * a highmem page
+	 */
+	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+
 	page = alloc_pages(gfp_mask, order);
 	if (!page)
 		return 0;
 	return (unsigned long) page_address(page);
 }
-
 EXPORT_SYMBOL(__get_free_pages);
 
 unsigned long get_zeroed_page(gfp_t gfp_mask)
 {
-	struct page * page;
-
-	/*
-	 * get_zeroed_page() returns a 32-bit address, which cannot represent
-	 * a highmem page
-	 */
-	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
-	if (page)
-		return (unsigned long) page_address(page);
-	return 0;
+	return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
 }
-
 EXPORT_SYMBOL(get_zeroed_page);
 
 void __pagevec_free(struct pagevec *pvec)
-- 
cgit v0.10.2


From bf88c8c83e4425d17e29daa5354ffb1f8ba7b225 Mon Sep 17 00:00:00 2001
From: "Figo.zhang" <figo1802@gmail.com>
Date: Mon, 21 Sep 2009 17:01:47 -0700
Subject: vmalloc.c: fix double error checking

There is no need for double error checking.

Signed-off-by: Figo.zhang <figo1802@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 204b824..759deae 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -168,11 +168,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 		next = pgd_addr_end(addr, end);
 		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
 		if (err)
-			break;
+			return err;
 	} while (pgd++, addr = next, addr != end);
 
-	if (unlikely(err))
-		return err;
 	return nr;
 }
 
-- 
cgit v0.10.2


From 451ea25da71590361c71bf3044c55b870a887d53 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 21 Sep 2009 17:01:48 -0700
Subject: mm: perform non-atomic test-clear of PG_mlocked on free

By the time PG_mlocked is cleared in the page freeing path, nobody else is
looking at our page->flags anymore.

It is thus safe to make the test-and-clear non-atomic and thereby removing
an unnecessary and expensive operation from a hotpath.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 2b87acf..d07c0bb 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -158,6 +158,9 @@ static inline int TestSetPage##uname(struct page *page)			\
 static inline int TestClearPage##uname(struct page *page)		\
 		{ return test_and_clear_bit(PG_##lname, &page->flags); }
 
+#define __TESTCLEARFLAG(uname, lname)					\
+static inline int __TestClearPage##uname(struct page *page)		\
+		{ return __test_and_clear_bit(PG_##lname, &page->flags); }
 
 #define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)		\
 	SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
@@ -184,6 +187,9 @@ static inline void __ClearPage##uname(struct page *page) {  }
 #define TESTCLEARFLAG_FALSE(uname)					\
 static inline int TestClearPage##uname(struct page *page) { return 0; }
 
+#define __TESTCLEARFLAG_FALSE(uname)					\
+static inline int __TestClearPage##uname(struct page *page) { return 0; }
+
 struct page;	/* forward declaration */
 
 TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked)
@@ -250,11 +256,11 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 #define MLOCK_PAGES 1
 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
-	TESTSCFLAG(Mlocked, mlocked)
+	TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
 #else
 #define MLOCK_PAGES 0
-PAGEFLAG_FALSE(Mlocked)
-	SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
+	TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
 #endif
 
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 81926c7..9242d13 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -557,7 +557,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	unsigned long flags;
 	int i;
 	int bad = 0;
-	int wasMlocked = TestClearPageMlocked(page);
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, order);
 
@@ -1026,7 +1026,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
-	int wasMlocked = TestClearPageMlocked(page);
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, 0);
 
-- 
cgit v0.10.2


From 828502d30073036a486d96b1fe051e0f08b6df83 Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Mon, 21 Sep 2009 17:01:51 -0700
Subject: ksm: add mmu_notifier set_pte_at_notify()

KSM is a linux driver that allows dynamicly sharing identical memory pages
between one or more processes.

Unlike tradtional page sharing that is made at the allocation of the
memory, ksm do it dynamicly after the memory was created.  Memory is
periodically scanned; identical pages are identified and merged.

The sharing is made in a transparent way to the processes that use it.

Ksm is highly important for hypervisors (kvm), where in production
enviorments there might be many copys of the same data data among the host
memory.  This kind of data can be: similar kernels, librarys, cache, and
so on.

Even that ksm was wrote for kvm, any userspace application that want to
use it to share its data can try it.

Ksm may be useful for any application that might have similar (page
aligment) data strctures among the memory, ksm will find this data merge
it to one copy, and even if it will be changed and thereforew copy on
writed, ksm will merge it again as soon as it will be identical again.

Another reason to consider using ksm is the fact that it might simplify
alot the userspace code of application that want to use shared private
data, instead that the application will mange shared area, ksm will do
this for the application, and even write to this data will be allowed
without any synchinization acts from the application.

Ksm was designed to be a loadable module that doesn't change the VM code
of linux.

This patch:

The set_pte_at_notify() macro allows setting a pte in the shadow page
table directly, instead of flushing the shadow page table entry and then
getting vmexit to set it.  It uses a new change_pte() callback to do so.

set_pte_at_notify() is an optimization for kvm, and other users of
mmu_notifiers, for COW pages.  It is useful for kvm when ksm is used,
because it allows kvm not to have to receive vmexit and only then map the
ksm page into the shadow page table, but instead map it directly at the
same time as Linux maps the page into the host page table.

Users of mmu_notifiers who don't implement new mmu_notifier_change_pte()
callback will just receive the mmu_notifier_invalidate_page() callback.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b77486d..4e02ee2 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -62,6 +62,15 @@ struct mmu_notifier_ops {
 				 unsigned long address);
 
 	/*
+	 * change_pte is called in cases that pte mapping to page is changed:
+	 * for example, when ksm remaps pte to point to a new shared page.
+	 */
+	void (*change_pte)(struct mmu_notifier *mn,
+			   struct mm_struct *mm,
+			   unsigned long address,
+			   pte_t pte);
+
+	/*
 	 * Before this is invoked any secondary MMU is still ok to
 	 * read/write to the page previously pointed to by the Linux
 	 * pte because the page hasn't been freed yet and it won't be
@@ -154,6 +163,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 					  unsigned long address);
+extern void __mmu_notifier_change_pte(struct mm_struct *mm,
+				      unsigned long address, pte_t pte);
 extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
 					  unsigned long address);
 extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
@@ -175,6 +186,13 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return 0;
 }
 
+static inline void mmu_notifier_change_pte(struct mm_struct *mm,
+					   unsigned long address, pte_t pte)
+{
+	if (mm_has_notifiers(mm))
+		__mmu_notifier_change_pte(mm, address, pte);
+}
+
 static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
 					  unsigned long address)
 {
@@ -236,6 +254,16 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	__young;							\
 })
 
+#define set_pte_at_notify(__mm, __address, __ptep, __pte)		\
+({									\
+	struct mm_struct *___mm = __mm;					\
+	unsigned long ___address = __address;				\
+	pte_t ___pte = __pte;						\
+									\
+	set_pte_at(___mm, ___address, __ptep, ___pte);			\
+	mmu_notifier_change_pte(___mm, ___address, ___pte);		\
+})
+
 #else /* CONFIG_MMU_NOTIFIER */
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
@@ -248,6 +276,11 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return 0;
 }
 
+static inline void mmu_notifier_change_pte(struct mm_struct *mm,
+					   unsigned long address, pte_t pte)
+{
+}
+
 static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
 					  unsigned long address)
 {
@@ -273,6 +306,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 
 #define ptep_clear_flush_young_notify ptep_clear_flush_young
 #define ptep_clear_flush_notify ptep_clear_flush
+#define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */
 
diff --git a/mm/memory.c b/mm/memory.c
index e8f63d9..368561f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2115,9 +2115,14 @@ gotten:
 		 * seen in the presence of one thread doing SMC and another
 		 * thread doing COW.
 		 */
-		ptep_clear_flush_notify(vma, address, page_table);
+		ptep_clear_flush(vma, address, page_table);
 		page_add_new_anon_rmap(new_page, vma, address);
-		set_pte_at(mm, address, page_table, entry);
+		/*
+		 * We call the notify macro here because, when using secondary
+		 * mmu page tables (such as kvm shadow page tables), we want the
+		 * new page to be mapped directly into the secondary page table.
+		 */
+		set_pte_at_notify(mm, address, page_table, entry);
 		update_mmu_cache(vma, address, entry);
 		if (old_page) {
 			/*
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 5f4ef02..7e33f2c 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -99,6 +99,26 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return young;
 }
 
+void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
+			       pte_t pte)
+{
+	struct mmu_notifier *mn;
+	struct hlist_node *n;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops->change_pte)
+			mn->ops->change_pte(mn, mm, address, pte);
+		/*
+		 * Some drivers don't have change_pte,
+		 * so we must call invalidate_page in that case.
+		 */
+		else if (mn->ops->invalidate_page)
+			mn->ops->invalidate_page(mn, mm, address);
+	}
+	rcu_read_unlock();
+}
+
 void __mmu_notifier_invalidate_page(struct mm_struct *mm,
 					  unsigned long address)
 {
-- 
cgit v0.10.2


From 3866ea90d3635ddddcd77ce51087222ac7de85f2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:01:52 -0700
Subject: ksm: first tidy up madvise_vma()

madvise.c has several levels of switch statements, what to do in which?
Move MADV_DOFORK code down from madvise_vma() to madvise_behavior(), so
madvise_vma() can be a simple router, to madvise_behavior() by default.

vma->vm_flags is an unsigned long so use the same type for new_flags.  Add
missing comment lines to describe MADV_DONTFORK and MADV_DOFORK.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/madvise.c b/mm/madvise.c
index 76eb419..66c3126 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -41,7 +41,7 @@ static long madvise_behavior(struct vm_area_struct * vma,
 	struct mm_struct * mm = vma->vm_mm;
 	int error = 0;
 	pgoff_t pgoff;
-	int new_flags = vma->vm_flags;
+	unsigned long new_flags = vma->vm_flags;
 
 	switch (behavior) {
 	case MADV_NORMAL:
@@ -57,6 +57,10 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		new_flags |= VM_DONTCOPY;
 		break;
 	case MADV_DOFORK:
+		if (vma->vm_flags & VM_IO) {
+			error = -EINVAL;
+			goto out;
+		}
 		new_flags &= ~VM_DONTCOPY;
 		break;
 	}
@@ -211,37 +215,16 @@ static long
 madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		unsigned long start, unsigned long end, int behavior)
 {
-	long error;
-
 	switch (behavior) {
-	case MADV_DOFORK:
-		if (vma->vm_flags & VM_IO) {
-			error = -EINVAL;
-			break;
-		}
-	case MADV_DONTFORK:
-	case MADV_NORMAL:
-	case MADV_SEQUENTIAL:
-	case MADV_RANDOM:
-		error = madvise_behavior(vma, prev, start, end, behavior);
-		break;
 	case MADV_REMOVE:
-		error = madvise_remove(vma, prev, start, end);
-		break;
-
+		return madvise_remove(vma, prev, start, end);
 	case MADV_WILLNEED:
-		error = madvise_willneed(vma, prev, start, end);
-		break;
-
+		return madvise_willneed(vma, prev, start, end);
 	case MADV_DONTNEED:
-		error = madvise_dontneed(vma, prev, start, end);
-		break;
-
+		return madvise_dontneed(vma, prev, start, end);
 	default:
-		BUG();
-		break;
+		return madvise_behavior(vma, prev, start, end, behavior);
 	}
-	return error;
 }
 
 static int
@@ -262,6 +245,7 @@ madvise_behavior_valid(int behavior)
 		return 0;
 	}
 }
+
 /*
  * The madvise(2) system call.
  *
@@ -286,6 +270,9 @@ madvise_behavior_valid(int behavior)
  *		so the kernel can free resources associated with it.
  *  MADV_REMOVE - the application wants to free up the given range of
  *		pages and associated backing store.
+ *  MADV_DONTFORK - omit this area from child's address space when forking:
+ *		typically, to avoid COWing pages pinned by get_user_pages().
+ *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
  *
  * return values:
  *  zero    - success
-- 
cgit v0.10.2


From d19f352484467a5e518639ddff0554669c10ffab Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:01:53 -0700
Subject: ksm: define MADV_MERGEABLE and MADV_UNMERGEABLE

The out-of-tree KSM used ioctls on fds cloned from /dev/ksm to register a
memory area for merging: we prefer now to use an madvise(2) interface.

This patch just defines MADV_MERGEABLE (to tell KSM it may merge pages in
this area found identical to pages in other mergeable areas) and
MADV_UNMERGEABLE (to undo that).

Most architectures use asm-generic, but alpha, mips, parisc, xtensa need
their own definitions: included here for mmotm convenience, but we'll
probably want to split this and feed pieces to arch maintainers.

Based upon earlier patches by Chris Wright and Izik Eidus.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Helge Deller <deller@gmx.de>
Cc: Chris Zankel <chris@zankel.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h
index 90d7c35..c77c557 100644
--- a/arch/alpha/include/asm/mman.h
+++ b/arch/alpha/include/asm/mman.h
@@ -48,6 +48,9 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h
index e4d6f1f..f15554d 100644
--- a/arch/mips/include/asm/mman.h
+++ b/arch/mips/include/asm/mman.h
@@ -71,6 +71,9 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h
index defe752..a12d9d4 100644
--- a/arch/parisc/include/asm/mman.h
+++ b/arch/parisc/include/asm/mman.h
@@ -54,6 +54,9 @@
 #define MADV_16M_PAGES  24              /* Use 16 Megabyte pages */
 #define MADV_64M_PAGES  26              /* Use 64 Megabyte pages */
 
+#define MADV_MERGEABLE   65		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 66		/* KSM may not merge identical pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 #define MAP_VARIABLE	0
diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h
index 9b92620..6e55b4d 100644
--- a/arch/xtensa/include/asm/mman.h
+++ b/arch/xtensa/include/asm/mman.h
@@ -78,6 +78,9 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
index 3b69ad3..dd63bd3 100644
--- a/include/asm-generic/mman-common.h
+++ b/include/asm-generic/mman-common.h
@@ -35,6 +35,9 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-- 
cgit v0.10.2


From f8af4da3b4c14e7267c4ffb952079af3912c51c5 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:01:57 -0700
Subject: ksm: the mm interface to ksm

This patch presents the mm interface to a dummy version of ksm.c, for
better scrutiny of that interface: the real ksm.c follows later.

When CONFIG_KSM is not set, madvise(2) reject MADV_MERGEABLE and
MADV_UNMERGEABLE with EINVAL, since that seems more helpful than
pretending that they can be serviced.  But when CONFIG_KSM=y, accept them
even if KSM is not currently running, and even on areas which KSM will not
touch (e.g.  hugetlb or shared file or special driver mappings).

Like other madvices, report ENOMEM despite success if any area in the
range is unmapped, and use EAGAIN to report out of memory.

Define vma flag VM_MERGEABLE to identify an area on which KSM may try
merging pages: leave it to ksm_madvise() to decide whether to set it.
Define mm flag MMF_VM_MERGEABLE to identify an mm which might contain
VM_MERGEABLE areas, to minimize callouts when forking or exiting.

Based upon earlier patches by Chris Wright and Izik Eidus.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
new file mode 100644
index 0000000..eb2a448
--- /dev/null
+++ b/include/linux/ksm.h
@@ -0,0 +1,50 @@
+#ifndef __LINUX_KSM_H
+#define __LINUX_KSM_H
+/*
+ * Memory merging support.
+ *
+ * This code enables dynamic sharing of identical pages found in different
+ * memory areas, even if they are not shared by fork().
+ */
+
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_KSM
+int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags);
+int __ksm_enter(struct mm_struct *mm);
+void __ksm_exit(struct mm_struct *mm);
+
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+		return __ksm_enter(mm);
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		__ksm_exit(mm);
+}
+#else  /* !CONFIG_KSM */
+
+static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags)
+{
+	return 0;
+}
+
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+}
+#endif /* !CONFIG_KSM */
+
+#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d3c8ae7..d808cf8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -103,6 +103,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
 #define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
+#define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fe351c..8f3e63c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -434,7 +434,9 @@ extern int get_dumpable(struct mm_struct *mm);
 /* dumpable bits */
 #define MMF_DUMPABLE      0  /* core dump is permitted */
 #define MMF_DUMP_SECURELY 1  /* core file is readable only by root */
+
 #define MMF_DUMPABLE_BITS 2
+#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
 
 /* coredump filter bits */
 #define MMF_DUMP_ANON_PRIVATE	2
@@ -444,6 +446,7 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_DUMP_ELF_HEADERS	6
 #define MMF_DUMP_HUGETLB_PRIVATE 7
 #define MMF_DUMP_HUGETLB_SHARED  8
+
 #define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
 #define MMF_DUMP_FILTER_BITS	7
 #define MMF_DUMP_FILTER_MASK \
@@ -457,6 +460,10 @@ extern int get_dumpable(struct mm_struct *mm);
 #else
 # define MMF_DUMP_MASK_DEFAULT_ELF	0
 #endif
+					/* leave room for more dump flags */
+#define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
+
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
 struct sighand_struct {
 	atomic_t		count;
diff --git a/kernel/fork.c b/kernel/fork.c
index d4638c8..73a442b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
 #include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
+#include <linux/ksm.h>
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
@@ -299,6 +300,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	rb_link = &mm->mm_rb.rb_node;
 	rb_parent = NULL;
 	pprev = &mm->mmap;
+	retval = ksm_fork(mm, oldmm);
+	if (retval)
+		goto out;
 
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
@@ -435,7 +439,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
-	mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
+	mm->flags = (current->mm) ?
+		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
@@ -496,6 +501,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
+		ksm_exit(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
diff --git a/mm/Kconfig b/mm/Kconfig
index 3aa519f..c0b6afa 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -214,6 +214,17 @@ config HAVE_MLOCKED_PAGE_BIT
 config MMU_NOTIFIER
 	bool
 
+config KSM
+	bool "Enable KSM for page merging"
+	depends on MMU
+	help
+	  Enable Kernel Samepage Merging: KSM periodically scans those areas
+	  of an application's address space that an app has advised may be
+	  mergeable.  When it finds pages of identical content, it replaces
+	  the many instances by a single resident page with that content, so
+	  saving memory until one or another app needs to modify the content.
+	  Recommended for use with KVM, or with other duplicative applications.
+
 config DEFAULT_MMAP_MIN_ADDR
         int "Low address space to protect from user allocation"
         default 4096
diff --git a/mm/Makefile b/mm/Makefile
index ea4b18b..a63bf59 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+obj-$(CONFIG_KSM) += ksm.o
 obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
diff --git a/mm/ksm.c b/mm/ksm.c
new file mode 100644
index 0000000..8b76008
--- /dev/null
+++ b/mm/ksm.c
@@ -0,0 +1,56 @@
+/*
+ * Initial dummy version just to illustrate KSM's interface to other files.
+ */
+
+#include <linux/errno.h>
+#include <linux/mman.h>
+#include <linux/ksm.h>
+
+int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	switch (advice) {
+	case MADV_MERGEABLE:
+		/*
+		 * Be somewhat over-protective for now!
+		 */
+		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
+				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
+				 VM_RESERVED  | VM_HUGETLB | VM_INSERTPAGE |
+				 VM_MIXEDMAP  | VM_SAO))
+			return 0;		/* just ignore the advice */
+
+		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
+			if (__ksm_enter(mm) < 0)
+				return -EAGAIN;
+
+		*vm_flags |= VM_MERGEABLE;
+		break;
+
+	case MADV_UNMERGEABLE:
+		if (!(*vm_flags & VM_MERGEABLE))
+			return 0;		/* just ignore the advice */
+
+		/* Unmerge any merged pages here */
+
+		*vm_flags &= ~VM_MERGEABLE;
+		break;
+	}
+
+	return 0;
+}
+
+int __ksm_enter(struct mm_struct *mm)
+{
+	/* Allocate a structure to track mm and link it into KSM's list */
+	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+	return 0;
+}
+
+void __ksm_exit(struct mm_struct *mm)
+{
+	/* Unlink and free all KSM's structures which track this mm */
+	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+}
diff --git a/mm/madvise.c b/mm/madvise.c
index 66c3126..d9ae206 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -11,6 +11,7 @@
 #include <linux/mempolicy.h>
 #include <linux/hugetlb.h>
 #include <linux/sched.h>
+#include <linux/ksm.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -63,6 +64,12 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		}
 		new_flags &= ~VM_DONTCOPY;
 		break;
+	case MADV_MERGEABLE:
+	case MADV_UNMERGEABLE:
+		error = ksm_madvise(vma, start, end, behavior, &new_flags);
+		if (error)
+			goto out;
+		break;
 	}
 
 	if (new_flags == vma->vm_flags) {
@@ -239,6 +246,10 @@ madvise_behavior_valid(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+#ifdef CONFIG_KSM
+	case MADV_MERGEABLE:
+	case MADV_UNMERGEABLE:
+#endif
 		return 1;
 
 	default:
@@ -273,6 +284,9 @@ madvise_behavior_valid(int behavior)
  *  MADV_DONTFORK - omit this area from child's address space when forking:
  *		typically, to avoid COWing pages pinned by get_user_pages().
  *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
+ *  MADV_MERGEABLE - the application recommends that KSM try to merge pages in
+ *		this area with pages of identical content from other such areas.
+ *  MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
  *
  * return values:
  *  zero    - success
-- 
cgit v0.10.2


From 21333b2b66b805a360641568588e5a0bb06d9d1f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:01:59 -0700
Subject: ksm: no debug in page_dup_rmap()

page_dup_rmap(), used on each mapped page when forking, was originally
just an inline atomic_inc of mapcount.  2.6.22 added CONFIG_DEBUG_VM
out-of-line checks to it, which would need to be ever-so-slightly
complicated to allow for the PageKsm() we're about to define.

But I think these checks never caught anything.  And if it's coding errors
we're worried about, such checks should be in page_remove_rmap() too, not
just when forking; whereas if it's pagetable corruption we're worried
about, then they shouldn't be limited to CONFIG_DEBUG_VM.

Oh, just revert page_dup_rmap() to an inline atomic_inc of mapcount.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf116d0..477841d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -71,14 +71,10 @@ void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned lon
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
 
-#ifdef CONFIG_DEBUG_VM
-void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address);
-#else
-static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
+static inline void page_dup_rmap(struct page *page)
 {
 	atomic_inc(&page->_mapcount);
 }
-#endif
 
 /*
  * Called from mm/vmscan.c to handle paging out
diff --git a/mm/memory.c b/mm/memory.c
index 368561f..7a61a11 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -597,7 +597,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	page = vm_normal_page(vma, addr, pte);
 	if (page) {
 		get_page(page);
-		page_dup_rmap(page, vma, addr);
+		page_dup_rmap(page);
 		rss[!!PageAnon(page)]++;
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 1406e67..720fc03 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -710,27 +710,6 @@ void page_add_file_rmap(struct page *page)
 	}
 }
 
-#ifdef CONFIG_DEBUG_VM
-/**
- * page_dup_rmap - duplicate pte mapping to a page
- * @page:	the page to add the mapping to
- * @vma:	the vm area being duplicated
- * @address:	the user virtual address mapped
- *
- * For copy_page_range only: minimal extract from page_add_file_rmap /
- * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
- * quicker.
- *
- * The caller needs to hold the pte lock.
- */
-void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
-{
-	if (PageAnon(page))
-		__page_check_anon_rmap(page, vma, address);
-	atomic_inc(&page->_mapcount);
-}
-#endif
-
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
-- 
cgit v0.10.2


From 9a840895147b12de5cdd633c600b38686840ee53 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:01 -0700
Subject: ksm: identify PageKsm pages

KSM will need to identify its kernel merged pages unambiguously, and
/proc/kpageflags will probably like to do so too.

Since KSM will only be substituting anonymous pages, statistics are best
preserved by making a PageKsm page a special PageAnon page: one with no
anon_vma.

But KSM then needs its own page_add_ksm_rmap() - keep it in ksm.h near
PageKsm; and do_wp_page() must COW them, unlike singly mapped PageAnons.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2707c6c..2281c2c 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -2,6 +2,7 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
@@ -95,6 +96,8 @@ static const struct file_operations proc_kpagecount_operations = {
 #define KPF_UNEVICTABLE		18
 #define KPF_NOPAGE		20
 
+#define KPF_KSM			21
+
 /* kernel hacking assistances
  * WARNING: subject to change, never rely on them!
  */
@@ -137,6 +140,8 @@ static u64 get_uflags(struct page *page)
 		u |= 1 << KPF_MMAP;
 	if (PageAnon(page))
 		u |= 1 << KPF_ANON;
+	if (PageKsm(page))
+		u |= 1 << KPF_KSM;
 
 	/*
 	 * compound pages: export both head/tail info
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index eb2a448..a485c14 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -10,6 +10,7 @@
 #include <linux/bitops.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/vmstat.h>
 
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
@@ -29,6 +30,27 @@ static inline void ksm_exit(struct mm_struct *mm)
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
 		__ksm_exit(mm);
 }
+
+/*
+ * A KSM page is one of those write-protected "shared pages" or "merged pages"
+ * which KSM maps into multiple mms, wherever identical anonymous page content
+ * is found in VM_MERGEABLE vmas.  It's a PageAnon page, with NULL anon_vma.
+ */
+static inline int PageKsm(struct page *page)
+{
+	return ((unsigned long)page->mapping == PAGE_MAPPING_ANON);
+}
+
+/*
+ * But we have to avoid the checking which page_add_anon_rmap() performs.
+ */
+static inline void page_add_ksm_rmap(struct page *page)
+{
+	if (atomic_inc_and_test(&page->_mapcount)) {
+		page->mapping = (void *) PAGE_MAPPING_ANON;
+		__inc_zone_page_state(page, NR_ANON_PAGES);
+	}
+}
 #else  /* !CONFIG_KSM */
 
 static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
@@ -45,6 +67,13 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 static inline void ksm_exit(struct mm_struct *mm)
 {
 }
+
+static inline int PageKsm(struct page *page)
+{
+	return 0;
+}
+
+/* No stub required for page_add_ksm_rmap(page) */
 #endif /* !CONFIG_KSM */
 
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 7a61a11..1a435b8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -45,6 +45,7 @@
 #include <linux/swap.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/module.h>
 #include <linux/delayacct.h>
@@ -1974,7 +1975,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Take out anonymous pages first, anonymous shared vmas are
 	 * not dirty accountable.
 	 */
-	if (PageAnon(old_page)) {
+	if (PageAnon(old_page) && !PageKsm(old_page)) {
 		if (!trylock_page(old_page)) {
 			page_cache_get(old_page);
 			pte_unmap_unlock(page_table, ptl);
-- 
cgit v0.10.2


From 31dbd01f314364b70c2e026a5793a29a4da8a9dc Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Mon, 21 Sep 2009 17:02:03 -0700
Subject: ksm: Kernel SamePage Merging

Ksm is code that allows merging of identical pages between one or more
applications, in a way invisible to the applications that use it.  Pages
that are merged are marked as read-only, then COWed when any application
tries to change them.

Whereas fork() allows sharing anonymous pages between parent and child,
ksm can share anonymous pages between unrelated processes.

Ksm works by walking over the memory pages of the applications it scans,
in order to find identical pages.  It uses two sorted data structures,
called the stable and unstable trees, to locate identical pages in an
effective way.

When ksm finds two identical pages, it marks them as readonly and merges
them into a single page.  After the pages have been marked as readonly and
merged into one, Linux treats them as normal copy-on-write pages, copying
to a fresh anonymous page if write access is required later.

Ksm scans and merges anonymous pages only in those memory areas that have
been registered with it by madvise(addr, length, MADV_MERGEABLE).

The ksm scanner is controlled by sysfs files in /sys/kernel/mm/ksm/:

max_kernel_pages - the maximum number of unswappable kernel pages
                   which may be allocated by ksm (0 for unlimited).

kernel_pages_allocated - how many ksm pages are currently allocated,
                         sharing identical content between different
                         processes (pages unswappable in this release).

pages_shared - how many pages have been saved by sharing with ksm pages
               (kernel_pages_allocated being excluded from this count).

pages_to_scan - how many pages ksm should scan before sleeping.

sleep_millisecs - how many milliseconds ksm should sleep between scans.

run - write 0 to disable ksm, read 0 while ksm is disabled (default),
      write 1 to run ksm, read 1 while ksm is running,
      write 2 to disable ksm and unmerge all its pages.

Includes contributions by Andrea Arcangeli Chris Wright and Hugh Dickins.

[hugh.dickins@tiscali.co.uk: fix rare page leak]
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 8b76008..2c02094 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1,11 +1,1285 @@
 /*
- * Initial dummy version just to illustrate KSM's interface to other files.
+ * Memory merging support.
+ *
+ * This code enables dynamic sharing of identical pages found in different
+ * memory areas, even if they are not shared by fork()
+ *
+ * Copyright (C) 2008 Red Hat, Inc.
+ * Authors:
+ *	Izik Eidus
+ *	Andrea Arcangeli
+ *	Chris Wright
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
  */
 
 #include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
 #include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/spinlock.h>
+#include <linux/jhash.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/mmu_notifier.h>
 #include <linux/ksm.h>
 
+#include <asm/tlbflush.h>
+
+/*
+ * A few notes about the KSM scanning process,
+ * to make it easier to understand the data structures below:
+ *
+ * In order to reduce excessive scanning, KSM sorts the memory pages by their
+ * contents into a data structure that holds pointers to the pages' locations.
+ *
+ * Since the contents of the pages may change at any moment, KSM cannot just
+ * insert the pages into a normal sorted tree and expect it to find anything.
+ * Therefore KSM uses two data structures - the stable and the unstable tree.
+ *
+ * The stable tree holds pointers to all the merged pages (ksm pages), sorted
+ * by their contents.  Because each such page is write-protected, searching on
+ * this tree is fully assured to be working (except when pages are unmapped),
+ * and therefore this tree is called the stable tree.
+ *
+ * In addition to the stable tree, KSM uses a second data structure called the
+ * unstable tree: this tree holds pointers to pages which have been found to
+ * be "unchanged for a period of time".  The unstable tree sorts these pages
+ * by their contents, but since they are not write-protected, KSM cannot rely
+ * upon the unstable tree to work correctly - the unstable tree is liable to
+ * be corrupted as its contents are modified, and so it is called unstable.
+ *
+ * KSM solves this problem by several techniques:
+ *
+ * 1) The unstable tree is flushed every time KSM completes scanning all
+ *    memory areas, and then the tree is rebuilt again from the beginning.
+ * 2) KSM will only insert into the unstable tree, pages whose hash value
+ *    has not changed since the previous scan of all memory areas.
+ * 3) The unstable tree is a RedBlack Tree - so its balancing is based on the
+ *    colors of the nodes and not on their contents, assuring that even when
+ *    the tree gets "corrupted" it won't get out of balance, so scanning time
+ *    remains the same (also, searching and inserting nodes in an rbtree uses
+ *    the same algorithm, so we have no overhead when we flush and rebuild).
+ * 4) KSM never flushes the stable tree, which means that even if it were to
+ *    take 10 attempts to find a page in the unstable tree, once it is found,
+ *    it is secured in the stable tree.  (When we scan a new page, we first
+ *    compare it against the stable tree, and then against the unstable tree.)
+ */
+
+/**
+ * struct mm_slot - ksm information per mm that is being scanned
+ * @link: link to the mm_slots hash list
+ * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
+ * @rmap_list: head for this mm_slot's list of rmap_items
+ * @mm: the mm that this information is valid for
+ */
+struct mm_slot {
+	struct hlist_node link;
+	struct list_head mm_list;
+	struct list_head rmap_list;
+	struct mm_struct *mm;
+};
+
+/**
+ * struct ksm_scan - cursor for scanning
+ * @mm_slot: the current mm_slot we are scanning
+ * @address: the next address inside that to be scanned
+ * @rmap_item: the current rmap that we are scanning inside the rmap_list
+ * @seqnr: count of completed full scans (needed when removing unstable node)
+ *
+ * There is only the one ksm_scan instance of this cursor structure.
+ */
+struct ksm_scan {
+	struct mm_slot *mm_slot;
+	unsigned long address;
+	struct rmap_item *rmap_item;
+	unsigned long seqnr;
+};
+
+/**
+ * struct rmap_item - reverse mapping item for virtual addresses
+ * @link: link into mm_slot's rmap_list (rmap_list is per mm)
+ * @mm: the memory structure this rmap_item is pointing into
+ * @address: the virtual address this rmap_item tracks (+ flags in low bits)
+ * @oldchecksum: previous checksum of the page at that virtual address
+ * @node: rb_node of this rmap_item in either unstable or stable tree
+ * @next: next rmap_item hanging off the same node of the stable tree
+ * @prev: previous rmap_item hanging off the same node of the stable tree
+ */
+struct rmap_item {
+	struct list_head link;
+	struct mm_struct *mm;
+	unsigned long address;		/* + low bits used for flags below */
+	union {
+		unsigned int oldchecksum;		/* when unstable */
+		struct rmap_item *next;			/* when stable */
+	};
+	union {
+		struct rb_node node;			/* when tree node */
+		struct rmap_item *prev;			/* in stable list */
+	};
+};
+
+#define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
+#define NODE_FLAG	0x100	/* is a node of unstable or stable tree */
+#define STABLE_FLAG	0x200	/* is a node or list item of stable tree */
+
+/* The stable and unstable tree heads */
+static struct rb_root root_stable_tree = RB_ROOT;
+static struct rb_root root_unstable_tree = RB_ROOT;
+
+#define MM_SLOTS_HASH_HEADS 1024
+static struct hlist_head *mm_slots_hash;
+
+static struct mm_slot ksm_mm_head = {
+	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
+};
+static struct ksm_scan ksm_scan = {
+	.mm_slot = &ksm_mm_head,
+};
+
+static struct kmem_cache *rmap_item_cache;
+static struct kmem_cache *mm_slot_cache;
+
+/* The number of nodes in the stable tree */
+static unsigned long ksm_kernel_pages_allocated;
+
+/* The number of page slots sharing those nodes */
+static unsigned long ksm_pages_shared;
+
+/* Limit on the number of unswappable pages used */
+static unsigned long ksm_max_kernel_pages;
+
+/* Number of pages ksmd should scan in one batch */
+static unsigned int ksm_thread_pages_to_scan;
+
+/* Milliseconds ksmd should sleep between batches */
+static unsigned int ksm_thread_sleep_millisecs;
+
+#define KSM_RUN_STOP	0
+#define KSM_RUN_MERGE	1
+#define KSM_RUN_UNMERGE	2
+static unsigned int ksm_run;
+
+static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
+static DEFINE_MUTEX(ksm_thread_mutex);
+static DEFINE_SPINLOCK(ksm_mmlist_lock);
+
+#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
+		sizeof(struct __struct), __alignof__(struct __struct),\
+		(__flags), NULL)
+
+static int __init ksm_slab_init(void)
+{
+	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
+	if (!rmap_item_cache)
+		goto out;
+
+	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
+	if (!mm_slot_cache)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	kmem_cache_destroy(rmap_item_cache);
+out:
+	return -ENOMEM;
+}
+
+static void __init ksm_slab_free(void)
+{
+	kmem_cache_destroy(mm_slot_cache);
+	kmem_cache_destroy(rmap_item_cache);
+	mm_slot_cache = NULL;
+}
+
+static inline struct rmap_item *alloc_rmap_item(void)
+{
+	return kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+}
+
+static inline void free_rmap_item(struct rmap_item *rmap_item)
+{
+	rmap_item->mm = NULL;	/* debug safety */
+	kmem_cache_free(rmap_item_cache, rmap_item);
+}
+
+static inline struct mm_slot *alloc_mm_slot(void)
+{
+	if (!mm_slot_cache)	/* initialization failed */
+		return NULL;
+	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
+}
+
+static inline void free_mm_slot(struct mm_slot *mm_slot)
+{
+	kmem_cache_free(mm_slot_cache, mm_slot);
+}
+
+static int __init mm_slots_hash_init(void)
+{
+	mm_slots_hash = kzalloc(MM_SLOTS_HASH_HEADS * sizeof(struct hlist_head),
+				GFP_KERNEL);
+	if (!mm_slots_hash)
+		return -ENOMEM;
+	return 0;
+}
+
+static void __init mm_slots_hash_free(void)
+{
+	kfree(mm_slots_hash);
+}
+
+static struct mm_slot *get_mm_slot(struct mm_struct *mm)
+{
+	struct mm_slot *mm_slot;
+	struct hlist_head *bucket;
+	struct hlist_node *node;
+
+	bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
+				% MM_SLOTS_HASH_HEADS];
+	hlist_for_each_entry(mm_slot, node, bucket, link) {
+		if (mm == mm_slot->mm)
+			return mm_slot;
+	}
+	return NULL;
+}
+
+static void insert_to_mm_slots_hash(struct mm_struct *mm,
+				    struct mm_slot *mm_slot)
+{
+	struct hlist_head *bucket;
+
+	bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
+				% MM_SLOTS_HASH_HEADS];
+	mm_slot->mm = mm;
+	INIT_LIST_HEAD(&mm_slot->rmap_list);
+	hlist_add_head(&mm_slot->link, bucket);
+}
+
+static inline int in_stable_tree(struct rmap_item *rmap_item)
+{
+	return rmap_item->address & STABLE_FLAG;
+}
+
+/*
+ * We use break_ksm to break COW on a ksm page: it's a stripped down
+ *
+ *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
+ *		put_page(page);
+ *
+ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
+ * in case the application has unmapped and remapped mm,addr meanwhile.
+ * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
+ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
+ */
+static void break_ksm(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct page *page;
+	int ret;
+
+	do {
+		cond_resched();
+		page = follow_page(vma, addr, FOLL_GET);
+		if (!page)
+			break;
+		if (PageKsm(page))
+			ret = handle_mm_fault(vma->vm_mm, vma, addr,
+							FAULT_FLAG_WRITE);
+		else
+			ret = VM_FAULT_WRITE;
+		put_page(page);
+	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS)));
+
+	/* Which leaves us looping there if VM_FAULT_OOM: hmmm... */
+}
+
+static void __break_cow(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+
+	vma = find_vma(mm, addr);
+	if (!vma || vma->vm_start > addr)
+		return;
+	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+		return;
+	break_ksm(vma, addr);
+}
+
+static void break_cow(struct mm_struct *mm, unsigned long addr)
+{
+	down_read(&mm->mmap_sem);
+	__break_cow(mm, addr);
+	up_read(&mm->mmap_sem);
+}
+
+static struct page *get_mergeable_page(struct rmap_item *rmap_item)
+{
+	struct mm_struct *mm = rmap_item->mm;
+	unsigned long addr = rmap_item->address;
+	struct vm_area_struct *vma;
+	struct page *page;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, addr);
+	if (!vma || vma->vm_start > addr)
+		goto out;
+	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+		goto out;
+
+	page = follow_page(vma, addr, FOLL_GET);
+	if (!page)
+		goto out;
+	if (PageAnon(page)) {
+		flush_anon_page(vma, page, addr);
+		flush_dcache_page(page);
+	} else {
+		put_page(page);
+out:		page = NULL;
+	}
+	up_read(&mm->mmap_sem);
+	return page;
+}
+
+/*
+ * get_ksm_page: checks if the page at the virtual address in rmap_item
+ * is still PageKsm, in which case we can trust the content of the page,
+ * and it returns the gotten page; but NULL if the page has been zapped.
+ */
+static struct page *get_ksm_page(struct rmap_item *rmap_item)
+{
+	struct page *page;
+
+	page = get_mergeable_page(rmap_item);
+	if (page && !PageKsm(page)) {
+		put_page(page);
+		page = NULL;
+	}
+	return page;
+}
+
+/*
+ * Removing rmap_item from stable or unstable tree.
+ * This function will clean the information from the stable/unstable tree.
+ */
+static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
+{
+	if (in_stable_tree(rmap_item)) {
+		struct rmap_item *next_item = rmap_item->next;
+
+		if (rmap_item->address & NODE_FLAG) {
+			if (next_item) {
+				rb_replace_node(&rmap_item->node,
+						&next_item->node,
+						&root_stable_tree);
+				next_item->address |= NODE_FLAG;
+			} else {
+				rb_erase(&rmap_item->node, &root_stable_tree);
+				ksm_kernel_pages_allocated--;
+			}
+		} else {
+			struct rmap_item *prev_item = rmap_item->prev;
+
+			BUG_ON(prev_item->next != rmap_item);
+			prev_item->next = next_item;
+			if (next_item) {
+				BUG_ON(next_item->prev != rmap_item);
+				next_item->prev = rmap_item->prev;
+			}
+		}
+
+		rmap_item->next = NULL;
+		ksm_pages_shared--;
+
+	} else if (rmap_item->address & NODE_FLAG) {
+		unsigned char age;
+		/*
+		 * ksm_thread can and must skip the rb_erase, because
+		 * root_unstable_tree was already reset to RB_ROOT.
+		 * But __ksm_exit has to be careful: do the rb_erase
+		 * if it's interrupting a scan, and this rmap_item was
+		 * inserted by this scan rather than left from before.
+		 *
+		 * Because of the case in which remove_mm_from_lists
+		 * increments seqnr before removing rmaps, unstable_nr
+		 * may even be 2 behind seqnr, but should never be
+		 * further behind.  Yes, I did have trouble with this!
+		 */
+		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
+		BUG_ON(age > 2);
+		if (!age)
+			rb_erase(&rmap_item->node, &root_unstable_tree);
+	}
+
+	rmap_item->address &= PAGE_MASK;
+
+	cond_resched();		/* we're called from many long loops */
+}
+
+static void remove_all_slot_rmap_items(struct mm_slot *mm_slot)
+{
+	struct rmap_item *rmap_item, *node;
+
+	list_for_each_entry_safe(rmap_item, node, &mm_slot->rmap_list, link) {
+		remove_rmap_item_from_tree(rmap_item);
+		list_del(&rmap_item->link);
+		free_rmap_item(rmap_item);
+	}
+}
+
+static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
+				       struct list_head *cur)
+{
+	struct rmap_item *rmap_item;
+
+	while (cur != &mm_slot->rmap_list) {
+		rmap_item = list_entry(cur, struct rmap_item, link);
+		cur = cur->next;
+		remove_rmap_item_from_tree(rmap_item);
+		list_del(&rmap_item->link);
+		free_rmap_item(rmap_item);
+	}
+}
+
+/*
+ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather
+ * than check every pte of a given vma, the locking doesn't quite work for
+ * that - an rmap_item is assigned to the stable tree after inserting ksm
+ * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
+ * rmap_items from parent to child at fork time (so as not to waste time
+ * if exit comes before the next scan reaches it).
+ */
+static void unmerge_ksm_pages(struct vm_area_struct *vma,
+			      unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PAGE_SIZE)
+		break_ksm(vma, addr);
+}
+
+static void unmerge_and_remove_all_rmap_items(void)
+{
+	struct mm_slot *mm_slot;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+
+	list_for_each_entry(mm_slot, &ksm_mm_head.mm_list, mm_list) {
+		mm = mm_slot->mm;
+		down_read(&mm->mmap_sem);
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+				continue;
+			unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+		}
+		remove_all_slot_rmap_items(mm_slot);
+		up_read(&mm->mmap_sem);
+	}
+
+	spin_lock(&ksm_mmlist_lock);
+	if (ksm_scan.mm_slot != &ksm_mm_head) {
+		ksm_scan.mm_slot = &ksm_mm_head;
+		ksm_scan.seqnr++;
+	}
+	spin_unlock(&ksm_mmlist_lock);
+}
+
+static void remove_mm_from_lists(struct mm_struct *mm)
+{
+	struct mm_slot *mm_slot;
+
+	spin_lock(&ksm_mmlist_lock);
+	mm_slot = get_mm_slot(mm);
+
+	/*
+	 * This mm_slot is always at the scanning cursor when we're
+	 * called from scan_get_next_rmap_item; but it's a special
+	 * case when we're called from __ksm_exit.
+	 */
+	if (ksm_scan.mm_slot == mm_slot) {
+		ksm_scan.mm_slot = list_entry(
+			mm_slot->mm_list.next, struct mm_slot, mm_list);
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(
+			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
+		if (ksm_scan.mm_slot == &ksm_mm_head)
+			ksm_scan.seqnr++;
+	}
+
+	hlist_del(&mm_slot->link);
+	list_del(&mm_slot->mm_list);
+	spin_unlock(&ksm_mmlist_lock);
+
+	remove_all_slot_rmap_items(mm_slot);
+	free_mm_slot(mm_slot);
+	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+}
+
+static u32 calc_checksum(struct page *page)
+{
+	u32 checksum;
+	void *addr = kmap_atomic(page, KM_USER0);
+	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
+	kunmap_atomic(addr, KM_USER0);
+	return checksum;
+}
+
+static int memcmp_pages(struct page *page1, struct page *page2)
+{
+	char *addr1, *addr2;
+	int ret;
+
+	addr1 = kmap_atomic(page1, KM_USER0);
+	addr2 = kmap_atomic(page2, KM_USER1);
+	ret = memcmp(addr1, addr2, PAGE_SIZE);
+	kunmap_atomic(addr2, KM_USER1);
+	kunmap_atomic(addr1, KM_USER0);
+	return ret;
+}
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+	return !memcmp_pages(page1, page2);
+}
+
+static int write_protect_page(struct vm_area_struct *vma, struct page *page,
+			      pte_t *orig_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long addr;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	int swapped;
+	int err = -EFAULT;
+
+	addr = page_address_in_vma(page, vma);
+	if (addr == -EFAULT)
+		goto out;
+
+	ptep = page_check_address(page, mm, addr, &ptl, 0);
+	if (!ptep)
+		goto out;
+
+	if (pte_write(*ptep)) {
+		pte_t entry;
+
+		swapped = PageSwapCache(page);
+		flush_cache_page(vma, addr, page_to_pfn(page));
+		/*
+		 * Ok this is tricky, when get_user_pages_fast() run it doesnt
+		 * take any lock, therefore the check that we are going to make
+		 * with the pagecount against the mapcount is racey and
+		 * O_DIRECT can happen right after the check.
+		 * So we clear the pte and flush the tlb before the check
+		 * this assure us that no O_DIRECT can happen after the check
+		 * or in the middle of the check.
+		 */
+		entry = ptep_clear_flush(vma, addr, ptep);
+		/*
+		 * Check that no O_DIRECT or similar I/O is in progress on the
+		 * page
+		 */
+		if ((page_mapcount(page) + 2 + swapped) != page_count(page)) {
+			set_pte_at_notify(mm, addr, ptep, entry);
+			goto out_unlock;
+		}
+		entry = pte_wrprotect(entry);
+		set_pte_at_notify(mm, addr, ptep, entry);
+	}
+	*orig_pte = *ptep;
+	err = 0;
+
+out_unlock:
+	pte_unmap_unlock(ptep, ptl);
+out:
+	return err;
+}
+
+/**
+ * replace_page - replace page in vma by new ksm page
+ * @vma:      vma that holds the pte pointing to oldpage
+ * @oldpage:  the page we are replacing by newpage
+ * @newpage:  the ksm page we replace oldpage by
+ * @orig_pte: the original value of the pte
+ *
+ * Returns 0 on success, -EFAULT on failure.
+ */
+static int replace_page(struct vm_area_struct *vma, struct page *oldpage,
+			struct page *newpage, pte_t orig_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	unsigned long addr;
+	pgprot_t prot;
+	int err = -EFAULT;
+
+	prot = vm_get_page_prot(vma->vm_flags & ~VM_WRITE);
+
+	addr = page_address_in_vma(oldpage, vma);
+	if (addr == -EFAULT)
+		goto out;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		goto out;
+
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!pte_same(*ptep, orig_pte)) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
+	get_page(newpage);
+	page_add_ksm_rmap(newpage);
+
+	flush_cache_page(vma, addr, pte_pfn(*ptep));
+	ptep_clear_flush(vma, addr, ptep);
+	set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, prot));
+
+	page_remove_rmap(oldpage);
+	put_page(oldpage);
+
+	pte_unmap_unlock(ptep, ptl);
+	err = 0;
+out:
+	return err;
+}
+
+/*
+ * try_to_merge_one_page - take two pages and merge them into one
+ * @vma: the vma that hold the pte pointing into oldpage
+ * @oldpage: the page that we want to replace with newpage
+ * @newpage: the page that we want to map instead of oldpage
+ *
+ * Note:
+ * oldpage should be a PageAnon page, while newpage should be a PageKsm page,
+ * or a newly allocated kernel page which page_add_ksm_rmap will make PageKsm.
+ *
+ * This function returns 0 if the pages were merged, -EFAULT otherwise.
+ */
+static int try_to_merge_one_page(struct vm_area_struct *vma,
+				 struct page *oldpage,
+				 struct page *newpage)
+{
+	pte_t orig_pte = __pte(0);
+	int err = -EFAULT;
+
+	if (!(vma->vm_flags & VM_MERGEABLE))
+		goto out;
+
+	if (!PageAnon(oldpage))
+		goto out;
+
+	get_page(newpage);
+	get_page(oldpage);
+
+	/*
+	 * We need the page lock to read a stable PageSwapCache in
+	 * write_protect_page().  We use trylock_page() instead of
+	 * lock_page() because we don't want to wait here - we
+	 * prefer to continue scanning and merging different pages,
+	 * then come back to this page when it is unlocked.
+	 */
+	if (!trylock_page(oldpage))
+		goto out_putpage;
+	/*
+	 * If this anonymous page is mapped only here, its pte may need
+	 * to be write-protected.  If it's mapped elsewhere, all of its
+	 * ptes are necessarily already write-protected.  But in either
+	 * case, we need to lock and check page_count is not raised.
+	 */
+	if (write_protect_page(vma, oldpage, &orig_pte)) {
+		unlock_page(oldpage);
+		goto out_putpage;
+	}
+	unlock_page(oldpage);
+
+	if (pages_identical(oldpage, newpage))
+		err = replace_page(vma, oldpage, newpage, orig_pte);
+
+out_putpage:
+	put_page(oldpage);
+	put_page(newpage);
+out:
+	return err;
+}
+
+/*
+ * try_to_merge_two_pages - take two identical pages and prepare them
+ * to be merged into one page.
+ *
+ * This function returns 0 if we successfully mapped two identical pages
+ * into one page, -EFAULT otherwise.
+ *
+ * Note that this function allocates a new kernel page: if one of the pages
+ * is already a ksm page, try_to_merge_with_ksm_page should be used.
+ */
+static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
+				  struct page *page1, struct mm_struct *mm2,
+				  unsigned long addr2, struct page *page2)
+{
+	struct vm_area_struct *vma;
+	struct page *kpage;
+	int err = -EFAULT;
+
+	/*
+	 * The number of nodes in the stable tree
+	 * is the number of kernel pages that we hold.
+	 */
+	if (ksm_max_kernel_pages &&
+	    ksm_max_kernel_pages <= ksm_kernel_pages_allocated)
+		return err;
+
+	kpage = alloc_page(GFP_HIGHUSER);
+	if (!kpage)
+		return err;
+
+	down_read(&mm1->mmap_sem);
+	vma = find_vma(mm1, addr1);
+	if (!vma || vma->vm_start > addr1) {
+		put_page(kpage);
+		up_read(&mm1->mmap_sem);
+		return err;
+	}
+
+	copy_user_highpage(kpage, page1, addr1, vma);
+	err = try_to_merge_one_page(vma, page1, kpage);
+	up_read(&mm1->mmap_sem);
+
+	if (!err) {
+		down_read(&mm2->mmap_sem);
+		vma = find_vma(mm2, addr2);
+		if (!vma || vma->vm_start > addr2) {
+			put_page(kpage);
+			up_read(&mm2->mmap_sem);
+			break_cow(mm1, addr1);
+			return -EFAULT;
+		}
+
+		err = try_to_merge_one_page(vma, page2, kpage);
+		up_read(&mm2->mmap_sem);
+
+		/*
+		 * If the second try_to_merge_one_page failed, we have a
+		 * ksm page with just one pte pointing to it, so break it.
+		 */
+		if (err)
+			break_cow(mm1, addr1);
+		else
+			ksm_pages_shared += 2;
+	}
+
+	put_page(kpage);
+	return err;
+}
+
+/*
+ * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
+ * but no new kernel page is allocated: kpage must already be a ksm page.
+ */
+static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
+				      unsigned long addr1,
+				      struct page *page1,
+				      struct page *kpage)
+{
+	struct vm_area_struct *vma;
+	int err = -EFAULT;
+
+	down_read(&mm1->mmap_sem);
+	vma = find_vma(mm1, addr1);
+	if (!vma || vma->vm_start > addr1) {
+		up_read(&mm1->mmap_sem);
+		return err;
+	}
+
+	err = try_to_merge_one_page(vma, page1, kpage);
+	up_read(&mm1->mmap_sem);
+
+	if (!err)
+		ksm_pages_shared++;
+
+	return err;
+}
+
+/*
+ * stable_tree_search - search page inside the stable tree
+ * @page: the page that we are searching identical pages to.
+ * @page2: pointer into identical page that we are holding inside the stable
+ *	   tree that we have found.
+ * @rmap_item: the reverse mapping item
+ *
+ * This function checks if there is a page inside the stable tree
+ * with identical content to the page that we are scanning right now.
+ *
+ * This function return rmap_item pointer to the identical item if found,
+ * NULL otherwise.
+ */
+static struct rmap_item *stable_tree_search(struct page *page,
+					    struct page **page2,
+					    struct rmap_item *rmap_item)
+{
+	struct rb_node *node = root_stable_tree.rb_node;
+
+	while (node) {
+		struct rmap_item *tree_rmap_item, *next_rmap_item;
+		int ret;
+
+		tree_rmap_item = rb_entry(node, struct rmap_item, node);
+		while (tree_rmap_item) {
+			BUG_ON(!in_stable_tree(tree_rmap_item));
+			cond_resched();
+			page2[0] = get_ksm_page(tree_rmap_item);
+			if (page2[0])
+				break;
+			next_rmap_item = tree_rmap_item->next;
+			remove_rmap_item_from_tree(tree_rmap_item);
+			tree_rmap_item = next_rmap_item;
+		}
+		if (!tree_rmap_item)
+			return NULL;
+
+		ret = memcmp_pages(page, page2[0]);
+
+		if (ret < 0) {
+			put_page(page2[0]);
+			node = node->rb_left;
+		} else if (ret > 0) {
+			put_page(page2[0]);
+			node = node->rb_right;
+		} else {
+			return tree_rmap_item;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * stable_tree_insert - insert rmap_item pointing to new ksm page
+ * into the stable tree.
+ *
+ * @page: the page that we are searching identical page to inside the stable
+ *	  tree.
+ * @rmap_item: pointer to the reverse mapping item.
+ *
+ * This function returns rmap_item if success, NULL otherwise.
+ */
+static struct rmap_item *stable_tree_insert(struct page *page,
+					    struct rmap_item *rmap_item)
+{
+	struct rb_node **new = &root_stable_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct rmap_item *tree_rmap_item, *next_rmap_item;
+		struct page *tree_page;
+		int ret;
+
+		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
+		while (tree_rmap_item) {
+			BUG_ON(!in_stable_tree(tree_rmap_item));
+			cond_resched();
+			tree_page = get_ksm_page(tree_rmap_item);
+			if (tree_page)
+				break;
+			next_rmap_item = tree_rmap_item->next;
+			remove_rmap_item_from_tree(tree_rmap_item);
+			tree_rmap_item = next_rmap_item;
+		}
+		if (!tree_rmap_item)
+			return NULL;
+
+		ret = memcmp_pages(page, tree_page);
+		put_page(tree_page);
+
+		parent = *new;
+		if (ret < 0)
+			new = &parent->rb_left;
+		else if (ret > 0)
+			new = &parent->rb_right;
+		else {
+			/*
+			 * It is not a bug that stable_tree_search() didn't
+			 * find this node: because at that time our page was
+			 * not yet write-protected, so may have changed since.
+			 */
+			return NULL;
+		}
+	}
+
+	ksm_kernel_pages_allocated++;
+
+	rmap_item->address |= NODE_FLAG | STABLE_FLAG;
+	rmap_item->next = NULL;
+	rb_link_node(&rmap_item->node, parent, new);
+	rb_insert_color(&rmap_item->node, &root_stable_tree);
+
+	return rmap_item;
+}
+
+/*
+ * unstable_tree_search_insert - search and insert items into the unstable tree.
+ *
+ * @page: the page that we are going to search for identical page or to insert
+ *	  into the unstable tree
+ * @page2: pointer into identical page that was found inside the unstable tree
+ * @rmap_item: the reverse mapping item of page
+ *
+ * This function searches for a page in the unstable tree identical to the
+ * page currently being scanned; and if no identical page is found in the
+ * tree, we insert rmap_item as a new object into the unstable tree.
+ *
+ * This function returns pointer to rmap_item found to be identical
+ * to the currently scanned page, NULL otherwise.
+ *
+ * This function does both searching and inserting, because they share
+ * the same walking algorithm in an rbtree.
+ */
+static struct rmap_item *unstable_tree_search_insert(struct page *page,
+						struct page **page2,
+						struct rmap_item *rmap_item)
+{
+	struct rb_node **new = &root_unstable_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct rmap_item *tree_rmap_item;
+		int ret;
+
+		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
+		page2[0] = get_mergeable_page(tree_rmap_item);
+		if (!page2[0])
+			return NULL;
+
+		/*
+		 * Don't substitute an unswappable ksm page
+		 * just for one good swappable forked page.
+		 */
+		if (page == page2[0]) {
+			put_page(page2[0]);
+			return NULL;
+		}
+
+		ret = memcmp_pages(page, page2[0]);
+
+		parent = *new;
+		if (ret < 0) {
+			put_page(page2[0]);
+			new = &parent->rb_left;
+		} else if (ret > 0) {
+			put_page(page2[0]);
+			new = &parent->rb_right;
+		} else {
+			return tree_rmap_item;
+		}
+	}
+
+	rmap_item->address |= NODE_FLAG;
+	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
+	rb_link_node(&rmap_item->node, parent, new);
+	rb_insert_color(&rmap_item->node, &root_unstable_tree);
+
+	return NULL;
+}
+
+/*
+ * stable_tree_append - add another rmap_item to the linked list of
+ * rmap_items hanging off a given node of the stable tree, all sharing
+ * the same ksm page.
+ */
+static void stable_tree_append(struct rmap_item *rmap_item,
+			       struct rmap_item *tree_rmap_item)
+{
+	rmap_item->next = tree_rmap_item->next;
+	rmap_item->prev = tree_rmap_item;
+
+	if (tree_rmap_item->next)
+		tree_rmap_item->next->prev = rmap_item;
+
+	tree_rmap_item->next = rmap_item;
+	rmap_item->address |= STABLE_FLAG;
+}
+
+/*
+ * cmp_and_merge_page - take a page computes its hash value and check if there
+ * is similar hash value to different page,
+ * in case we find that there is similar hash to different page we call to
+ * try_to_merge_two_pages().
+ *
+ * @page: the page that we are searching identical page to.
+ * @rmap_item: the reverse mapping into the virtual address of this page
+ */
+static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
+{
+	struct page *page2[1];
+	struct rmap_item *tree_rmap_item;
+	unsigned int checksum;
+	int err;
+
+	if (in_stable_tree(rmap_item))
+		remove_rmap_item_from_tree(rmap_item);
+
+	/* We first start with searching the page inside the stable tree */
+	tree_rmap_item = stable_tree_search(page, page2, rmap_item);
+	if (tree_rmap_item) {
+		if (page == page2[0]) {			/* forked */
+			ksm_pages_shared++;
+			err = 0;
+		} else
+			err = try_to_merge_with_ksm_page(rmap_item->mm,
+							 rmap_item->address,
+							 page, page2[0]);
+		put_page(page2[0]);
+
+		if (!err) {
+			/*
+			 * The page was successfully merged:
+			 * add its rmap_item to the stable tree.
+			 */
+			stable_tree_append(rmap_item, tree_rmap_item);
+		}
+		return;
+	}
+
+	/*
+	 * A ksm page might have got here by fork, but its other
+	 * references have already been removed from the stable tree.
+	 */
+	if (PageKsm(page))
+		break_cow(rmap_item->mm, rmap_item->address);
+
+	/*
+	 * In case the hash value of the page was changed from the last time we
+	 * have calculated it, this page to be changed frequely, therefore we
+	 * don't want to insert it to the unstable tree, and we don't want to
+	 * waste our time to search if there is something identical to it there.
+	 */
+	checksum = calc_checksum(page);
+	if (rmap_item->oldchecksum != checksum) {
+		rmap_item->oldchecksum = checksum;
+		return;
+	}
+
+	tree_rmap_item = unstable_tree_search_insert(page, page2, rmap_item);
+	if (tree_rmap_item) {
+		err = try_to_merge_two_pages(rmap_item->mm,
+					     rmap_item->address, page,
+					     tree_rmap_item->mm,
+					     tree_rmap_item->address, page2[0]);
+		/*
+		 * As soon as we merge this page, we want to remove the
+		 * rmap_item of the page we have merged with from the unstable
+		 * tree, and insert it instead as new node in the stable tree.
+		 */
+		if (!err) {
+			rb_erase(&tree_rmap_item->node, &root_unstable_tree);
+			tree_rmap_item->address &= ~NODE_FLAG;
+			/*
+			 * If we fail to insert the page into the stable tree,
+			 * we will have 2 virtual addresses that are pointing
+			 * to a ksm page left outside the stable tree,
+			 * in which case we need to break_cow on both.
+			 */
+			if (stable_tree_insert(page2[0], tree_rmap_item))
+				stable_tree_append(rmap_item, tree_rmap_item);
+			else {
+				break_cow(tree_rmap_item->mm,
+						tree_rmap_item->address);
+				break_cow(rmap_item->mm, rmap_item->address);
+				ksm_pages_shared -= 2;
+			}
+		}
+
+		put_page(page2[0]);
+	}
+}
+
+static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
+					    struct list_head *cur,
+					    unsigned long addr)
+{
+	struct rmap_item *rmap_item;
+
+	while (cur != &mm_slot->rmap_list) {
+		rmap_item = list_entry(cur, struct rmap_item, link);
+		if ((rmap_item->address & PAGE_MASK) == addr) {
+			if (!in_stable_tree(rmap_item))
+				remove_rmap_item_from_tree(rmap_item);
+			return rmap_item;
+		}
+		if (rmap_item->address > addr)
+			break;
+		cur = cur->next;
+		remove_rmap_item_from_tree(rmap_item);
+		list_del(&rmap_item->link);
+		free_rmap_item(rmap_item);
+	}
+
+	rmap_item = alloc_rmap_item();
+	if (rmap_item) {
+		/* It has already been zeroed */
+		rmap_item->mm = mm_slot->mm;
+		rmap_item->address = addr;
+		list_add_tail(&rmap_item->link, cur);
+	}
+	return rmap_item;
+}
+
+static struct rmap_item *scan_get_next_rmap_item(struct page **page)
+{
+	struct mm_struct *mm;
+	struct mm_slot *slot;
+	struct vm_area_struct *vma;
+	struct rmap_item *rmap_item;
+
+	if (list_empty(&ksm_mm_head.mm_list))
+		return NULL;
+
+	slot = ksm_scan.mm_slot;
+	if (slot == &ksm_mm_head) {
+		root_unstable_tree = RB_ROOT;
+
+		spin_lock(&ksm_mmlist_lock);
+		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
+		ksm_scan.mm_slot = slot;
+		spin_unlock(&ksm_mmlist_lock);
+next_mm:
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(&slot->rmap_list,
+						struct rmap_item, link);
+	}
+
+	mm = slot->mm;
+	down_read(&mm->mmap_sem);
+	for (vma = find_vma(mm, ksm_scan.address); vma; vma = vma->vm_next) {
+		if (!(vma->vm_flags & VM_MERGEABLE))
+			continue;
+		if (ksm_scan.address < vma->vm_start)
+			ksm_scan.address = vma->vm_start;
+		if (!vma->anon_vma)
+			ksm_scan.address = vma->vm_end;
+
+		while (ksm_scan.address < vma->vm_end) {
+			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
+			if (*page && PageAnon(*page)) {
+				flush_anon_page(vma, *page, ksm_scan.address);
+				flush_dcache_page(*page);
+				rmap_item = get_next_rmap_item(slot,
+					ksm_scan.rmap_item->link.next,
+					ksm_scan.address);
+				if (rmap_item) {
+					ksm_scan.rmap_item = rmap_item;
+					ksm_scan.address += PAGE_SIZE;
+				} else
+					put_page(*page);
+				up_read(&mm->mmap_sem);
+				return rmap_item;
+			}
+			if (*page)
+				put_page(*page);
+			ksm_scan.address += PAGE_SIZE;
+			cond_resched();
+		}
+	}
+
+	if (!ksm_scan.address) {
+		/*
+		 * We've completed a full scan of all vmas, holding mmap_sem
+		 * throughout, and found no VM_MERGEABLE: so do the same as
+		 * __ksm_exit does to remove this mm from all our lists now.
+		 */
+		remove_mm_from_lists(mm);
+		up_read(&mm->mmap_sem);
+		slot = ksm_scan.mm_slot;
+		if (slot != &ksm_mm_head)
+			goto next_mm;
+		return NULL;
+	}
+
+	/*
+	 * Nuke all the rmap_items that are above this current rmap:
+	 * because there were no VM_MERGEABLE vmas with such addresses.
+	 */
+	remove_trailing_rmap_items(slot, ksm_scan.rmap_item->link.next);
+	up_read(&mm->mmap_sem);
+
+	spin_lock(&ksm_mmlist_lock);
+	slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
+	ksm_scan.mm_slot = slot;
+	spin_unlock(&ksm_mmlist_lock);
+
+	/* Repeat until we've completed scanning the whole list */
+	if (slot != &ksm_mm_head)
+		goto next_mm;
+
+	/*
+	 * Bump seqnr here rather than at top, so that __ksm_exit
+	 * can skip rb_erase on unstable tree until we run again.
+	 */
+	ksm_scan.seqnr++;
+	return NULL;
+}
+
+/**
+ * ksm_do_scan  - the ksm scanner main worker function.
+ * @scan_npages - number of pages we want to scan before we return.
+ */
+static void ksm_do_scan(unsigned int scan_npages)
+{
+	struct rmap_item *rmap_item;
+	struct page *page;
+
+	while (scan_npages--) {
+		cond_resched();
+		rmap_item = scan_get_next_rmap_item(&page);
+		if (!rmap_item)
+			return;
+		if (!PageKsm(page) || !in_stable_tree(rmap_item))
+			cmp_and_merge_page(page, rmap_item);
+		put_page(page);
+	}
+}
+
+static int ksm_scan_thread(void *nothing)
+{
+	set_user_nice(current, 0);
+
+	while (!kthread_should_stop()) {
+		if (ksm_run & KSM_RUN_MERGE) {
+			mutex_lock(&ksm_thread_mutex);
+			ksm_do_scan(ksm_thread_pages_to_scan);
+			mutex_unlock(&ksm_thread_mutex);
+			schedule_timeout_interruptible(
+				msecs_to_jiffies(ksm_thread_sleep_millisecs));
+		} else {
+			wait_event_interruptible(ksm_thread_wait,
+					(ksm_run & KSM_RUN_MERGE) ||
+					kthread_should_stop());
+		}
+	}
+	return 0;
+}
+
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
 {
@@ -33,7 +1307,8 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		if (!(*vm_flags & VM_MERGEABLE))
 			return 0;		/* just ignore the advice */
 
-		/* Unmerge any merged pages here */
+		if (vma->anon_vma)
+			unmerge_ksm_pages(vma, start, end);
 
 		*vm_flags &= ~VM_MERGEABLE;
 		break;
@@ -44,13 +1319,217 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 
 int __ksm_enter(struct mm_struct *mm)
 {
-	/* Allocate a structure to track mm and link it into KSM's list */
+	struct mm_slot *mm_slot = alloc_mm_slot();
+	if (!mm_slot)
+		return -ENOMEM;
+
+	spin_lock(&ksm_mmlist_lock);
+	insert_to_mm_slots_hash(mm, mm_slot);
+	/*
+	 * Insert just behind the scanning cursor, to let the area settle
+	 * down a little; when fork is followed by immediate exec, we don't
+	 * want ksmd to waste time setting up and tearing down an rmap_list.
+	 */
+	list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
+	spin_unlock(&ksm_mmlist_lock);
+
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
 	return 0;
 }
 
 void __ksm_exit(struct mm_struct *mm)
 {
-	/* Unlink and free all KSM's structures which track this mm */
-	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+	/*
+	 * This process is exiting: doesn't hold and doesn't need mmap_sem;
+	 * but we do need to exclude ksmd and other exiters while we modify
+	 * the various lists and trees.
+	 */
+	mutex_lock(&ksm_thread_mutex);
+	remove_mm_from_lists(mm);
+	mutex_unlock(&ksm_thread_mutex);
+}
+
+#define KSM_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+#define KSM_ATTR(_name) \
+	static struct kobj_attribute _name##_attr = \
+		__ATTR(_name, 0644, _name##_show, _name##_store)
+
+static ssize_t sleep_millisecs_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
+}
+
+static ssize_t sleep_millisecs_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	unsigned long msecs;
+	int err;
+
+	err = strict_strtoul(buf, 10, &msecs);
+	if (err || msecs > UINT_MAX)
+		return -EINVAL;
+
+	ksm_thread_sleep_millisecs = msecs;
+
+	return count;
+}
+KSM_ATTR(sleep_millisecs);
+
+static ssize_t pages_to_scan_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
+}
+
+static ssize_t pages_to_scan_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int err;
+	unsigned long nr_pages;
+
+	err = strict_strtoul(buf, 10, &nr_pages);
+	if (err || nr_pages > UINT_MAX)
+		return -EINVAL;
+
+	ksm_thread_pages_to_scan = nr_pages;
+
+	return count;
+}
+KSM_ATTR(pages_to_scan);
+
+static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	return sprintf(buf, "%u\n", ksm_run);
+}
+
+static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	int err;
+	unsigned long flags;
+
+	err = strict_strtoul(buf, 10, &flags);
+	if (err || flags > UINT_MAX)
+		return -EINVAL;
+	if (flags > KSM_RUN_UNMERGE)
+		return -EINVAL;
+
+	/*
+	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
+	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
+	 * breaking COW to free the kernel_pages_allocated (but leaves
+	 * mm_slots on the list for when ksmd may be set running again).
+	 */
+
+	mutex_lock(&ksm_thread_mutex);
+	if (ksm_run != flags) {
+		ksm_run = flags;
+		if (flags & KSM_RUN_UNMERGE)
+			unmerge_and_remove_all_rmap_items();
+	}
+	mutex_unlock(&ksm_thread_mutex);
+
+	if (flags & KSM_RUN_MERGE)
+		wake_up_interruptible(&ksm_thread_wait);
+
+	return count;
+}
+KSM_ATTR(run);
+
+static ssize_t pages_shared_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+			ksm_pages_shared - ksm_kernel_pages_allocated);
+}
+KSM_ATTR_RO(pages_shared);
+
+static ssize_t kernel_pages_allocated_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_kernel_pages_allocated);
+}
+KSM_ATTR_RO(kernel_pages_allocated);
+
+static ssize_t max_kernel_pages_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int err;
+	unsigned long nr_pages;
+
+	err = strict_strtoul(buf, 10, &nr_pages);
+	if (err)
+		return -EINVAL;
+
+	ksm_max_kernel_pages = nr_pages;
+
+	return count;
+}
+
+static ssize_t max_kernel_pages_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_max_kernel_pages);
+}
+KSM_ATTR(max_kernel_pages);
+
+static struct attribute *ksm_attrs[] = {
+	&sleep_millisecs_attr.attr,
+	&pages_to_scan_attr.attr,
+	&run_attr.attr,
+	&pages_shared_attr.attr,
+	&kernel_pages_allocated_attr.attr,
+	&max_kernel_pages_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ksm_attr_group = {
+	.attrs = ksm_attrs,
+	.name = "ksm",
+};
+
+static int __init ksm_init(void)
+{
+	struct task_struct *ksm_thread;
+	int err;
+
+	err = ksm_slab_init();
+	if (err)
+		goto out;
+
+	err = mm_slots_hash_init();
+	if (err)
+		goto out_free1;
+
+	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
+	if (IS_ERR(ksm_thread)) {
+		printk(KERN_ERR "ksm: creating kthread failed\n");
+		err = PTR_ERR(ksm_thread);
+		goto out_free2;
+	}
+
+	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
+	if (err) {
+		printk(KERN_ERR "ksm: register sysfs failed\n");
+		goto out_free3;
+	}
+
+	return 0;
+
+out_free3:
+	kthread_stop(ksm_thread);
+out_free2:
+	mm_slots_hash_free();
+out_free1:
+	ksm_slab_free();
+out:
+	return err;
 }
+module_init(ksm_init)
-- 
cgit v0.10.2


From 1ff829957316670af64be24192ef849e7253a509 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:05 -0700
Subject: ksm: prevent mremap move poisoning

KSM's scan allows for user pages to be COWed or unmapped at any time,
without requiring any notification.  But its stable tree does assume that
when it finds a KSM page where it placed a KSM page, then it is the same
KSM page that it placed there.

mremap move could break that assumption: if an area containing a KSM page
was unmapped, then an area containing a different KSM page was moved with
mremap into the place of the original, before KSM's scan came around to
notice.  That could then poison a node of the stable tree, so that memcmps
would "lie" and upset the ordering of the tree.

Probably noone will ever need mremap move on a VM_MERGEABLE area; except
that prohibiting it would make trouble for schemes in which we try making
everything VM_MERGEABLE e.g.  for testing: an mremap which normally works
would then fail mysteriously.

There's no need to go to any trouble, such as re-sorting KSM's list of
rmap_items to match the new layout: simply unmerge the area to COW all its
KSM pages before moving, but leave VM_MERGEABLE on so that they're
remerged later.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mremap.c b/mm/mremap.c
index a39b7b9..93addde 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -11,6 +11,7 @@
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/shm.h>
+#include <linux/ksm.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/capability.h>
@@ -182,6 +183,17 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (mm->map_count >= sysctl_max_map_count - 3)
 		return -ENOMEM;
 
+	/*
+	 * Advise KSM to break any KSM pages in the area to be moved:
+	 * it would be confusing if they were to turn up at the new
+	 * location, where they happen to coincide with different KSM
+	 * pages recently unmapped.  But leave vma->vm_flags as it was,
+	 * so KSM can come around to merge on vma and new_vma afterwards.
+	 */
+	if (ksm_madvise(vma, old_addr, old_addr + old_len,
+						MADV_UNMERGEABLE, &vm_flags))
+		return -ENOMEM;
+
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
 	if (!new_vma)
-- 
cgit v0.10.2


From 36b2528dc1819dc783f69917ac20e66a678b3479 Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Mon, 21 Sep 2009 17:02:06 -0700
Subject: ksm: change copyright message

Adding Hugh Dickins into the authors list.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 2c02094..0113f15 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -4,11 +4,12 @@
  * This code enables dynamic sharing of identical pages found in different
  * memory areas, even if they are not shared by fork()
  *
- * Copyright (C) 2008 Red Hat, Inc.
+ * Copyright (C) 2008-2009 Red Hat, Inc.
  * Authors:
  *	Izik Eidus
  *	Andrea Arcangeli
  *	Chris Wright
+ *	Hugh Dickins
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  */
-- 
cgit v0.10.2


From 339aa62469f65daf38a01d6c098b5f3ff8016653 Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Mon, 21 Sep 2009 17:02:07 -0700
Subject: ksm: change ksm nice level to be 5

ksm should try not to disturb other tasks as much as possible.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 0113f15..cf072c5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1263,7 +1263,7 @@ static void ksm_do_scan(unsigned int scan_npages)
 
 static int ksm_scan_thread(void *nothing)
 {
-	set_user_nice(current, 0);
+	set_user_nice(current, 5);
 
 	while (!kthread_should_stop()) {
 		if (ksm_run & KSM_RUN_MERGE) {
-- 
cgit v0.10.2


From b4028260334e1ecf63fb5e0a95d65bb2db02c1ec Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:09 -0700
Subject: ksm: rename kernel_pages_allocated

We're not implementing swapping of KSM pages in its first release;
but when that follows, "kernel_pages_allocated" will be a very poor
name for the sysfs file showing number of nodes in the stable tree:
rename that to "pages_shared" throughout.

But we already have a "pages_shared", counting those page slots
sharing the shared pages: first rename that to... "pages_sharing".

What will become of "max_kernel_pages" when the pages shared can
be swapped?  I guess it will just be removed, so keep that name.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index cf072c5..46b4c52 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -150,10 +150,10 @@ static struct kmem_cache *rmap_item_cache;
 static struct kmem_cache *mm_slot_cache;
 
 /* The number of nodes in the stable tree */
-static unsigned long ksm_kernel_pages_allocated;
+static unsigned long ksm_pages_shared;
 
 /* The number of page slots sharing those nodes */
-static unsigned long ksm_pages_shared;
+static unsigned long ksm_pages_sharing;
 
 /* Limit on the number of unswappable pages used */
 static unsigned long ksm_max_kernel_pages;
@@ -384,7 +384,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 				next_item->address |= NODE_FLAG;
 			} else {
 				rb_erase(&rmap_item->node, &root_stable_tree);
-				ksm_kernel_pages_allocated--;
+				ksm_pages_shared--;
 			}
 		} else {
 			struct rmap_item *prev_item = rmap_item->prev;
@@ -398,7 +398,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		}
 
 		rmap_item->next = NULL;
-		ksm_pages_shared--;
+		ksm_pages_sharing--;
 
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
@@ -748,7 +748,7 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 	 * is the number of kernel pages that we hold.
 	 */
 	if (ksm_max_kernel_pages &&
-	    ksm_max_kernel_pages <= ksm_kernel_pages_allocated)
+	    ksm_max_kernel_pages <= ksm_pages_shared)
 		return err;
 
 	kpage = alloc_page(GFP_HIGHUSER);
@@ -787,7 +787,7 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 		if (err)
 			break_cow(mm1, addr1);
 		else
-			ksm_pages_shared += 2;
+			ksm_pages_sharing += 2;
 	}
 
 	put_page(kpage);
@@ -817,7 +817,7 @@ static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
 	up_read(&mm1->mmap_sem);
 
 	if (!err)
-		ksm_pages_shared++;
+		ksm_pages_sharing++;
 
 	return err;
 }
@@ -928,7 +928,7 @@ static struct rmap_item *stable_tree_insert(struct page *page,
 		}
 	}
 
-	ksm_kernel_pages_allocated++;
+	ksm_pages_shared++;
 
 	rmap_item->address |= NODE_FLAG | STABLE_FLAG;
 	rmap_item->next = NULL;
@@ -1044,7 +1044,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	tree_rmap_item = stable_tree_search(page, page2, rmap_item);
 	if (tree_rmap_item) {
 		if (page == page2[0]) {			/* forked */
-			ksm_pages_shared++;
+			ksm_pages_sharing++;
 			err = 0;
 		} else
 			err = try_to_merge_with_ksm_page(rmap_item->mm,
@@ -1107,7 +1107,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 				break_cow(tree_rmap_item->mm,
 						tree_rmap_item->address);
 				break_cow(rmap_item->mm, rmap_item->address);
-				ksm_pages_shared -= 2;
+				ksm_pages_sharing -= 2;
 			}
 		}
 
@@ -1423,7 +1423,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	/*
 	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
 	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
-	 * breaking COW to free the kernel_pages_allocated (but leaves
+	 * breaking COW to free the unswappable pages_shared (but leaves
 	 * mm_slots on the list for when ksmd may be set running again).
 	 */
 
@@ -1442,22 +1442,6 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 }
 KSM_ATTR(run);
 
-static ssize_t pages_shared_show(struct kobject *kobj,
-				 struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%lu\n",
-			ksm_pages_shared - ksm_kernel_pages_allocated);
-}
-KSM_ATTR_RO(pages_shared);
-
-static ssize_t kernel_pages_allocated_show(struct kobject *kobj,
-					   struct kobj_attribute *attr,
-					   char *buf)
-{
-	return sprintf(buf, "%lu\n", ksm_kernel_pages_allocated);
-}
-KSM_ATTR_RO(kernel_pages_allocated);
-
 static ssize_t max_kernel_pages_store(struct kobject *kobj,
 				      struct kobj_attribute *attr,
 				      const char *buf, size_t count)
@@ -1481,13 +1465,28 @@ static ssize_t max_kernel_pages_show(struct kobject *kobj,
 }
 KSM_ATTR(max_kernel_pages);
 
+static ssize_t pages_shared_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_pages_shared);
+}
+KSM_ATTR_RO(pages_shared);
+
+static ssize_t pages_sharing_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+			ksm_pages_sharing - ksm_pages_shared);
+}
+KSM_ATTR_RO(pages_sharing);
+
 static struct attribute *ksm_attrs[] = {
 	&sleep_millisecs_attr.attr,
 	&pages_to_scan_attr.attr,
 	&run_attr.attr,
-	&pages_shared_attr.attr,
-	&kernel_pages_allocated_attr.attr,
 	&max_kernel_pages_attr.attr,
+	&pages_shared_attr.attr,
+	&pages_sharing_attr.attr,
 	NULL,
 };
 
-- 
cgit v0.10.2


From e178dfde3952192cf44eeb0612882f01fc96c0a9 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:10 -0700
Subject: ksm: move pages_sharing updates

The pages_shared count is incremented and decremented when adding a node
to and removing a node from the stable tree: easy to understand.  But the
pages_sharing count was hard to follow, being adjusted in various places:
increment and decrement it when adding to and removing from the stable tree.

And the pages_sharing variable used to include the pages_shared, then those
were subtracted when shown in the pages_sharing sysfs file: now keep it as
an exclusive count of leaves hanging off the stable tree nodes, throughout.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 46b4c52..ef89526 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -152,7 +152,7 @@ static struct kmem_cache *mm_slot_cache;
 /* The number of nodes in the stable tree */
 static unsigned long ksm_pages_shared;
 
-/* The number of page slots sharing those nodes */
+/* The number of page slots additionally sharing those nodes */
 static unsigned long ksm_pages_sharing;
 
 /* Limit on the number of unswappable pages used */
@@ -382,6 +382,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 						&next_item->node,
 						&root_stable_tree);
 				next_item->address |= NODE_FLAG;
+				ksm_pages_sharing--;
 			} else {
 				rb_erase(&rmap_item->node, &root_stable_tree);
 				ksm_pages_shared--;
@@ -395,10 +396,10 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 				BUG_ON(next_item->prev != rmap_item);
 				next_item->prev = rmap_item->prev;
 			}
+			ksm_pages_sharing--;
 		}
 
 		rmap_item->next = NULL;
-		ksm_pages_sharing--;
 
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
@@ -786,8 +787,6 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 		 */
 		if (err)
 			break_cow(mm1, addr1);
-		else
-			ksm_pages_sharing += 2;
 	}
 
 	put_page(kpage);
@@ -816,9 +815,6 @@ static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
 	err = try_to_merge_one_page(vma, page1, kpage);
 	up_read(&mm1->mmap_sem);
 
-	if (!err)
-		ksm_pages_sharing++;
-
 	return err;
 }
 
@@ -928,13 +924,12 @@ static struct rmap_item *stable_tree_insert(struct page *page,
 		}
 	}
 
-	ksm_pages_shared++;
-
 	rmap_item->address |= NODE_FLAG | STABLE_FLAG;
 	rmap_item->next = NULL;
 	rb_link_node(&rmap_item->node, parent, new);
 	rb_insert_color(&rmap_item->node, &root_stable_tree);
 
+	ksm_pages_shared++;
 	return rmap_item;
 }
 
@@ -1019,6 +1014,8 @@ static void stable_tree_append(struct rmap_item *rmap_item,
 
 	tree_rmap_item->next = rmap_item;
 	rmap_item->address |= STABLE_FLAG;
+
+	ksm_pages_sharing++;
 }
 
 /*
@@ -1043,10 +1040,9 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	/* We first start with searching the page inside the stable tree */
 	tree_rmap_item = stable_tree_search(page, page2, rmap_item);
 	if (tree_rmap_item) {
-		if (page == page2[0]) {			/* forked */
-			ksm_pages_sharing++;
+		if (page == page2[0])			/* forked */
 			err = 0;
-		} else
+		else
 			err = try_to_merge_with_ksm_page(rmap_item->mm,
 							 rmap_item->address,
 							 page, page2[0]);
@@ -1107,7 +1103,6 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 				break_cow(tree_rmap_item->mm,
 						tree_rmap_item->address);
 				break_cow(rmap_item->mm, rmap_item->address);
-				ksm_pages_sharing -= 2;
 			}
 		}
 
@@ -1475,8 +1470,7 @@ KSM_ATTR_RO(pages_shared);
 static ssize_t pages_sharing_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%lu\n",
-			ksm_pages_sharing - ksm_pages_shared);
+	return sprintf(buf, "%lu\n", ksm_pages_sharing);
 }
 KSM_ATTR_RO(pages_sharing);
 
-- 
cgit v0.10.2


From 473b0ce4d13ee77925a7062e25dea0d16a91f654 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:11 -0700
Subject: ksm: pages_unshared and pages_volatile

The pages_shared and pages_sharing counts give a good picture of how
successful KSM is at sharing; but no clue to how much wasted work it's
doing to get there.  Add pages_unshared (count of unique pages waiting
in the unstable tree, hoping to find a mate) and pages_volatile.

pages_volatile is harder to define.  It includes those pages changing
too fast to get into the unstable tree, but also whatever other edge
conditions prevent a page getting into the trees: a high value may
deserve investigation.  Don't try to calculate it from the various
conditions: it's the total of rmap_items less those accounted for.

Also show full_scans: the number of completed scans of everything
registered in the mm list.

The locking for all these counts is simply ksm_thread_mutex.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index ef89526..9f8f052 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -155,6 +155,12 @@ static unsigned long ksm_pages_shared;
 /* The number of page slots additionally sharing those nodes */
 static unsigned long ksm_pages_sharing;
 
+/* The number of nodes in the unstable tree */
+static unsigned long ksm_pages_unshared;
+
+/* The number of rmap_items in use: to calculate pages_volatile */
+static unsigned long ksm_rmap_items;
+
 /* Limit on the number of unswappable pages used */
 static unsigned long ksm_max_kernel_pages;
 
@@ -204,11 +210,17 @@ static void __init ksm_slab_free(void)
 
 static inline struct rmap_item *alloc_rmap_item(void)
 {
-	return kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+	struct rmap_item *rmap_item;
+
+	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+	if (rmap_item)
+		ksm_rmap_items++;
+	return rmap_item;
 }
 
 static inline void free_rmap_item(struct rmap_item *rmap_item)
 {
+	ksm_rmap_items--;
 	rmap_item->mm = NULL;	/* debug safety */
 	kmem_cache_free(rmap_item_cache, rmap_item);
 }
@@ -419,6 +431,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		BUG_ON(age > 2);
 		if (!age)
 			rb_erase(&rmap_item->node, &root_unstable_tree);
+		ksm_pages_unshared--;
 	}
 
 	rmap_item->address &= PAGE_MASK;
@@ -995,6 +1008,7 @@ static struct rmap_item *unstable_tree_search_insert(struct page *page,
 	rb_link_node(&rmap_item->node, parent, new);
 	rb_insert_color(&rmap_item->node, &root_unstable_tree);
 
+	ksm_pages_unshared++;
 	return NULL;
 }
 
@@ -1091,6 +1105,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 		if (!err) {
 			rb_erase(&tree_rmap_item->node, &root_unstable_tree);
 			tree_rmap_item->address &= ~NODE_FLAG;
+			ksm_pages_unshared--;
+
 			/*
 			 * If we fail to insert the page into the stable tree,
 			 * we will have 2 virtual addresses that are pointing
@@ -1474,6 +1490,37 @@ static ssize_t pages_sharing_show(struct kobject *kobj,
 }
 KSM_ATTR_RO(pages_sharing);
 
+static ssize_t pages_unshared_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_pages_unshared);
+}
+KSM_ATTR_RO(pages_unshared);
+
+static ssize_t pages_volatile_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	long ksm_pages_volatile;
+
+	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
+				- ksm_pages_sharing - ksm_pages_unshared;
+	/*
+	 * It was not worth any locking to calculate that statistic,
+	 * but it might therefore sometimes be negative: conceal that.
+	 */
+	if (ksm_pages_volatile < 0)
+		ksm_pages_volatile = 0;
+	return sprintf(buf, "%ld\n", ksm_pages_volatile);
+}
+KSM_ATTR_RO(pages_volatile);
+
+static ssize_t full_scans_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_scan.seqnr);
+}
+KSM_ATTR_RO(full_scans);
+
 static struct attribute *ksm_attrs[] = {
 	&sleep_millisecs_attr.attr,
 	&pages_to_scan_attr.attr,
@@ -1481,6 +1528,9 @@ static struct attribute *ksm_attrs[] = {
 	&max_kernel_pages_attr.attr,
 	&pages_shared_attr.attr,
 	&pages_sharing_attr.attr,
+	&pages_unshared_attr.attr,
+	&pages_volatile_attr.attr,
+	&full_scans_attr.attr,
 	NULL,
 };
 
-- 
cgit v0.10.2


From 26465d3ea5a62d59efb3796b9e0e2b0656d02cb1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:12 -0700
Subject: ksm: break cow once unshared

We kept agreeing not to bother about the unswappable shared KSM pages
which later become unshared by others: observation suggests they're not
a significant proportion.  But they are disadvantageous, and it is easier
to break COW to replace them by swappable pages, than offer statistics
to show that they don't matter; then we can stop worrying about them.

Doing this in ksm_do_scan, they don't go through cmp_and_merge_page on
this pass: give them a good chance of getting into the unstable tree
on the next pass, or back into the stable, by computing checksum now.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 9f8f052..81f692e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1268,6 +1268,14 @@ static void ksm_do_scan(unsigned int scan_npages)
 			return;
 		if (!PageKsm(page) || !in_stable_tree(rmap_item))
 			cmp_and_merge_page(page, rmap_item);
+		else if (page_mapcount(page) == 1) {
+			/*
+			 * Replace now-unshared ksm page by ordinary page.
+			 */
+			break_cow(rmap_item->mm, rmap_item->address);
+			remove_rmap_item_from_tree(rmap_item);
+			rmap_item->oldchecksum = calc_checksum(page);
+		}
 		put_page(page);
 	}
 }
-- 
cgit v0.10.2


From 6e15838425ac855982f10419558649954a0684a3 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:14 -0700
Subject: ksm: keep quiet while list empty

ksm_scan_thread already sleeps in wait_event_interruptible until setting
ksm_run activates it; but if there's nothing on its list to look at, i.e.
nobody has yet said madvise MADV_MERGEABLE, it's a shame to be clocking
up system time and full_scans: ksmd_should_run added to check that too.

And move the mutex_lock out around it: the new counts showed that when
ksm_run is stopped, a little work often got done afterwards, because it
had been read before taking the mutex.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 81f692e..2849422 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1280,21 +1280,27 @@ static void ksm_do_scan(unsigned int scan_npages)
 	}
 }
 
+static int ksmd_should_run(void)
+{
+	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
+}
+
 static int ksm_scan_thread(void *nothing)
 {
 	set_user_nice(current, 5);
 
 	while (!kthread_should_stop()) {
-		if (ksm_run & KSM_RUN_MERGE) {
-			mutex_lock(&ksm_thread_mutex);
+		mutex_lock(&ksm_thread_mutex);
+		if (ksmd_should_run())
 			ksm_do_scan(ksm_thread_pages_to_scan);
-			mutex_unlock(&ksm_thread_mutex);
+		mutex_unlock(&ksm_thread_mutex);
+
+		if (ksmd_should_run()) {
 			schedule_timeout_interruptible(
 				msecs_to_jiffies(ksm_thread_sleep_millisecs));
 		} else {
 			wait_event_interruptible(ksm_thread_wait,
-					(ksm_run & KSM_RUN_MERGE) ||
-					kthread_should_stop());
+				ksmd_should_run() || kthread_should_stop());
 		}
 	}
 	return 0;
@@ -1339,10 +1345,16 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 
 int __ksm_enter(struct mm_struct *mm)
 {
-	struct mm_slot *mm_slot = alloc_mm_slot();
+	struct mm_slot *mm_slot;
+	int needs_wakeup;
+
+	mm_slot = alloc_mm_slot();
 	if (!mm_slot)
 		return -ENOMEM;
 
+	/* Check ksm_run too?  Would need tighter locking */
+	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
+
 	spin_lock(&ksm_mmlist_lock);
 	insert_to_mm_slots_hash(mm, mm_slot);
 	/*
@@ -1354,6 +1366,10 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+
+	if (needs_wakeup)
+		wake_up_interruptible(&ksm_thread_wait);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 81464e30609cdbd3d96d8dd6991e7481195a89a1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:15 -0700
Subject: ksm: five little cleanups

1. We don't use __break_cow entry point now: merge it into break_cow.
2. remove_all_slot_rmap_items is just a special case of
   remove_trailing_rmap_items: use the latter instead.
3. Extend comment on unmerge_ksm_pages and rmap_items.
4. try_to_merge_two_pages should use try_to_merge_with_ksm_page
   instead of duplicating its code; and so swap them around.
5. Comment on cmp_and_merge_page described last year's: update it.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 2849422..c49bb71 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -315,22 +315,18 @@ static void break_ksm(struct vm_area_struct *vma, unsigned long addr)
 	/* Which leaves us looping there if VM_FAULT_OOM: hmmm... */
 }
 
-static void __break_cow(struct mm_struct *mm, unsigned long addr)
+static void break_cow(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma;
 
+	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
-		return;
+		goto out;
 	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
-		return;
+		goto out;
 	break_ksm(vma, addr);
-}
-
-static void break_cow(struct mm_struct *mm, unsigned long addr)
-{
-	down_read(&mm->mmap_sem);
-	__break_cow(mm, addr);
+out:
 	up_read(&mm->mmap_sem);
 }
 
@@ -439,17 +435,6 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 	cond_resched();		/* we're called from many long loops */
 }
 
-static void remove_all_slot_rmap_items(struct mm_slot *mm_slot)
-{
-	struct rmap_item *rmap_item, *node;
-
-	list_for_each_entry_safe(rmap_item, node, &mm_slot->rmap_list, link) {
-		remove_rmap_item_from_tree(rmap_item);
-		list_del(&rmap_item->link);
-		free_rmap_item(rmap_item);
-	}
-}
-
 static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
 				       struct list_head *cur)
 {
@@ -471,6 +456,11 @@ static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
  * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
  * rmap_items from parent to child at fork time (so as not to waste time
  * if exit comes before the next scan reaches it).
+ *
+ * Similarly, although we'd like to remove rmap_items (so updating counts
+ * and freeing memory) when unmerging an area, it's easier to leave that
+ * to the next pass of ksmd - consider, for example, how ksmd might be
+ * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
 static void unmerge_ksm_pages(struct vm_area_struct *vma,
 			      unsigned long start, unsigned long end)
@@ -495,7 +485,7 @@ static void unmerge_and_remove_all_rmap_items(void)
 				continue;
 			unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
 		}
-		remove_all_slot_rmap_items(mm_slot);
+		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
 		up_read(&mm->mmap_sem);
 	}
 
@@ -533,7 +523,7 @@ static void remove_mm_from_lists(struct mm_struct *mm)
 	list_del(&mm_slot->mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	remove_all_slot_rmap_items(mm_slot);
+	remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
 	free_mm_slot(mm_slot);
 	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
 }
@@ -740,6 +730,29 @@ out:
 }
 
 /*
+ * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
+ * but no new kernel page is allocated: kpage must already be a ksm page.
+ */
+static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
+				      unsigned long addr1,
+				      struct page *page1,
+				      struct page *kpage)
+{
+	struct vm_area_struct *vma;
+	int err = -EFAULT;
+
+	down_read(&mm1->mmap_sem);
+	vma = find_vma(mm1, addr1);
+	if (!vma || vma->vm_start > addr1)
+		goto out;
+
+	err = try_to_merge_one_page(vma, page1, kpage);
+out:
+	up_read(&mm1->mmap_sem);
+	return err;
+}
+
+/*
  * try_to_merge_two_pages - take two identical pages and prepare them
  * to be merged into one page.
  *
@@ -772,9 +785,8 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 	down_read(&mm1->mmap_sem);
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1) {
-		put_page(kpage);
 		up_read(&mm1->mmap_sem);
-		return err;
+		goto out;
 	}
 
 	copy_user_highpage(kpage, page1, addr1, vma);
@@ -782,56 +794,20 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 	up_read(&mm1->mmap_sem);
 
 	if (!err) {
-		down_read(&mm2->mmap_sem);
-		vma = find_vma(mm2, addr2);
-		if (!vma || vma->vm_start > addr2) {
-			put_page(kpage);
-			up_read(&mm2->mmap_sem);
-			break_cow(mm1, addr1);
-			return -EFAULT;
-		}
-
-		err = try_to_merge_one_page(vma, page2, kpage);
-		up_read(&mm2->mmap_sem);
-
+		err = try_to_merge_with_ksm_page(mm2, addr2, page2, kpage);
 		/*
-		 * If the second try_to_merge_one_page failed, we have a
-		 * ksm page with just one pte pointing to it, so break it.
+		 * If that fails, we have a ksm page with only one pte
+		 * pointing to it: so break it.
 		 */
 		if (err)
 			break_cow(mm1, addr1);
 	}
-
+out:
 	put_page(kpage);
 	return err;
 }
 
 /*
- * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
- * but no new kernel page is allocated: kpage must already be a ksm page.
- */
-static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
-				      unsigned long addr1,
-				      struct page *page1,
-				      struct page *kpage)
-{
-	struct vm_area_struct *vma;
-	int err = -EFAULT;
-
-	down_read(&mm1->mmap_sem);
-	vma = find_vma(mm1, addr1);
-	if (!vma || vma->vm_start > addr1) {
-		up_read(&mm1->mmap_sem);
-		return err;
-	}
-
-	err = try_to_merge_one_page(vma, page1, kpage);
-	up_read(&mm1->mmap_sem);
-
-	return err;
-}
-
-/*
  * stable_tree_search - search page inside the stable tree
  * @page: the page that we are searching identical pages to.
  * @page2: pointer into identical page that we are holding inside the stable
@@ -1033,10 +1009,10 @@ static void stable_tree_append(struct rmap_item *rmap_item,
 }
 
 /*
- * cmp_and_merge_page - take a page computes its hash value and check if there
- * is similar hash value to different page,
- * in case we find that there is similar hash to different page we call to
- * try_to_merge_two_pages().
+ * cmp_and_merge_page - first see if page can be merged into the stable tree;
+ * if not, compare checksum to previous and if it's the same, see if page can
+ * be inserted into the unstable tree, or merged with a page already there and
+ * both transferred to the stable tree.
  *
  * @page: the page that we are searching identical page to.
  * @rmap_item: the reverse mapping into the virtual address of this page
-- 
cgit v0.10.2


From d952b79136a6c32a3f97e0628ca78340f1d5c6f9 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:16 -0700
Subject: ksm: fix endless loop on oom

break_ksm has been looping endlessly ignoring VM_FAULT_OOM: that should
only be a problem for ksmd when a memory control group imposes limits
(normally the OOM killer will kill others with an mm until it succeeds);
but in general (especially for MADV_UNMERGEABLE and KSM_RUN_UNMERGE) we
do need to route the error (or kill) back to the caller (or sighandling).

Test signal_pending in unmerge_ksm_pages, which could be a lengthy
procedure if it has to spill into swap: returning -ERESTARTSYS so that
trivial signals will restart but fatals will terminate (is that right?
we do different things in different places in mm, none exactly this).

unmerge_and_remove_all_rmap_items was forgetting to lock when going
down the mm_list: fix that.  Whether it's successful or not, reset
ksm_scan cursor to head; but only if it's successful, reset seqnr
(shown in full_scans) - page counts will have gone down to zero.

This patch leaves a significant OOM deadlock, but it's a good step
on the way, and that deadlock is fixed in a subsequent patch.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index c49bb71..d9e3cfc 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -294,10 +294,10 @@ static inline int in_stable_tree(struct rmap_item *rmap_item)
  * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
  * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
  */
-static void break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
-	int ret;
+	int ret = 0;
 
 	do {
 		cond_resched();
@@ -310,9 +310,36 @@ static void break_ksm(struct vm_area_struct *vma, unsigned long addr)
 		else
 			ret = VM_FAULT_WRITE;
 		put_page(page);
-	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS)));
-
-	/* Which leaves us looping there if VM_FAULT_OOM: hmmm... */
+	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
+	/*
+	 * We must loop because handle_mm_fault() may back out if there's
+	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
+	 *
+	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
+	 * COW has been broken, even if the vma does not permit VM_WRITE;
+	 * but note that a concurrent fault might break PageKsm for us.
+	 *
+	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
+	 * backing file, which also invalidates anonymous pages: that's
+	 * okay, that truncation will have unmapped the PageKsm for us.
+	 *
+	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
+	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
+	 * current task has TIF_MEMDIE set, and will be OOM killed on return
+	 * to user; and ksmd, having no mm, would never be chosen for that.
+	 *
+	 * But if the mm is in a limited mem_cgroup, then the fault may fail
+	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
+	 * even ksmd can fail in this way - though it's usually breaking ksm
+	 * just to undo a merge it made a moment before, so unlikely to oom.
+	 *
+	 * That's a pity: we might therefore have more kernel pages allocated
+	 * than we're counting as nodes in the stable tree; but ksm_do_scan
+	 * will retry to break_cow on each pass, so should recover the page
+	 * in due course.  The important thing is to not let VM_MERGEABLE
+	 * be cleared while any such pages might remain in the area.
+	 */
+	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
 }
 
 static void break_cow(struct mm_struct *mm, unsigned long addr)
@@ -462,39 +489,61 @@ static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
  * to the next pass of ksmd - consider, for example, how ksmd might be
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
-static void unmerge_ksm_pages(struct vm_area_struct *vma,
-			      unsigned long start, unsigned long end)
+static int unmerge_ksm_pages(struct vm_area_struct *vma,
+			     unsigned long start, unsigned long end)
 {
 	unsigned long addr;
+	int err = 0;
 
-	for (addr = start; addr < end; addr += PAGE_SIZE)
-		break_ksm(vma, addr);
+	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (signal_pending(current))
+			err = -ERESTARTSYS;
+		else
+			err = break_ksm(vma, addr);
+	}
+	return err;
 }
 
-static void unmerge_and_remove_all_rmap_items(void)
+static int unmerge_and_remove_all_rmap_items(void)
 {
 	struct mm_slot *mm_slot;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
+	int err = 0;
+
+	spin_lock(&ksm_mmlist_lock);
+	mm_slot = list_entry(ksm_mm_head.mm_list.next,
+						struct mm_slot, mm_list);
+	spin_unlock(&ksm_mmlist_lock);
 
-	list_for_each_entry(mm_slot, &ksm_mm_head.mm_list, mm_list) {
+	while (mm_slot != &ksm_mm_head) {
 		mm = mm_slot->mm;
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
-			unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+			err = unmerge_ksm_pages(vma,
+						vma->vm_start, vma->vm_end);
+			if (err) {
+				up_read(&mm->mmap_sem);
+				goto out;
+			}
 		}
 		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
 		up_read(&mm->mmap_sem);
+
+		spin_lock(&ksm_mmlist_lock);
+		mm_slot = list_entry(mm_slot->mm_list.next,
+						struct mm_slot, mm_list);
+		spin_unlock(&ksm_mmlist_lock);
 	}
 
+	ksm_scan.seqnr = 0;
+out:
 	spin_lock(&ksm_mmlist_lock);
-	if (ksm_scan.mm_slot != &ksm_mm_head) {
-		ksm_scan.mm_slot = &ksm_mm_head;
-		ksm_scan.seqnr++;
-	}
+	ksm_scan.mm_slot = &ksm_mm_head;
 	spin_unlock(&ksm_mmlist_lock);
+	return err;
 }
 
 static void remove_mm_from_lists(struct mm_struct *mm)
@@ -1051,6 +1100,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	/*
 	 * A ksm page might have got here by fork, but its other
 	 * references have already been removed from the stable tree.
+	 * Or it might be left over from a break_ksm which failed
+	 * when the mem_cgroup had reached its limit: try again now.
 	 */
 	if (PageKsm(page))
 		break_cow(rmap_item->mm, rmap_item->address);
@@ -1286,6 +1337,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	int err;
 
 	switch (advice) {
 	case MADV_MERGEABLE:
@@ -1298,9 +1350,11 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 				 VM_MIXEDMAP  | VM_SAO))
 			return 0;		/* just ignore the advice */
 
-		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
-			if (__ksm_enter(mm) < 0)
-				return -EAGAIN;
+		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
+			err = __ksm_enter(mm);
+			if (err)
+				return err;
+		}
 
 		*vm_flags |= VM_MERGEABLE;
 		break;
@@ -1309,8 +1363,11 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		if (!(*vm_flags & VM_MERGEABLE))
 			return 0;		/* just ignore the advice */
 
-		if (vma->anon_vma)
-			unmerge_ksm_pages(vma, start, end);
+		if (vma->anon_vma) {
+			err = unmerge_ksm_pages(vma, start, end);
+			if (err)
+				return err;
+		}
 
 		*vm_flags &= ~VM_MERGEABLE;
 		break;
@@ -1441,8 +1498,13 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	mutex_lock(&ksm_thread_mutex);
 	if (ksm_run != flags) {
 		ksm_run = flags;
-		if (flags & KSM_RUN_UNMERGE)
-			unmerge_and_remove_all_rmap_items();
+		if (flags & KSM_RUN_UNMERGE) {
+			err = unmerge_and_remove_all_rmap_items();
+			if (err) {
+				ksm_run = KSM_RUN_STOP;
+				count = err;
+			}
+		}
 	}
 	mutex_unlock(&ksm_thread_mutex);
 
-- 
cgit v0.10.2


From cd551f97519d35855be5a8720a47cc802ee4fd06 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:17 -0700
Subject: ksm: distribute remove_mm_from_lists

Do some housekeeping in ksm.c, to help make the next patch easier
to understand: remove the function remove_mm_from_lists, distributing
its code to its callsites scan_get_next_rmap_item and __ksm_exit.

That turns out to be a win in scan_get_next_rmap_item: move its
remove_trailing_rmap_items and cursor advancement up, and it becomes
simpler than before.  __ksm_exit becomes messier, but will change
again; and moving its remove_trailing_rmap_items up lets us strengthen
the unstable tree item's age condition in remove_rmap_item_from_tree.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index d9e3cfc..7e4d255 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -444,14 +444,9 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		 * But __ksm_exit has to be careful: do the rb_erase
 		 * if it's interrupting a scan, and this rmap_item was
 		 * inserted by this scan rather than left from before.
-		 *
-		 * Because of the case in which remove_mm_from_lists
-		 * increments seqnr before removing rmaps, unstable_nr
-		 * may even be 2 behind seqnr, but should never be
-		 * further behind.  Yes, I did have trouble with this!
 		 */
 		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
-		BUG_ON(age > 2);
+		BUG_ON(age > 1);
 		if (!age)
 			rb_erase(&rmap_item->node, &root_unstable_tree);
 		ksm_pages_unshared--;
@@ -546,37 +541,6 @@ out:
 	return err;
 }
 
-static void remove_mm_from_lists(struct mm_struct *mm)
-{
-	struct mm_slot *mm_slot;
-
-	spin_lock(&ksm_mmlist_lock);
-	mm_slot = get_mm_slot(mm);
-
-	/*
-	 * This mm_slot is always at the scanning cursor when we're
-	 * called from scan_get_next_rmap_item; but it's a special
-	 * case when we're called from __ksm_exit.
-	 */
-	if (ksm_scan.mm_slot == mm_slot) {
-		ksm_scan.mm_slot = list_entry(
-			mm_slot->mm_list.next, struct mm_slot, mm_list);
-		ksm_scan.address = 0;
-		ksm_scan.rmap_item = list_entry(
-			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
-		if (ksm_scan.mm_slot == &ksm_mm_head)
-			ksm_scan.seqnr++;
-	}
-
-	hlist_del(&mm_slot->link);
-	list_del(&mm_slot->mm_list);
-	spin_unlock(&ksm_mmlist_lock);
-
-	remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-	free_mm_slot(mm_slot);
-	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-}
-
 static u32 calc_checksum(struct page *page)
 {
 	u32 checksum;
@@ -1241,33 +1205,31 @@ next_mm:
 		}
 	}
 
-	if (!ksm_scan.address) {
-		/*
-		 * We've completed a full scan of all vmas, holding mmap_sem
-		 * throughout, and found no VM_MERGEABLE: so do the same as
-		 * __ksm_exit does to remove this mm from all our lists now.
-		 */
-		remove_mm_from_lists(mm);
-		up_read(&mm->mmap_sem);
-		slot = ksm_scan.mm_slot;
-		if (slot != &ksm_mm_head)
-			goto next_mm;
-		return NULL;
-	}
-
 	/*
 	 * Nuke all the rmap_items that are above this current rmap:
 	 * because there were no VM_MERGEABLE vmas with such addresses.
 	 */
 	remove_trailing_rmap_items(slot, ksm_scan.rmap_item->link.next);
-	up_read(&mm->mmap_sem);
 
 	spin_lock(&ksm_mmlist_lock);
-	slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
-	ksm_scan.mm_slot = slot;
+	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
+						struct mm_slot, mm_list);
+	if (ksm_scan.address == 0) {
+		/*
+		 * We've completed a full scan of all vmas, holding mmap_sem
+		 * throughout, and found no VM_MERGEABLE: so do the same as
+		 * __ksm_exit does to remove this mm from all our lists now.
+		 */
+		hlist_del(&slot->link);
+		list_del(&slot->mm_list);
+		free_mm_slot(slot);
+		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+	}
 	spin_unlock(&ksm_mmlist_lock);
+	up_read(&mm->mmap_sem);
 
 	/* Repeat until we've completed scanning the whole list */
+	slot = ksm_scan.mm_slot;
 	if (slot != &ksm_mm_head)
 		goto next_mm;
 
@@ -1408,13 +1370,38 @@ int __ksm_enter(struct mm_struct *mm)
 
 void __ksm_exit(struct mm_struct *mm)
 {
+	struct mm_slot *mm_slot;
+
 	/*
 	 * This process is exiting: doesn't hold and doesn't need mmap_sem;
 	 * but we do need to exclude ksmd and other exiters while we modify
 	 * the various lists and trees.
 	 */
 	mutex_lock(&ksm_thread_mutex);
-	remove_mm_from_lists(mm);
+	spin_lock(&ksm_mmlist_lock);
+	mm_slot = get_mm_slot(mm);
+	if (!list_empty(&mm_slot->rmap_list)) {
+		spin_unlock(&ksm_mmlist_lock);
+		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
+		spin_lock(&ksm_mmlist_lock);
+	}
+
+	if (ksm_scan.mm_slot == mm_slot) {
+		ksm_scan.mm_slot = list_entry(
+			mm_slot->mm_list.next, struct mm_slot, mm_list);
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(
+			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
+		if (ksm_scan.mm_slot == &ksm_mm_head)
+			ksm_scan.seqnr++;
+	}
+
+	hlist_del(&mm_slot->link);
+	list_del(&mm_slot->mm_list);
+	spin_unlock(&ksm_mmlist_lock);
+
+	free_mm_slot(mm_slot);
+	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
 	mutex_unlock(&ksm_thread_mutex);
 }
 
-- 
cgit v0.10.2


From 9ba6929480088a85c1ff60a4b1f1c9fc80dbd2b7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:20 -0700
Subject: ksm: fix oom deadlock

There's a now-obvious deadlock in KSM's out-of-memory handling:
imagine ksmd or KSM_RUN_UNMERGE handling, holding ksm_thread_mutex,
trying to allocate a page to break KSM in an mm which becomes the
OOM victim (quite likely in the unmerge case): it's killed and goes
to exit, and hangs there waiting to acquire ksm_thread_mutex.

Clearly we must not require ksm_thread_mutex in __ksm_exit, simple
though that made everything else: perhaps use mmap_sem somehow?
And part of the answer lies in the comments on unmerge_ksm_pages:
__ksm_exit should also leave all the rmap_item removal to ksmd.

But there's a fundamental problem, that KSM relies upon mmap_sem to
guarantee the consistency of the mm it's dealing with, yet exit_mmap
tears down an mm without taking mmap_sem.  And bumping mm_users won't
help at all, that just ensures that the pages the OOM killer assumes
are on their way to being freed will not be freed.

The best answer seems to be, to move the ksm_exit callout from just
before exit_mmap, to the middle of exit_mmap: after the mm's pages
have been freed (if the mmu_gather is flushed), but before its page
tables and vma structures have been freed; and down_write,up_write
mmap_sem there to serialize with KSM's own reliance on mmap_sem.

But KSM then needs to be careful, whenever it downs mmap_sem, to
check that the mm is not already exiting: there's a danger of using
find_vma on a layout that's being torn apart, or writing into page
tables which have been freed for reuse; and even do_anonymous_page
and __do_fault need to check they're not being called by break_ksm
to reinstate a pte after zap_pte_range has zapped that page table.

Though it might be clearer to add an exiting flag, set while holding
mmap_sem in __ksm_exit, that wouldn't cover the issue of reinstating
a zapped pte.  All we need is to check whether mm_users is 0 - but
must remember that ksmd may detect that before __ksm_exit is reached.
So, ksm_test_exit(mm) added to comment such checks on mm->mm_users.

__ksm_exit now has to leave clearing up the rmap_items to ksmd,
that needs ksm_thread_mutex; but shift the exiting mm just after the
ksm_scan cursor so that it will soon be dealt with.  __ksm_enter raise
mm_count to hold the mm_struct, ksmd's exit processing (exactly like
its processing when it finds all VM_MERGEABLEs unmapped) mmdrop it,
similar procedure for KSM_RUN_UNMERGE (which has stopped ksmd).

But also give __ksm_exit a fast path: when there's no complication
(no rmap_items attached to mm and it's not at the ksm_scan cursor),
it can safely do all the exiting work itself.  This is not just an
optimization: when ksmd is not running, the raised mm_count would
otherwise leak mm_structs.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index a485c14..2d64ff3 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -12,11 +12,14 @@
 #include <linux/sched.h>
 #include <linux/vmstat.h>
 
+struct mmu_gather;
+
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
 int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm);
+void __ksm_exit(struct mm_struct *mm,
+		struct mmu_gather **tlbp, unsigned long end);
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
@@ -25,10 +28,24 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm)
+/*
+ * For KSM to handle OOM without deadlock when it's breaking COW in a
+ * likely victim of the OOM killer, exit_mmap() has to serialize with
+ * ksm_exit() after freeing mm's pages but before freeing its page tables.
+ * That leaves a window in which KSM might refault pages which have just
+ * been finally unmapped: guard against that with ksm_test_exit(), and
+ * use it after getting mmap_sem in ksm.c, to check if mm is exiting.
+ */
+static inline bool ksm_test_exit(struct mm_struct *mm)
+{
+	return atomic_read(&mm->mm_users) == 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm,
+			    struct mmu_gather **tlbp, unsigned long end)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm);
+		__ksm_exit(mm, tlbp, end);
 }
 
 /*
@@ -64,7 +81,13 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm)
+static inline bool ksm_test_exit(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm,
+			    struct mmu_gather **tlbp, unsigned long end)
 {
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 73a442b..42f20f5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -501,7 +501,6 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
-		ksm_exit(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
diff --git a/mm/ksm.c b/mm/ksm.c
index 7e4d255..722e3f2 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -32,6 +32,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/ksm.h>
 
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 /*
@@ -347,6 +348,8 @@ static void break_cow(struct mm_struct *mm, unsigned long addr)
 	struct vm_area_struct *vma;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
@@ -365,6 +368,8 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 	struct page *page;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
@@ -439,11 +444,11 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
 		/*
-		 * ksm_thread can and must skip the rb_erase, because
+		 * Usually ksmd can and must skip the rb_erase, because
 		 * root_unstable_tree was already reset to RB_ROOT.
-		 * But __ksm_exit has to be careful: do the rb_erase
-		 * if it's interrupting a scan, and this rmap_item was
-		 * inserted by this scan rather than left from before.
+		 * But be careful when an mm is exiting: do the rb_erase
+		 * if this rmap_item was inserted by this scan, rather
+		 * than left over from before.
 		 */
 		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
 		BUG_ON(age > 1);
@@ -491,6 +496,8 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 	int err = 0;
 
 	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (ksm_test_exit(vma->vm_mm))
+			break;
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
@@ -507,34 +514,50 @@ static int unmerge_and_remove_all_rmap_items(void)
 	int err = 0;
 
 	spin_lock(&ksm_mmlist_lock);
-	mm_slot = list_entry(ksm_mm_head.mm_list.next,
+	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
 						struct mm_slot, mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	while (mm_slot != &ksm_mm_head) {
+	for (mm_slot = ksm_scan.mm_slot;
+			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
 		mm = mm_slot->mm;
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (ksm_test_exit(mm))
+				break;
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
 			err = unmerge_ksm_pages(vma,
 						vma->vm_start, vma->vm_end);
-			if (err) {
-				up_read(&mm->mmap_sem);
-				goto out;
-			}
+			if (err)
+				goto error;
 		}
+
 		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		up_read(&mm->mmap_sem);
 
 		spin_lock(&ksm_mmlist_lock);
-		mm_slot = list_entry(mm_slot->mm_list.next,
+		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
 						struct mm_slot, mm_list);
-		spin_unlock(&ksm_mmlist_lock);
+		if (ksm_test_exit(mm)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			spin_unlock(&ksm_mmlist_lock);
+
+			free_mm_slot(mm_slot);
+			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			up_read(&mm->mmap_sem);
+			mmdrop(mm);
+		} else {
+			spin_unlock(&ksm_mmlist_lock);
+			up_read(&mm->mmap_sem);
+		}
 	}
 
 	ksm_scan.seqnr = 0;
-out:
+	return 0;
+
+error:
+	up_read(&mm->mmap_sem);
 	spin_lock(&ksm_mmlist_lock);
 	ksm_scan.mm_slot = &ksm_mm_head;
 	spin_unlock(&ksm_mmlist_lock);
@@ -755,6 +778,9 @@ static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
 	int err = -EFAULT;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1))
+		goto out;
+
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1)
 		goto out;
@@ -796,6 +822,10 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
 		return err;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1)) {
+		up_read(&mm1->mmap_sem);
+		goto out;
+	}
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1) {
 		up_read(&mm1->mmap_sem);
@@ -1174,7 +1204,12 @@ next_mm:
 
 	mm = slot->mm;
 	down_read(&mm->mmap_sem);
-	for (vma = find_vma(mm, ksm_scan.address); vma; vma = vma->vm_next) {
+	if (ksm_test_exit(mm))
+		vma = NULL;
+	else
+		vma = find_vma(mm, ksm_scan.address);
+
+	for (; vma; vma = vma->vm_next) {
 		if (!(vma->vm_flags & VM_MERGEABLE))
 			continue;
 		if (ksm_scan.address < vma->vm_start)
@@ -1183,6 +1218,8 @@ next_mm:
 			ksm_scan.address = vma->vm_end;
 
 		while (ksm_scan.address < vma->vm_end) {
+			if (ksm_test_exit(mm))
+				break;
 			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
 			if (*page && PageAnon(*page)) {
 				flush_anon_page(vma, *page, ksm_scan.address);
@@ -1205,6 +1242,11 @@ next_mm:
 		}
 	}
 
+	if (ksm_test_exit(mm)) {
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(&slot->rmap_list,
+						struct rmap_item, link);
+	}
 	/*
 	 * Nuke all the rmap_items that are above this current rmap:
 	 * because there were no VM_MERGEABLE vmas with such addresses.
@@ -1219,24 +1261,29 @@ next_mm:
 		 * We've completed a full scan of all vmas, holding mmap_sem
 		 * throughout, and found no VM_MERGEABLE: so do the same as
 		 * __ksm_exit does to remove this mm from all our lists now.
+		 * This applies either when cleaning up after __ksm_exit
+		 * (but beware: we can reach here even before __ksm_exit),
+		 * or when all VM_MERGEABLE areas have been unmapped (and
+		 * mmap_sem then protects against race with MADV_MERGEABLE).
 		 */
 		hlist_del(&slot->link);
 		list_del(&slot->mm_list);
+		spin_unlock(&ksm_mmlist_lock);
+
 		free_mm_slot(slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		up_read(&mm->mmap_sem);
+		mmdrop(mm);
+	} else {
+		spin_unlock(&ksm_mmlist_lock);
+		up_read(&mm->mmap_sem);
 	}
-	spin_unlock(&ksm_mmlist_lock);
-	up_read(&mm->mmap_sem);
 
 	/* Repeat until we've completed scanning the whole list */
 	slot = ksm_scan.mm_slot;
 	if (slot != &ksm_mm_head)
 		goto next_mm;
 
-	/*
-	 * Bump seqnr here rather than at top, so that __ksm_exit
-	 * can skip rb_erase on unstable tree until we run again.
-	 */
 	ksm_scan.seqnr++;
 	return NULL;
 }
@@ -1361,6 +1408,7 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+	atomic_inc(&mm->mm_count);
 
 	if (needs_wakeup)
 		wake_up_interruptible(&ksm_thread_wait);
@@ -1368,41 +1416,45 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm)
+void __ksm_exit(struct mm_struct *mm,
+		struct mmu_gather **tlbp, unsigned long end)
 {
 	struct mm_slot *mm_slot;
+	int easy_to_free = 0;
 
 	/*
-	 * This process is exiting: doesn't hold and doesn't need mmap_sem;
-	 * but we do need to exclude ksmd and other exiters while we modify
-	 * the various lists and trees.
+	 * This process is exiting: if it's straightforward (as is the
+	 * case when ksmd was never running), free mm_slot immediately.
+	 * But if it's at the cursor or has rmap_items linked to it, use
+	 * mmap_sem to synchronize with any break_cows before pagetables
+	 * are freed, and leave the mm_slot on the list for ksmd to free.
+	 * Beware: ksm may already have noticed it exiting and freed the slot.
 	 */
-	mutex_lock(&ksm_thread_mutex);
+
 	spin_lock(&ksm_mmlist_lock);
 	mm_slot = get_mm_slot(mm);
-	if (!list_empty(&mm_slot->rmap_list)) {
-		spin_unlock(&ksm_mmlist_lock);
-		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		spin_lock(&ksm_mmlist_lock);
-	}
-
-	if (ksm_scan.mm_slot == mm_slot) {
-		ksm_scan.mm_slot = list_entry(
-			mm_slot->mm_list.next, struct mm_slot, mm_list);
-		ksm_scan.address = 0;
-		ksm_scan.rmap_item = list_entry(
-			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
-		if (ksm_scan.mm_slot == &ksm_mm_head)
-			ksm_scan.seqnr++;
+	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
+		if (list_empty(&mm_slot->rmap_list)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			easy_to_free = 1;
+		} else {
+			list_move(&mm_slot->mm_list,
+				  &ksm_scan.mm_slot->mm_list);
+		}
 	}
-
-	hlist_del(&mm_slot->link);
-	list_del(&mm_slot->mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	free_mm_slot(mm_slot);
-	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-	mutex_unlock(&ksm_thread_mutex);
+	if (easy_to_free) {
+		free_mm_slot(mm_slot);
+		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		mmdrop(mm);
+	} else if (mm_slot) {
+		tlb_finish_mmu(*tlbp, 0, end);
+		down_write(&mm->mmap_sem);
+		up_write(&mm->mmap_sem);
+		*tlbp = tlb_gather_mmu(mm, 1);
+	}
 }
 
 #define KSM_ATTR_RO(_name) \
diff --git a/mm/memory.c b/mm/memory.c
index 1a435b8..f47ffe9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2648,8 +2648,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (!pte_none(*page_table))
+	if (!pte_none(*page_table) || ksm_test_exit(mm))
 		goto release;
+
 	inc_mm_counter(mm, anon_rss);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
@@ -2791,7 +2792,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * handle that later.
 	 */
 	/* Only go through if we didn't race with anybody else... */
-	if (likely(pte_same(*page_table, orig_pte))) {
+	if (likely(pte_same(*page_table, orig_pte) && !ksm_test_exit(mm))) {
 		flush_icache_page(vma, page);
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
diff --git a/mm/mmap.c b/mm/mmap.c
index 376492e..e02f1aa 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/ksm.h>
 #include <linux/mmu_notifier.h>
 #include <linux/perf_event.h>
 
@@ -2111,6 +2112,14 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
+
+	/*
+	 * For KSM to handle OOM without deadlock when it's breaking COW in a
+	 * likely victim of the OOM killer, we must serialize with ksm_exit()
+	 * after freeing mm's pages but before freeing its page tables.
+	 */
+	ksm_exit(mm, &tlb, end);
+
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
-- 
cgit v0.10.2


From 1c2fb7a4c2ca7a958b02bc1e615d0254990bba8d Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Mon, 21 Sep 2009 17:02:22 -0700
Subject: ksm: fix deadlock with munlock in exit_mmap

Rawhide users have reported hang at startup when cryptsetup is run: the
same problem can be simply reproduced by running a program int main() {
mlockall(MCL_CURRENT | MCL_FUTURE); return 0; }

The problem is that exit_mmap() applies munlock_vma_pages_all() to
clean up VM_LOCKED areas, and its current implementation (stupidly)
tries to fault in absent pages, for example where PROT_NONE prevented
them being faulted in when mlocking.  Whereas the "ksm: fix oom
deadlock" patch, knowing there's a race by which KSM might try to fault
in pages after exit_mmap() had finally zapped the range, backs out of
such faults doing nothing when its ksm_test_exit() notices mm_users 0.

So revert that part of "ksm: fix oom deadlock" which moved the
ksm_exit() call from before exit_mmap() to the middle of exit_mmap();
and remove those ksm_test_exit() checks from the page fault paths, so
allowing the munlocking to proceed without interference.

ksm_exit, if there are rmap_items still chained on this mm slot, takes
mmap_sem write side: so preventing KSM from working on an mm while
exit_mmap runs.  And KSM will bail out as soon as it notices that
mm_users is already zero, thanks to its internal ksm_test_exit checks.
So that when a task is killed by OOM killer or the user, KSM will not
indefinitely prevent it from running exit_mmap to release its memory.

This does break a part of what "ksm: fix oom deadlock" was trying to
achieve.  When unmerging KSM (echo 2 >/sys/kernel/mm/ksm), and even
when ksmd itself has to cancel a KSM page, it is possible that the
first OOM-kill victim would be the KSM process being faulted: then its
memory won't be freed until a second victim has been selected (freeing
memory for the unmerging fault to complete).

But the OOM killer is already liable to kill a second victim once the
intended victim's p->mm goes to NULL: so there's not much point in
rejecting this KSM patch before fixing that OOM behaviour.  It is very
much more important to allow KSM users to boot up, than to haggle over
an unlikely and poorly supported OOM case.

We also intend to fix munlocking to not fault pages: at which point
this patch _could_ be reverted; though that would be controversial, so
we hope to find a better solution.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Justin M. Forbes <jforbes@redhat.com>
Acked-for-now-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 2d64ff3..0e26de6 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -18,8 +18,7 @@ struct mmu_gather;
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
 int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm,
-		struct mmu_gather **tlbp, unsigned long end);
+void __ksm_exit(struct mm_struct *mm);
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
@@ -41,11 +40,10 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
 	return atomic_read(&mm->mm_users) == 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm,
-			    struct mmu_gather **tlbp, unsigned long end)
+static inline void ksm_exit(struct mm_struct *mm)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm, tlbp, end);
+		__ksm_exit(mm);
 }
 
 /*
@@ -86,8 +84,7 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
 	return 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm,
-			    struct mmu_gather **tlbp, unsigned long end)
+static inline void ksm_exit(struct mm_struct *mm)
 {
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 42f20f5..73a442b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -501,6 +501,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
+		ksm_exit(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
diff --git a/mm/ksm.c b/mm/ksm.c
index 722e3f2..92034eb 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1416,8 +1416,7 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm,
-		struct mmu_gather **tlbp, unsigned long end)
+void __ksm_exit(struct mm_struct *mm)
 {
 	struct mm_slot *mm_slot;
 	int easy_to_free = 0;
@@ -1450,10 +1449,8 @@ void __ksm_exit(struct mm_struct *mm,
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
 		mmdrop(mm);
 	} else if (mm_slot) {
-		tlb_finish_mmu(*tlbp, 0, end);
 		down_write(&mm->mmap_sem);
 		up_write(&mm->mmap_sem);
-		*tlbp = tlb_gather_mmu(mm, 1);
 	}
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index f47ffe9..05feaa1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2648,7 +2648,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (!pte_none(*page_table) || ksm_test_exit(mm))
+	if (!pte_none(*page_table))
 		goto release;
 
 	inc_mm_counter(mm, anon_rss);
@@ -2792,7 +2792,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * handle that later.
 	 */
 	/* Only go through if we didn't race with anybody else... */
-	if (likely(pte_same(*page_table, orig_pte) && !ksm_test_exit(mm))) {
+	if (likely(pte_same(*page_table, orig_pte))) {
 		flush_icache_page(vma, page);
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
diff --git a/mm/mmap.c b/mm/mmap.c
index e02f1aa..ffd6c6c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2113,13 +2113,6 @@ void exit_mmap(struct mm_struct *mm)
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
 
-	/*
-	 * For KSM to handle OOM without deadlock when it's breaking COW in a
-	 * likely victim of the OOM killer, we must serialize with ksm_exit()
-	 * after freeing mm's pages but before freeing its page tables.
-	 */
-	ksm_exit(mm, &tlb, end);
-
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
-- 
cgit v0.10.2


From 2ffd8679c8e4ec226718bff58b50b226dd477015 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:23 -0700
Subject: ksm: sysfs and defaults

At present KSM is just a waste of space if you don't have CONFIG_SYSFS=y
to provide the /sys/kernel/mm/ksm files to tune and activate it.

Make KSM depend on SYSFS?  Could do, but it might be better to provide
some defaults so that KSM works out-of-the-box, ready for testers to
madvise MADV_MERGEABLE, even without SYSFS.

Though anyone serious is likely to want to retune the numbers to their
taste once they have experience; and whether these settings ever reach
2.6.32 can be discussed along the way.

Save 1kB from tiny kernels by #ifdef'ing the SYSFS side of it.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/ksm.c b/mm/ksm.c
index 92034eb..3bd54ce 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -163,18 +163,18 @@ static unsigned long ksm_pages_unshared;
 static unsigned long ksm_rmap_items;
 
 /* Limit on the number of unswappable pages used */
-static unsigned long ksm_max_kernel_pages;
+static unsigned long ksm_max_kernel_pages = 2000;
 
 /* Number of pages ksmd should scan in one batch */
-static unsigned int ksm_thread_pages_to_scan;
+static unsigned int ksm_thread_pages_to_scan = 200;
 
 /* Milliseconds ksmd should sleep between batches */
-static unsigned int ksm_thread_sleep_millisecs;
+static unsigned int ksm_thread_sleep_millisecs = 20;
 
 #define KSM_RUN_STOP	0
 #define KSM_RUN_MERGE	1
 #define KSM_RUN_UNMERGE	2
-static unsigned int ksm_run;
+static unsigned int ksm_run = KSM_RUN_MERGE;
 
 static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
@@ -506,6 +506,10 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 	return err;
 }
 
+#ifdef CONFIG_SYSFS
+/*
+ * Only called through the sysfs control interface:
+ */
 static int unmerge_and_remove_all_rmap_items(void)
 {
 	struct mm_slot *mm_slot;
@@ -563,6 +567,7 @@ error:
 	spin_unlock(&ksm_mmlist_lock);
 	return err;
 }
+#endif /* CONFIG_SYSFS */
 
 static u32 calc_checksum(struct page *page)
 {
@@ -1454,6 +1459,11 @@ void __ksm_exit(struct mm_struct *mm)
 	}
 }
 
+#ifdef CONFIG_SYSFS
+/*
+ * This all compiles without CONFIG_SYSFS, but is a waste of space.
+ */
+
 #define KSM_ATTR_RO(_name) \
 	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 #define KSM_ATTR(_name) \
@@ -1636,6 +1646,7 @@ static struct attribute_group ksm_attr_group = {
 	.attrs = ksm_attrs,
 	.name = "ksm",
 };
+#endif /* CONFIG_SYSFS */
 
 static int __init ksm_init(void)
 {
@@ -1657,16 +1668,17 @@ static int __init ksm_init(void)
 		goto out_free2;
 	}
 
+#ifdef CONFIG_SYSFS
 	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
 	if (err) {
 		printk(KERN_ERR "ksm: register sysfs failed\n");
-		goto out_free3;
+		kthread_stop(ksm_thread);
+		goto out_free2;
 	}
+#endif /* CONFIG_SYSFS */
 
 	return 0;
 
-out_free3:
-	kthread_stop(ksm_thread);
 out_free2:
 	mm_slots_hash_free();
 out_free1:
-- 
cgit v0.10.2


From 7701c9c0f54feb682d0cefa2ae1f4a1e00e0ba09 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:24 -0700
Subject: ksm: add some documentation

Add Documentation/vm/ksm.txt: how to use the Kernel Samepage Merging feature

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 2f77ced..f80a449 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -6,6 +6,8 @@ balance
 	- various information on memory balancing.
 hugetlbpage.txt
 	- a brief summary of hugetlbpage support in the Linux kernel.
+ksm.txt
+	- how to use the Kernel Samepage Merging feature.
 locking
 	- info on how locking and synchronization is done in the Linux vm code.
 numa
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
new file mode 100644
index 0000000..72a22f6
--- /dev/null
+++ b/Documentation/vm/ksm.txt
@@ -0,0 +1,89 @@
+How to use the Kernel Samepage Merging feature
+----------------------------------------------
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
+added to the Linux kernel in 2.6.32.  See mm/ksm.c for its implementation,
+and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+
+The KSM daemon ksmd periodically scans those areas of user memory which
+have been registered with it, looking for pages of identical content which
+can be replaced by a single write-protected page (which is automatically
+copied if a process later wants to update its content).
+
+KSM was originally developed for use with KVM (where it was known as
+Kernel Shared Memory), to fit more virtual machines into physical memory,
+by sharing the data common between them.  But it can be useful to any
+application which generates many instances of the same data.
+
+KSM only merges anonymous (private) pages, never pagecache (file) pages.
+KSM's merged pages are at present locked into kernel memory for as long
+as they are shared: so cannot be swapped out like the user pages they
+replace (but swapping KSM pages should follow soon in a later release).
+
+KSM only operates on those areas of address space which an application
+has advised to be likely candidates for merging, by using the madvise(2)
+system call: int madvise(addr, length, MADV_MERGEABLE).
+
+The app may call int madvise(addr, length, MADV_UNMERGEABLE) to cancel
+that advice and restore unshared pages: whereupon KSM unmerges whatever
+it merged in that range.  Note: this unmerging call may suddenly require
+more memory than is available - possibly failing with EAGAIN, but more
+probably arousing the Out-Of-Memory killer.
+
+If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
+and MADV_UNMERGEABLE simply fail with EINVAL.  If the running kernel was
+built with CONFIG_KSM=y, those calls will normally succeed: even if the
+the KSM daemon is not currently running, MADV_MERGEABLE still registers
+the range for whenever the KSM daemon is started; even if the range
+cannot contain any pages which KSM could actually merge; even if
+MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
+
+Like other madvise calls, they are intended for use on mapped areas of
+the user address space: they will report ENOMEM if the specified range
+includes unmapped gaps (though working on the intervening mapped areas),
+and might fail with EAGAIN if not enough memory for internal structures.
+
+Applications should be considerate in their use of MADV_MERGEABLE,
+restricting its use to areas likely to benefit.  KSM's scans may use
+a lot of processing power, and its kernel-resident pages are a limited
+resource.  Some installations will disable KSM for these reasons.
+
+The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
+readable by all but writable only by root:
+
+max_kernel_pages - set to maximum number of kernel pages that KSM may use
+                   e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages"
+                   Value 0 imposes no limit on the kernel pages KSM may use;
+                   but note that any process using MADV_MERGEABLE can cause
+                   KSM to allocate these pages, unswappable until it exits.
+                   Default: 2000 (chosen for demonstration purposes)
+
+pages_to_scan    - how many present pages to scan before ksmd goes to sleep
+                   e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan"
+                   Default: 200 (chosen for demonstration purposes)
+
+sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
+                   e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
+                   Default: 20 (chosen for demonstration purposes)
+
+run              - set 0 to stop ksmd from running but keep merged pages,
+                   set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
+                   set 2 to stop ksmd and unmerge all pages currently merged,
+                         but leave mergeable areas registered for next run
+                   Default: 1 (for immediate use by apps which register)
+
+The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
+
+pages_shared     - how many shared unswappable kernel pages KSM is using
+pages_sharing    - how many more sites are sharing them i.e. how much saved
+pages_unshared   - how many pages unique but repeatedly checked for merging
+pages_volatile   - how many pages changing too fast to be placed in a tree
+full_scans       - how many times all mergeable areas have been scanned
+
+A high ratio of pages_sharing to pages_shared indicates good sharing, but
+a high ratio of pages_unshared to pages_sharing indicates wasted effort.
+pages_volatile embraces several different kinds of activity, but a high
+proportion there would also indicate poor use of madvise MADV_MERGEABLE.
+
+Izik Eidus,
+Hugh Dickins, 30 July 2009
diff --git a/mm/Kconfig b/mm/Kconfig
index c0b6afa..71eb0b4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -224,6 +224,7 @@ config KSM
 	  the many instances by a single resident page with that content, so
 	  saving memory until one or another app needs to modify the content.
 	  Recommended for use with KVM, or with other duplicative applications.
+	  See Documentation/vm/ksm.txt for more information.
 
 config DEFAULT_MMAP_MIN_ADDR
         int "Low address space to protect from user allocation"
-- 
cgit v0.10.2


From 8314c4f24a0a5c9b1f7544e9fa83a1d5367ddaa7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:25 -0700
Subject: ksm: remove VM_MERGEABLE_FLAGS

KSM originally stood for Kernel Shared Memory: but the kernel has long
supported shared memory, and VM_SHARED and VM_MAYSHARE vmas, and KSM is
something else.  So we switched to saying "merge" instead of "share".

But Chris Wright points out that this is confusing where mmap.c merges
adjacent vmas: most especially in the name VM_MERGEABLE_FLAGS, used by
is_mergeable_vma() to let vmas be merged despite flags being different.

Call it VMA_MERGE_DESPITE_FLAGS?  Perhaps, but at present it consists
only of VM_CAN_NONLINEAR: so for now it's clearer on all sides to use
that directly, with a comment on it in is_mergeable_vma().

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mmap.c b/mm/mmap.c
index ffd6c6c..22dff49 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -657,9 +657,6 @@ again:			remove_next = 1 + (end > next->vm_end);
 	validate_mm(mm);
 }
 
-/* Flags that can be inherited from an existing mapping when merging */
-#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR)
-
 /*
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
@@ -667,7 +664,8 @@ again:			remove_next = 1 + (end > next->vm_end);
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
 {
-	if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS)
+	/* VM_CAN_NONLINEAR may get set later by f_op->mmap() */
+	if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR)
 		return 0;
 	if (vma->vm_file != file)
 		return 0;
-- 
cgit v0.10.2


From a913e182ab9484308e870af37a14d372742d53b0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:26 -0700
Subject: ksm: clean up obsolete references

A few cleanups, given the munlock fix: the comment on ksm_test_exit() no
longer applies, and it can be made private to ksm.c; there's no more
reference to mmu_gather or tlb.h, and mmap.c doesn't need ksm.h.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 0e26de6..a485c14 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -12,8 +12,6 @@
 #include <linux/sched.h>
 #include <linux/vmstat.h>
 
-struct mmu_gather;
-
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
@@ -27,19 +25,6 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
-/*
- * For KSM to handle OOM without deadlock when it's breaking COW in a
- * likely victim of the OOM killer, exit_mmap() has to serialize with
- * ksm_exit() after freeing mm's pages but before freeing its page tables.
- * That leaves a window in which KSM might refault pages which have just
- * been finally unmapped: guard against that with ksm_test_exit(), and
- * use it after getting mmap_sem in ksm.c, to check if mm is exiting.
- */
-static inline bool ksm_test_exit(struct mm_struct *mm)
-{
-	return atomic_read(&mm->mm_users) == 0;
-}
-
 static inline void ksm_exit(struct mm_struct *mm)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
@@ -79,11 +64,6 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
-static inline bool ksm_test_exit(struct mm_struct *mm)
-{
-	return 0;
-}
-
 static inline void ksm_exit(struct mm_struct *mm)
 {
 }
diff --git a/mm/ksm.c b/mm/ksm.c
index 3bd54ce..e11e7a5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -32,7 +32,6 @@
 #include <linux/mmu_notifier.h>
 #include <linux/ksm.h>
 
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 /*
@@ -285,6 +284,19 @@ static inline int in_stable_tree(struct rmap_item *rmap_item)
 }
 
 /*
+ * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
+ * page tables after it has passed through ksm_exit() - which, if necessary,
+ * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
+ * a special flag: they can just back out as soon as mm_users goes to zero.
+ * ksm_test_exit() is used throughout to make this test for exit: in some
+ * places for correctness, in some places just to avoid unnecessary work.
+ */
+static inline bool ksm_test_exit(struct mm_struct *mm)
+{
+	return atomic_read(&mm->mm_users) == 0;
+}
+
+/*
  * We use break_ksm to break COW on a ksm page: it's a stripped down
  *
  *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
diff --git a/mm/mmap.c b/mm/mmap.c
index 22dff49..6eed98c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -27,7 +27,6 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
-#include <linux/ksm.h>
 #include <linux/mmu_notifier.h>
 #include <linux/perf_event.h>
 
-- 
cgit v0.10.2


From 35451beecbd7c86ce3249d543594517a5fe9a0cd Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:27 -0700
Subject: ksm: unmerge is an origin of OOMs

Just as the swapoff system call allocates many pages of RAM to various
processes, perhaps triggering OOM, so "echo 2 >/sys/kernel/mm/ksm/run"
(unmerge) is liable to allocate many pages of RAM to various processes,
perhaps triggering OOM; and each is normally run from a modest admin
process (swapoff or shell), easily repeated until it succeeds.

So treat unmerge_and_remove_all_rmap_items() in the same way that we treat
try_to_unuse(): generalize PF_SWAPOFF to PF_OOM_ORIGIN, and bracket both
with that, to ask the OOM killer to kill them first, to prevent them from
spawning more and more OOM kills.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8f3e63c..899d730 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1720,7 +1720,7 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
-#define PF_SWAPOFF	0x00080000	/* I am in swapoff */
+#define PF_OOM_ORIGIN	0x00080000	/* Allocating much memory to others */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
diff --git a/mm/ksm.c b/mm/ksm.c
index e11e7a5..37cc373 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1557,7 +1557,9 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
+			current->flags |= PF_OOM_ORIGIN;
 			err = unmerge_and_remove_all_rmap_items();
+			current->flags &= ~PF_OOM_ORIGIN;
 			if (err) {
 				ksm_run = KSM_RUN_STOP;
 				count = err;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a7b2460..da4c342 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -79,7 +79,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	/*
 	 * swapoff can easily use up all memory, so kill those first.
 	 */
-	if (p->flags & PF_SWAPOFF)
+	if (p->flags & PF_OOM_ORIGIN)
 		return ULONG_MAX;
 
 	/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 74f1102..f1bf19d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1575,9 +1575,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	p->flags &= ~SWP_WRITEOK;
 	spin_unlock(&swap_lock);
 
-	current->flags |= PF_SWAPOFF;
+	current->flags |= PF_OOM_ORIGIN;
 	err = try_to_unuse(type);
-	current->flags &= ~PF_SWAPOFF;
+	current->flags &= ~PF_OOM_ORIGIN;
 
 	if (err) {
 		/* re-insert swap space back into swap_list */
-- 
cgit v0.10.2


From 7103ad323b1ae32bedc3267402117e2f8b45e48d Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:02:28 -0700
Subject: ksm: mremap use err from ksm_madvise

mremap move's use of ksm_madvise() was assuming -ENOMEM on failure,
because ksm_madvise used to say -EAGAIN for that; but ksm_madvise now says
-ENOMEM (letting madvise convert that to -EAGAIN), and can also say
-ERESTARTSYS when signalled: so pass the error from ksm_madvise.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mremap.c b/mm/mremap.c
index 93addde..20a07db 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -175,6 +175,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	unsigned long excess = 0;
 	unsigned long hiwater_vm;
 	int split = 0;
+	int err;
 
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
@@ -190,9 +191,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	 * pages recently unmapped.  But leave vma->vm_flags as it was,
 	 * so KSM can come around to merge on vma and new_vma afterwards.
 	 */
-	if (ksm_madvise(vma, old_addr, old_addr + old_len,
-						MADV_UNMERGEABLE, &vm_flags))
-		return -ENOMEM;
+	err = ksm_madvise(vma, old_addr, old_addr + old_len,
+						MADV_UNMERGEABLE, &vm_flags);
+	if (err)
+		return err;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
-- 
cgit v0.10.2


From 398499d5f3613c47f2143b8c54a04efb5d7a6da9 Mon Sep 17 00:00:00 2001
From: "Moussa A. Ba" <moussa.a.ba@gmail.com>
Date: Mon, 21 Sep 2009 17:02:29 -0700
Subject: pagemap clear_refs: modify to specify anon or mapped vma clearing

The patch makes the clear_refs more versatile in adding the option to
select anonymous pages or file backed pages for clearing.  This addition
has a measurable impact on user space application performance as it
decreases the number of pagewalks in scenarios where one is only
interested in a specific type of page (anonymous or file mapped).

The patch adds anonymous and file backed filters to the clear_refs interface.

echo 1 > /proc/PID/clear_refs resets the bits on all pages
echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only
echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only

Any other value is ignored

Signed-off-by: Moussa A. Ba <moussa.a.ba@gmail.com>
Signed-off-by: Jared E. Hulbert <jaredeh@gmail.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 1c96cb6..ae7f8bb 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -375,6 +375,19 @@ of memory currently marked as referenced or accessed.
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
+The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
+bits on both physical and virtual pages associated with a process.
+To clear the bits for all the pages associated with the process
+    > echo 1 > /proc/PID/clear_refs
+
+To clear the bits for the anonymous pages associated with the process
+    > echo 2 > /proc/PID/clear_refs
+
+To clear the bits for the file mapped pages associated with the process
+    > echo 3 > /proc/PID/clear_refs
+Any other value written to /proc/PID/clear_refs will have no effect.
+
+
 1.2 Kernel data
 ---------------
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9bd8be1..59e98fe 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -465,6 +465,10 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
+#define CLEAR_REFS_ALL 1
+#define CLEAR_REFS_ANON 2
+#define CLEAR_REFS_MAPPED 3
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -472,13 +476,15 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	char buffer[PROC_NUMBUF], *end;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
+	int type;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	if (!simple_strtol(buffer, &end, 0))
+	type = simple_strtol(buffer, &end, 0);
+	if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
 		return -EINVAL;
 	if (*end == '\n')
 		end++;
@@ -494,9 +500,23 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			clear_refs_walk.private = vma;
-			if (!is_vm_hugetlb_page(vma))
-				walk_page_range(vma->vm_start, vma->vm_end,
-						&clear_refs_walk);
+			if (is_vm_hugetlb_page(vma))
+				continue;
+			/*
+			 * Writing 1 to /proc/pid/clear_refs affects all pages.
+			 *
+			 * Writing 2 to /proc/pid/clear_refs only affects
+			 * Anonymous pages.
+			 *
+			 * Writing 3 to /proc/pid/clear_refs only affects file
+			 * mapped pages.
+			 */
+			if (type == CLEAR_REFS_ANON && vma->vm_file)
+				continue;
+			if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+				continue;
+			walk_page_range(vma->vm_start, vma->vm_end,
+					&clear_refs_walk);
 		}
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
-- 
cgit v0.10.2


From fe1ff49d0d1c30254dbfc84c3786eb538e0cc7d1 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 21 Sep 2009 17:02:30 -0700
Subject: mm: kmem_cache_create(): make it easier to catch NULL cache names

Right now, if you inadvertently pass NULL to kmem_cache_create() at boot
time, it crashes much later after boot somewhere deep inside sysfs which
makes it very non obvious to figure out what's going on.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/slub.c b/mm/slub.c
index 0a216aa..4996fc7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3345,6 +3345,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 {
 	struct kmem_cache *s;
 
+	if (WARN_ON(!name))
+		return NULL;
+
 	down_write(&slub_lock);
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
-- 
cgit v0.10.2


From 2f66a68f3fac2e94da360c342ff78ab45553f86c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:31 -0700
Subject: page-allocator: change migratetype for all pageblocks within a
 high-order page during __rmqueue_fallback

When there are no pages of a target migratetype free, the page allocator
selects a high-order block of another migratetype to allocate from.  When
the order of the page taken is greater than pageblock_order, all
pageblocks within that high-order page should change migratetype so that
pages are later freed to the correct free-lists.

The current behaviour is that pageblocks change migratetype if the order
being split matches the pageblock_order.  When pageblock_order <
MAX_ORDER-1, ownership is not changing correct and pages are being later
freed to the incorrect list and this impacts fragmentation avoidance.

This patch changes all pageblocks within the high-order page being split
to the correct migratetype.  Without the patch, allocation success rates
for hugepages under stress were about 59% of physical memory on x86-64.
With the patch applied, this goes up to 65%.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9242d13..2075980 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -783,6 +783,17 @@ static int move_freepages_block(struct zone *zone, struct page *page,
 	return move_freepages(zone, start_page, end_page, migratetype);
 }
 
+static void change_pageblock_range(struct page *pageblock_page,
+					int start_order, int migratetype)
+{
+	int nr_pageblocks = 1 << (start_order - pageblock_order);
+
+	while (nr_pageblocks--) {
+		set_pageblock_migratetype(pageblock_page, migratetype);
+		pageblock_page += pageblock_nr_pages;
+	}
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
@@ -836,8 +847,9 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			list_del(&page->lru);
 			rmv_page_order(page);
 
-			if (current_order == pageblock_order)
-				set_pageblock_migratetype(page,
+			/* Take ownership for orders >= pageblock_order */
+			if (current_order >= pageblock_order)
+				change_pageblock_range(page, current_order,
 							start_migratetype);
 
 			expand(zone, page, order, current_order, area, migratetype);
-- 
cgit v0.10.2


From dd32c279983bf77fdcc8a9aa4a05b0ffdc75859c Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:02:32 -0700
Subject: vmalloc: unmap vmalloc area after hiding it

vmap area should be purged after vm_struct is removed from the list
because vread/vwrite etc...believes the range is valid while it's on
vm_struct list.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Mike Smith <scgtrp@gmail.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 759deae..c4071fa 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,17 +1270,21 @@ struct vm_struct *remove_vm_area(const void *addr)
 	if (va && va->flags & VM_VM_AREA) {
 		struct vm_struct *vm = va->private;
 		struct vm_struct *tmp, **p;
-
-		vmap_debug_free_range(va->va_start, va->va_end);
-		free_unmap_vmap_area(va);
-		vm->size -= PAGE_SIZE;
-
+		/*
+		 * remove from list and disallow access to this vm_struct
+		 * before unmap. (address range confliction is maintained by
+		 * vmap.)
+		 */
 		write_lock(&vmlist_lock);
 		for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
 			;
 		*p = tmp->next;
 		write_unlock(&vmlist_lock);
 
+		vmap_debug_free_range(va->va_start, va->va_end);
+		free_unmap_vmap_area(va);
+		vm->size -= PAGE_SIZE;
+
 		return vm;
 	}
 	return NULL;
-- 
cgit v0.10.2


From d0107eb07320b5d37c0f8a9f015534caebb28a48 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:02:34 -0700
Subject: kcore: fix vread/vwrite to be aware of holes

vread/vwrite access vmalloc area without checking there is a page or not.
In most case, this works well.

In old ages, the caller of get_vm_ara() is only IOREMAP and there is no
memory hole within vm_struct's [addr...addr + size - PAGE_SIZE] (
-PAGE_SIZE is for a guard page.)

After per-cpu-alloc patch, it uses get_vm_area() for reserve continuous
virtual address but remap _later_.  There tend to be a hole in valid
vmalloc area in vm_struct lists.  Then, skip the hole (not mapped page) is
necessary.  This patch updates vread/vwrite() for avoiding memory hole.

Routines which access vmalloc area without knowing for which addr is used
are
  - /proc/kcore
  - /dev/kmem

kcore checks IOREMAP, /dev/kmem doesn't.  After this patch, IOREMAP is
checked and /dev/kmem will avoid to read/write it.  Fixes to /proc/kcore
will be in the next patch in series.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Mike Smith <scgtrp@gmail.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c4071fa..9216b25 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -25,7 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/pfn.h>
 #include <linux/kmemleak.h>
-
+#include <linux/highmem.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
@@ -1643,10 +1643,120 @@ void *vmalloc_32_user(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc_32_user);
 
+/*
+ * small helper routine , copy contents to buf from addr.
+ * If the page is not present, fill zero.
+ */
+
+static int aligned_vread(char *buf, char *addr, unsigned long count)
+{
+	struct page *p;
+	int copied = 0;
+
+	while (count) {
+		unsigned long offset, length;
+
+		offset = (unsigned long)addr & ~PAGE_MASK;
+		length = PAGE_SIZE - offset;
+		if (length > count)
+			length = count;
+		p = vmalloc_to_page(addr);
+		/*
+		 * To do safe access to this _mapped_ area, we need
+		 * lock. But adding lock here means that we need to add
+		 * overhead of vmalloc()/vfree() calles for this _debug_
+		 * interface, rarely used. Instead of that, we'll use
+		 * kmap() and get small overhead in this access function.
+		 */
+		if (p) {
+			/*
+			 * we can expect USER0 is not used (see vread/vwrite's
+			 * function description)
+			 */
+			void *map = kmap_atomic(p, KM_USER0);
+			memcpy(buf, map + offset, length);
+			kunmap_atomic(map, KM_USER0);
+		} else
+			memset(buf, 0, length);
+
+		addr += length;
+		buf += length;
+		copied += length;
+		count -= length;
+	}
+	return copied;
+}
+
+static int aligned_vwrite(char *buf, char *addr, unsigned long count)
+{
+	struct page *p;
+	int copied = 0;
+
+	while (count) {
+		unsigned long offset, length;
+
+		offset = (unsigned long)addr & ~PAGE_MASK;
+		length = PAGE_SIZE - offset;
+		if (length > count)
+			length = count;
+		p = vmalloc_to_page(addr);
+		/*
+		 * To do safe access to this _mapped_ area, we need
+		 * lock. But adding lock here means that we need to add
+		 * overhead of vmalloc()/vfree() calles for this _debug_
+		 * interface, rarely used. Instead of that, we'll use
+		 * kmap() and get small overhead in this access function.
+		 */
+		if (p) {
+			/*
+			 * we can expect USER0 is not used (see vread/vwrite's
+			 * function description)
+			 */
+			void *map = kmap_atomic(p, KM_USER0);
+			memcpy(map + offset, buf, length);
+			kunmap_atomic(map, KM_USER0);
+		}
+		addr += length;
+		buf += length;
+		copied += length;
+		count -= length;
+	}
+	return copied;
+}
+
+/**
+ *	vread() -  read vmalloc area in a safe way.
+ *	@buf:		buffer for reading data
+ *	@addr:		vm address.
+ *	@count:		number of bytes to be read.
+ *
+ *	Returns # of bytes which addr and buf should be increased.
+ *	(same number to @count). Returns 0 if [addr...addr+count) doesn't
+ *	includes any intersect with alive vmalloc area.
+ *
+ *	This function checks that addr is a valid vmalloc'ed area, and
+ *	copy data from that area to a given buffer. If the given memory range
+ *	of [addr...addr+count) includes some valid address, data is copied to
+ *	proper area of @buf. If there are memory holes, they'll be zero-filled.
+ *	IOREMAP area is treated as memory hole and no copy is done.
+ *
+ *	If [addr...addr+count) doesn't includes any intersects with alive
+ *	vm_struct area, returns 0.
+ *	@buf should be kernel's buffer. Because	this function uses KM_USER0,
+ *	the caller should guarantee KM_USER0 is not used.
+ *
+ *	Note: In usual ops, vread() is never necessary because the caller
+ *	should know vmalloc() area is valid and can use memcpy().
+ *	This is for routines which have to access vmalloc area without
+ *	any informaion, as /dev/kmem.
+ *
+ */
+
 long vread(char *buf, char *addr, unsigned long count)
 {
 	struct vm_struct *tmp;
 	char *vaddr, *buf_start = buf;
+	unsigned long buflen = count;
 	unsigned long n;
 
 	/* Don't allow overflow */
@@ -1654,7 +1764,7 @@ long vread(char *buf, char *addr, unsigned long count)
 		count = -(unsigned long) addr;
 
 	read_lock(&vmlist_lock);
-	for (tmp = vmlist; tmp; tmp = tmp->next) {
+	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
 		vaddr = (char *) tmp->addr;
 		if (addr >= vaddr + tmp->size - PAGE_SIZE)
 			continue;
@@ -1667,32 +1777,72 @@ long vread(char *buf, char *addr, unsigned long count)
 			count--;
 		}
 		n = vaddr + tmp->size - PAGE_SIZE - addr;
-		do {
-			if (count == 0)
-				goto finished;
-			*buf = *addr;
-			buf++;
-			addr++;
-			count--;
-		} while (--n > 0);
+		if (n > count)
+			n = count;
+		if (!(tmp->flags & VM_IOREMAP))
+			aligned_vread(buf, addr, n);
+		else /* IOREMAP area is treated as memory hole */
+			memset(buf, 0, n);
+		buf += n;
+		addr += n;
+		count -= n;
 	}
 finished:
 	read_unlock(&vmlist_lock);
-	return buf - buf_start;
+
+	if (buf == buf_start)
+		return 0;
+	/* zero-fill memory holes */
+	if (buf != buf_start + buflen)
+		memset(buf, 0, buflen - (buf - buf_start));
+
+	return buflen;
 }
 
+/**
+ *	vwrite() -  write vmalloc area in a safe way.
+ *	@buf:		buffer for source data
+ *	@addr:		vm address.
+ *	@count:		number of bytes to be read.
+ *
+ *	Returns # of bytes which addr and buf should be incresed.
+ *	(same number to @count).
+ *	If [addr...addr+count) doesn't includes any intersect with valid
+ *	vmalloc area, returns 0.
+ *
+ *	This function checks that addr is a valid vmalloc'ed area, and
+ *	copy data from a buffer to the given addr. If specified range of
+ *	[addr...addr+count) includes some valid address, data is copied from
+ *	proper area of @buf. If there are memory holes, no copy to hole.
+ *	IOREMAP area is treated as memory hole and no copy is done.
+ *
+ *	If [addr...addr+count) doesn't includes any intersects with alive
+ *	vm_struct area, returns 0.
+ *	@buf should be kernel's buffer. Because	this function uses KM_USER0,
+ *	the caller should guarantee KM_USER0 is not used.
+ *
+ *	Note: In usual ops, vwrite() is never necessary because the caller
+ *	should know vmalloc() area is valid and can use memcpy().
+ *	This is for routines which have to access vmalloc area without
+ *	any informaion, as /dev/kmem.
+ *
+ *	The caller should guarantee KM_USER1 is not used.
+ */
+
 long vwrite(char *buf, char *addr, unsigned long count)
 {
 	struct vm_struct *tmp;
-	char *vaddr, *buf_start = buf;
-	unsigned long n;
+	char *vaddr;
+	unsigned long n, buflen;
+	int copied = 0;
 
 	/* Don't allow overflow */
 	if ((unsigned long) addr + count < count)
 		count = -(unsigned long) addr;
+	buflen = count;
 
 	read_lock(&vmlist_lock);
-	for (tmp = vmlist; tmp; tmp = tmp->next) {
+	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
 		vaddr = (char *) tmp->addr;
 		if (addr >= vaddr + tmp->size - PAGE_SIZE)
 			continue;
@@ -1704,18 +1854,21 @@ long vwrite(char *buf, char *addr, unsigned long count)
 			count--;
 		}
 		n = vaddr + tmp->size - PAGE_SIZE - addr;
-		do {
-			if (count == 0)
-				goto finished;
-			*addr = *buf;
-			buf++;
-			addr++;
-			count--;
-		} while (--n > 0);
+		if (n > count)
+			n = count;
+		if (!(tmp->flags & VM_IOREMAP)) {
+			aligned_vwrite(buf, addr, n);
+			copied++;
+		}
+		buf += n;
+		addr += n;
+		count -= n;
 	}
 finished:
 	read_unlock(&vmlist_lock);
-	return buf - buf_start;
+	if (!copied)
+		return 0;
+	return buflen;
 }
 
 /**
-- 
cgit v0.10.2


From 73d7c33e81aed92ac185950a20407c1a2ea65a83 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:02:35 -0700
Subject: kcore: /proc/kcore should use vread

/proc/kcore has its own routine to access vmallc area.  It can be replaced
with vread().  And by this, /proc/kcore can do safe access to vmalloc
area.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Mike Smith <scgtrp@gmail.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 59b43a0..f06f45b 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -328,43 +328,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				return -EFAULT;
 		} else if (is_vmalloc_addr((void *)start)) {
 			char * elf_buf;
-			struct vm_struct *m;
-			unsigned long curstart = start;
-			unsigned long cursize = tsz;
 
 			elf_buf = kzalloc(tsz, GFP_KERNEL);
 			if (!elf_buf)
 				return -ENOMEM;
-
-			read_lock(&vmlist_lock);
-			for (m=vmlist; m && cursize; m=m->next) {
-				unsigned long vmstart;
-				unsigned long vmsize;
-				unsigned long msize = m->size - PAGE_SIZE;
-
-				if (((unsigned long)m->addr + msize) < 
-								curstart)
-					continue;
-				if ((unsigned long)m->addr > (curstart + 
-								cursize))
-					break;
-				vmstart = (curstart < (unsigned long)m->addr ? 
-					(unsigned long)m->addr : curstart);
-				if (((unsigned long)m->addr + msize) > 
-							(curstart + cursize))
-					vmsize = curstart + cursize - vmstart;
-				else
-					vmsize = (unsigned long)m->addr + 
-							msize - vmstart;
-				curstart = vmstart + vmsize;
-				cursize -= vmsize;
-				/* don't dump ioremap'd stuff! (TA) */
-				if (m->flags & VM_IOREMAP)
-					continue;
-				memcpy(elf_buf + (vmstart - start),
-					(char *)vmstart, vmsize);
-			}
-			read_unlock(&vmlist_lock);
+			vread(elf_buf, (char *)start, tsz);
+			/* we have to zero-fill user buffer even if no read */
 			if (copy_to_user(buffer, elf_buf, tsz)) {
 				kfree(elf_buf);
 				return -EFAULT;
-- 
cgit v0.10.2


From cc013a88906bad9d2832d6316de1c7dbc1c2a794 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Mon, 21 Sep 2009 17:02:36 -0700
Subject: arches: drop superfluous casts in nr_free_pages() callers

Commit 96177299416dbccb73b54e6b344260154a445375 ("Drop free_pages()")
modified nr_free_pages() to return 'unsigned long' instead of 'unsigned
int'.  This made the casts to 'unsigned long' in most callers superfluous,
so remove them.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Chris Zankel <zankel@tensilica.com>
Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index af71d38..a0902c2 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -299,7 +299,7 @@ printk_memory_info(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       max_mapnr << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 0eab557..10b4035 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -349,7 +349,7 @@ void __init mem_init(void)
 
 	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, "
 	       "%luk data, %luk init)\n",
-	       (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       num_physpages << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index ea36186..f982606 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -596,8 +596,8 @@ void __init mem_init(void)
 
 	printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
 		"%dK data, %dK init, %luK highmem)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
-		codesize >> 10, datasize >> 10, initsize >> 10,
+		nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10,
+		datasize >> 10, initsize >> 10,
 		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
 
 	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index e819fa6..376f18c 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -141,7 +141,7 @@ void __init mem_init(void)
 
 	printk ("Memory: %luk/%luk available (%dk kernel code, "
 		"%dk reserved, %dk data, %dk init)\n",
-		(unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
+		nr_free_pages() << (PAGE_SHIFT - 10),
 		totalram_pages << (PAGE_SHIFT - 10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT - 10),
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index 514f46a..ff68b9f 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -54,7 +54,7 @@ mem_init(void)
         printk(KERN_INFO
                "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, "
 	       "%dk init)\n" ,
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       max_mapnr << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b115b3b..1d28624 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -655,7 +655,7 @@ mem_init (void)
 	initsize =  (unsigned long) __init_end - (unsigned long) __init_begin;
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
-	       "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
+	       "%luk data, %luk init)\n", nr_free_pages() << (PAGE_SHIFT - 10),
 	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
 	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
 
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 24d429f..9f581df 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -171,7 +171,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 		"%dk reserved, %dk data, %dk init)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 0007b2a..774549a 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -126,7 +126,7 @@ void __init mem_init(void)
 #endif
 
 	printk("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n",
-	       (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       totalram_pages << (PAGE_SHIFT-10),
 	       codepages << (PAGE_SHIFT-10),
 	       datapages << (PAGE_SHIFT-10),
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index f207f1a..1110784 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -204,7 +204,7 @@ void __init mem_init(void)
 	totalram_pages += free_all_bootmem();
 
 	printk(KERN_INFO "Memory: %luk/%luk available\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       num_physpages << (PAGE_SHIFT-10));
 #ifdef CONFIG_MMU
 	mem_init_done = 1;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 38c79c5..1f4ee47 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -417,7 +417,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       ram << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index 8cee387..ec14205 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -112,7 +112,7 @@ void __init mem_init(void)
 	       "Memory: %luk/%luk available"
 	       " (%dk kernel code, %dk reserved, %dk data, %dk init,"
 	       " %ldk highmem)\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
+	       nr_free_pages() << (PAGE_SHIFT - 10),
 	       max_mapnr << (PAGE_SHIFT - 10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT - 10),
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b0831d9..d5aca31 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -506,7 +506,7 @@ void __init mem_init(void)
 #endif
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
-		(unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 579382c..0e5c59b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -372,7 +372,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
 	       "%luk reserved, %luk data, %luk bss, %luk init)\n",
-		(unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c634dfb..7656479 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -105,7 +105,7 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
         printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
-                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
                 max_mapnr << (PAGE_SHIFT-10),
                 codesize >> 10,
                 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index edc842f..fabb7c6 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -232,7 +232,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 	       "%dk data, %dk init)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		datasize >> 10,
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 54114ad..dc7c3b1 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -472,7 +472,7 @@ void __init mem_init(void)
 			reservedpages++;
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       num_physpages << (PAGE_SHIFT - 10),
 	       codepages << (PAGE_SHIFT-10),
 	       reservedpages << (PAGE_SHIFT - 10),
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 61d7e61..a5d5e70 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -77,7 +77,7 @@ void __init mem_init(void)
 	num_physpages = totalram_pages;
 	max_pfn = totalram_pages;
 	printk(KERN_INFO "Memory: %luk available\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10));
+	       nr_free_pages() << (PAGE_SHIFT-10));
 	kmalloc_ok = 1;
 
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3cd7711..95e877f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -892,7 +892,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ea56b8c..810bd31 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -687,7 +687,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 			 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		max_pfn << (PAGE_SHIFT-10),
 		codesize >> 10,
 		absent_pages << (PAGE_SHIFT-10),
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 427e14f..cdbc27c 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -203,7 +203,7 @@ void __init mem_init(void)
 
 	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, "
 	       "%ldk data, %ldk init %ldk highmem)\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       ram << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
-- 
cgit v0.10.2


From 38a398572fa2d8124f7479e40db581b5b72719c9 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:39 -0700
Subject: page-allocator: remove dead function free_cold_page()

The function free_cold_page() has no callers so delete it.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7c777a0..c32bfa8 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -326,7 +326,6 @@ void free_pages_exact(void *virt, size_t size);
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_page(struct page *page);
-extern void free_cold_page(struct page *page);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr),0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2075980..913a8eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1079,11 +1079,6 @@ void free_hot_page(struct page *page)
 	free_hot_cold_page(page, 0);
 }
 	
-void free_cold_page(struct page *page)
-{
-	free_hot_cold_page(page, 1);
-}
-
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
-- 
cgit v0.10.2


From 4b4f278c030aa4b6ee0915f396e9a9478d92d610 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:41 -0700
Subject: tracing, page-allocator: add trace events for page allocation and
 page freeing

This patch adds trace events for the allocation and freeing of pages,
including the freeing of pagevecs.  Using the events, it will be known
what struct page and pfns are being allocated and freed and what the call
site was in many cases.

The page alloc tracepoints be used as an indicator as to whether the
workload was heavily dependant on the page allocator or not.  You can make
a guess based on vmstat but you can't get a per-process breakdown.
Depending on the call path, the call_site for page allocation may be
__get_free_pages() instead of a useful callsite.  Instead of passing down
a return address similar to slab debugging, the user should enable the
stacktrace and seg-addr options to get a proper stack trace.

The pagevec free tracepoint has a different usecase.  It can be used to
get a idea of how many pages are being dumped off the LRU and whether it
is kswapd doing the work or a process doing direct reclaim.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Ming Chun <macli@brc.ubc.ca>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 1493c54..0d358a0 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -225,6 +225,80 @@ TRACE_EVENT(kmem_cache_free,
 
 	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
 );
+
+TRACE_EVENT(mm_page_free_direct,
+
+	TP_PROTO(struct page *page, unsigned int order),
+
+	TP_ARGS(page, order),
+
+	TP_STRUCT__entry(
+		__field(	struct page *,	page		)
+		__field(	unsigned int,	order		)
+	),
+
+	TP_fast_assign(
+		__entry->page		= page;
+		__entry->order		= order;
+	),
+
+	TP_printk("page=%p pfn=%lu order=%d",
+			__entry->page,
+			page_to_pfn(__entry->page),
+			__entry->order)
+);
+
+TRACE_EVENT(mm_pagevec_free,
+
+	TP_PROTO(struct page *page, int cold),
+
+	TP_ARGS(page, cold),
+
+	TP_STRUCT__entry(
+		__field(	struct page *,	page		)
+		__field(	int,		cold		)
+	),
+
+	TP_fast_assign(
+		__entry->page		= page;
+		__entry->cold		= cold;
+	),
+
+	TP_printk("page=%p pfn=%lu order=0 cold=%d",
+			__entry->page,
+			page_to_pfn(__entry->page),
+			__entry->cold)
+);
+
+TRACE_EVENT(mm_page_alloc,
+
+	TP_PROTO(struct page *page, unsigned int order,
+			gfp_t gfp_flags, int migratetype),
+
+	TP_ARGS(page, order, gfp_flags, migratetype),
+
+	TP_STRUCT__entry(
+		__field(	struct page *,	page		)
+		__field(	unsigned int,	order		)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		migratetype	)
+	),
+
+	TP_fast_assign(
+		__entry->page		= page;
+		__entry->order		= order;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->migratetype	= migratetype;
+	),
+
+	TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s",
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->order,
+		__entry->migratetype,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
 #endif /* _TRACE_KMEM_H */
 
 /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 913a8eb..80f954d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1076,6 +1076,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 
 void free_hot_page(struct page *page)
 {
+	trace_mm_page_free_direct(page, 0);
 	free_hot_cold_page(page, 0);
 }
 	
@@ -1920,6 +1921,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
 
+	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -1954,13 +1956,16 @@ void __pagevec_free(struct pagevec *pvec)
 {
 	int i = pagevec_count(pvec);
 
-	while (--i >= 0)
+	while (--i >= 0) {
+		trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
 		free_hot_cold_page(pvec->pages[i], pvec->cold);
+	}
 }
 
 void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
+		trace_mm_page_free_direct(page, order);
 		if (order == 0)
 			free_hot_page(page);
 		else
-- 
cgit v0.10.2


From e0fff1bd12469c45dab088e353d8882761387bb6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:42 -0700
Subject: tracing, page-allocator: add trace events for anti-fragmentation
 falling back to other migratetypes

Fragmentation avoidance depends on being able to use free pages from lists
of the appropriate migrate type.  In the event this is not possible,
__rmqueue_fallback() selects a different list and in some circumstances
change the migratetype of the pageblock.  Simplistically, the more times
this event occurs, the more likely that fragmentation will be a problem
later for hugepage allocation at least but there are other considerations
such as the order of page being split to satisfy the allocation.

This patch adds a trace event for __rmqueue_fallback() that reports what
page is being used for the fallback, the orders of relevant pages, the
desired migratetype and the migratetype of the lists being used, whether
the pageblock changed type and whether this event is important with
respect to fragmentation avoidance or not.  This information can be used
to help analyse fragmentation avoidance and help decide whether
min_free_kbytes should be increased or not.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Ming Chun <macli@brc.ubc.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 0d358a0..aae16ee1 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -299,6 +299,44 @@ TRACE_EVENT(mm_page_alloc,
 		show_gfp_flags(__entry->gfp_flags))
 );
 
+TRACE_EVENT(mm_page_alloc_extfrag,
+
+	TP_PROTO(struct page *page,
+			int alloc_order, int fallback_order,
+			int alloc_migratetype, int fallback_migratetype),
+
+	TP_ARGS(page,
+		alloc_order, fallback_order,
+		alloc_migratetype, fallback_migratetype),
+
+	TP_STRUCT__entry(
+		__field(	struct page *,	page			)
+		__field(	int,		alloc_order		)
+		__field(	int,		fallback_order		)
+		__field(	int,		alloc_migratetype	)
+		__field(	int,		fallback_migratetype	)
+	),
+
+	TP_fast_assign(
+		__entry->page			= page;
+		__entry->alloc_order		= alloc_order;
+		__entry->fallback_order		= fallback_order;
+		__entry->alloc_migratetype	= alloc_migratetype;
+		__entry->fallback_migratetype	= fallback_migratetype;
+	),
+
+	TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->alloc_order,
+		__entry->fallback_order,
+		pageblock_order,
+		__entry->alloc_migratetype,
+		__entry->fallback_migratetype,
+		__entry->fallback_order < pageblock_order,
+		__entry->alloc_migratetype == __entry->fallback_migratetype)
+);
+
 #endif /* _TRACE_KMEM_H */
 
 /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 80f954d..77f517c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -853,6 +853,10 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 							start_migratetype);
 
 			expand(zone, page, order, current_order, area, migratetype);
+
+			trace_mm_page_alloc_extfrag(page, order, current_order,
+				start_migratetype, migratetype);
+
 			return page;
 		}
 	}
-- 
cgit v0.10.2


From 0d3d062a6e289e065bd0aa537a6806a1806bf8aa Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:44 -0700
Subject: tracing, page-allocator: add trace event for page traffic related to
 the buddy lists

The page allocation trace event reports that a page was successfully
allocated but it does not specify where it came from.  When analysing
performance, it can be important to distinguish between pages coming from
the per-cpu allocator and pages coming from the buddy lists as the latter
requires the zone lock to the taken and more data structures to be
examined.

This patch adds a trace event for __rmqueue reporting when a page is being
allocated from the buddy lists.  It distinguishes between being called to
refill the per-cpu lists or whether it is a high-order allocation.
Similarly, this patch adds an event to catch when the PCP lists are being
drained a little and pages are going back to the buddy lists.

This is trickier to draw conclusions from but high activity on those
events could explain why there were a large number of cache misses on a
page-allocator-intensive workload.  The coalescing and splitting of
buddies involves a lot of writing of page metadata and cache line bounces
not to mention the acquisition of an interrupt-safe lock necessary to
enter this path.

[akpm@linux-foundation.org: fix build]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Ming Chun <macli@brc.ubc.ca>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index aae16ee1..eaf46bd 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -299,6 +299,57 @@ TRACE_EVENT(mm_page_alloc,
 		show_gfp_flags(__entry->gfp_flags))
 );
 
+TRACE_EVENT(mm_page_alloc_zone_locked,
+
+	TP_PROTO(struct page *page, unsigned int order, int migratetype),
+
+	TP_ARGS(page, order, migratetype),
+
+	TP_STRUCT__entry(
+		__field(	struct page *,	page		)
+		__field(	unsigned int,	order		)
+		__field(	int,		migratetype	)
+	),
+
+	TP_fast_assign(
+		__entry->page		= page;
+		__entry->order		= order;
+		__entry->migratetype	= migratetype;
+	),
+
+	TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d",
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->order,
+		__entry->migratetype,
+		__entry->order == 0)
+);
+
+TRACE_EVENT(mm_page_pcpu_drain,
+
+	TP_PROTO(struct page *page, int order, int migratetype),
+
+	TP_ARGS(page, order, migratetype),
+
+	TP_STRUCT__entry(
+		__field(	struct page *,	page		)
+		__field(	int,		order		)
+		__field(	int,		migratetype	)
+	),
+
+	TP_fast_assign(
+		__entry->page		= page;
+		__entry->order		= order;
+		__entry->migratetype	= migratetype;
+	),
+
+	TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
+		__entry->page,
+		page_to_pfn(__entry->page),
+		__entry->order,
+		__entry->migratetype)
+);
+
 TRACE_EVENT(mm_page_alloc_extfrag,
 
 	TP_PROTO(struct page *page,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 77f517c..4c847cc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -535,6 +536,7 @@ static void free_pages_bulk(struct zone *zone, int count,
 		page = list_entry(list->prev, struct page, lru);
 		/* have to delete it as __free_one_page list manipulates */
 		list_del(&page->lru);
+		trace_mm_page_pcpu_drain(page, order, page_private(page));
 		__free_one_page(page, zone, order, page_private(page));
 	}
 	spin_unlock(&zone->lock);
@@ -890,6 +892,7 @@ retry_reserve:
 		}
 	}
 
+	trace_mm_page_alloc_zone_locked(page, order, migratetype);
 	return page;
 }
 
-- 
cgit v0.10.2


From c9d05cfc001fef3d6d37651e19ab9227a32b71f5 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:47 -0700
Subject: tracing, page-allocator: add a postprocessing script for
 page-allocator-related ftrace events

This patch adds a simple post-processing script for the
page-allocator-related trace events.  It can be used to give an indication
of who the most allocator-intensive processes are and how often the zone
lock was taken during the tracing period.  Example output looks like

Process                   Pages      Pages      Pages    Pages       PCPU     PCPU     PCPU   Fragment Fragment  MigType Fragment Fragment  Unknown
details                  allocd     allocd      freed    freed      pages   drains  refills   Fallback  Causing  Changed   Severe Moderate
                                under lock     direct  pagevec      drain
swapper-0                     0          0          2        0          0        0        0          0        0        0        0        0        0
Xorg-3770                 10603       5952       3685     6978       5996      194      192          0        0        0        0        0        0
modprobe-21397               51          0          0       86         31        1        0          0        0        0        0        0        0
xchat-5370                  228         93          0        0          0        0        3          0        0        0        0        0        0
awesome-4317                 32         32          0        0          0        0       32          0        0        0        0        0        0
thinkfan-3863                 2          0          1        1          0        0        0          0        0        0        0        0        0
hald-addon-stor-3935          2          0          0        0          0        0        0          0        0        0        0        0        0
akregator-4506                1          1          0        0          0        0        1          0        0        0        0        0        0
xmms-14888                    0          0          1        0          0        0        0          0        0        0        0        0        0
khelper-12                    1          0          0        0          0        0        0          0        0        0        0        0        0

Optionally, the output can include information on the parent or aggregate
based on process name instead of aggregating based on each pid. Example output
including parent information and stripped out the PID looks something like;

Process                        Pages      Pages      Pages    Pages       PCPU     PCPU     PCPU   Fragment Fragment  MigType Fragment Fragment  Unknown
details                       allocd     allocd      freed    freed      pages   drains  refills   Fallback  Causing  Changed   Severe Moderate
                                     under lock     direct  pagevec      drain
gdm-3756 :: Xorg-3770           3796       2976         99     3813       3224      104       98          0        0        0        0        0        0
init-1 :: hald-3892                1          0          0        0          0        0        0          0        0        0        0        0        0
git-21447 :: editor-21448          4          0          4        0          0        0        0          0        0        0        0        0        0

This says that Xorg allocated 3796 pages and it's parent process is gdm
with a PID of 3756;

The postprocessor parses the text output of tracing.  While there is a
binary format, the expectation is that the binary output can be readily
translated into text and post-processed offline.  Obviously if the text
format changes, the parser will break but the regular expression parser is
fairly rudimentary so should be readily adjustable.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Ming Chun <macli@brc.ubc.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
new file mode 100644
index 0000000..7df50e8
--- /dev/null
+++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
@@ -0,0 +1,418 @@
+#!/usr/bin/perl
+# This is a POC (proof of concept or piece of crap, take your pick) for reading the
+# text representation of trace output related to page allocation. It makes an attempt
+# to extract some high-level information on what is going on. The accuracy of the parser
+# may vary considerably
+#
+# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
+# other options
+#   --prepend-parent	Report on the parent proc and PID
+#   --read-procstat	If the trace lacks process info, get it from /proc
+#   --ignore-pid	Aggregate processes of the same name together
+#
+# Copyright (c) IBM Corporation 2009
+# Author: Mel Gorman <mel@csn.ul.ie>
+use strict;
+use Getopt::Long;
+
+# Tracepoint events
+use constant MM_PAGE_ALLOC		=> 1;
+use constant MM_PAGE_FREE_DIRECT 	=> 2;
+use constant MM_PAGEVEC_FREE		=> 3;
+use constant MM_PAGE_PCPU_DRAIN		=> 4;
+use constant MM_PAGE_ALLOC_ZONE_LOCKED	=> 5;
+use constant MM_PAGE_ALLOC_EXTFRAG	=> 6;
+use constant EVENT_UNKNOWN		=> 7;
+
+# Constants used to track state
+use constant STATE_PCPU_PAGES_DRAINED	=> 8;
+use constant STATE_PCPU_PAGES_REFILLED	=> 9;
+
+# High-level events extrapolated from tracepoints
+use constant HIGH_PCPU_DRAINS		=> 10;
+use constant HIGH_PCPU_REFILLS		=> 11;
+use constant HIGH_EXT_FRAGMENT		=> 12;
+use constant HIGH_EXT_FRAGMENT_SEVERE	=> 13;
+use constant HIGH_EXT_FRAGMENT_MODERATE	=> 14;
+use constant HIGH_EXT_FRAGMENT_CHANGED	=> 15;
+
+my %perprocesspid;
+my %perprocess;
+my $opt_ignorepid;
+my $opt_read_procstat;
+my $opt_prepend_parent;
+
+# Catch sigint and exit on request
+my $sigint_report = 0;
+my $sigint_exit = 0;
+my $sigint_pending = 0;
+my $sigint_received = 0;
+sub sigint_handler {
+	my $current_time = time;
+	if ($current_time - 2 > $sigint_received) {
+		print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
+		$sigint_report = 1;
+	} else {
+		if (!$sigint_exit) {
+			print "Second SIGINT received quickly, exiting\n";
+		}
+		$sigint_exit++;
+	}
+
+	if ($sigint_exit > 3) {
+		print "Many SIGINTs received, exiting now without report\n";
+		exit;
+	}
+
+	$sigint_received = $current_time;
+	$sigint_pending = 1;
+}
+$SIG{INT} = "sigint_handler";
+
+# Parse command line options
+GetOptions(
+	'ignore-pid'	 =>	\$opt_ignorepid,
+	'read-procstat'	 =>	\$opt_read_procstat,
+	'prepend-parent' =>	\$opt_prepend_parent,
+);
+
+# Defaults for dynamically discovered regex's
+my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
+
+# Dyanically discovered regex
+my $regex_fragdetails;
+
+# Static regex used. Specified like this for readability and for use with /o
+#                      (process_pid)     (cpus      )   ( time  )   (tpoint    ) (details)
+my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
+my $regex_statname = '[-0-9]*\s\((.*)\).*';
+my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
+
+sub generate_traceevent_regex {
+	my $event = shift;
+	my $default = shift;
+	my $regex;
+
+	# Read the event format or use the default
+	if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) {
+		$regex = $default;
+	} else {
+		my $line;
+		while (!eof(FORMAT)) {
+			$line = <FORMAT>;
+			if ($line =~ /^print fmt:\s"(.*)",.*/) {
+				$regex = $1;
+				$regex =~ s/%p/\([0-9a-f]*\)/g;
+				$regex =~ s/%d/\([-0-9]*\)/g;
+				$regex =~ s/%lu/\([0-9]*\)/g;
+			}
+		}
+	}
+
+	# Verify fields are in the right order
+	my $tuple;
+	foreach $tuple (split /\s/, $regex) {
+		my ($key, $value) = split(/=/, $tuple);
+		my $expected = shift;
+		if ($key ne $expected) {
+			print("WARNING: Format not as expected '$key' != '$expected'");
+			$regex =~ s/$key=\((.*)\)/$key=$1/;
+		}
+	}
+
+	if (defined shift) {
+		die("Fewer fields than expected in format");
+	}
+
+	return $regex;
+}
+$regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag",
+			$regex_fragdetails_default,
+			"page", "pfn",
+			"alloc_order", "fallback_order", "pageblock_order",
+			"alloc_migratetype", "fallback_migratetype",
+			"fragmenting", "change_ownership");
+
+sub read_statline($) {
+	my $pid = $_[0];
+	my $statline;
+
+	if (open(STAT, "/proc/$pid/stat")) {
+		$statline = <STAT>;
+		close(STAT);
+	}
+
+	if ($statline eq '') {
+		$statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
+	}
+
+	return $statline;
+}
+
+sub guess_process_pid($$) {
+	my $pid = $_[0];
+	my $statline = $_[1];
+
+	if ($pid == 0) {
+		return "swapper-0";
+	}
+
+	if ($statline !~ /$regex_statname/o) {
+		die("Failed to math stat line for process name :: $statline");
+	}
+	return "$1-$pid";
+}
+
+sub parent_info($$) {
+	my $pid = $_[0];
+	my $statline = $_[1];
+	my $ppid;
+
+	if ($pid == 0) {
+		return "NOPARENT-0";
+	}
+
+	if ($statline !~ /$regex_statppid/o) {
+		die("Failed to match stat line process ppid:: $statline");
+	}
+
+	# Read the ppid stat line
+	$ppid = $1;
+	return guess_process_pid($ppid, read_statline($ppid));
+}
+
+sub process_events {
+	my $traceevent;
+	my $process_pid;
+	my $cpus;
+	my $timestamp;
+	my $tracepoint;
+	my $details;
+	my $statline;
+
+	# Read each line of the event log
+EVENT_PROCESS:
+	while ($traceevent = <STDIN>) {
+		if ($traceevent =~ /$regex_traceevent/o) {
+			$process_pid = $1;
+			$tracepoint = $4;
+
+			if ($opt_read_procstat || $opt_prepend_parent) {
+				$process_pid =~ /(.*)-([0-9]*)$/;
+				my $process = $1;
+				my $pid = $2;
+
+				$statline = read_statline($pid);
+
+				if ($opt_read_procstat && $process eq '') {
+					$process_pid = guess_process_pid($pid, $statline);
+				}
+
+				if ($opt_prepend_parent) {
+					$process_pid = parent_info($pid, $statline) . " :: $process_pid";
+				}
+			}
+
+			# Unnecessary in this script. Uncomment if required
+			# $cpus = $2;
+			# $timestamp = $3;
+		} else {
+			next;
+		}
+
+		# Perl Switch() sucks majorly
+		if ($tracepoint eq "mm_page_alloc") {
+			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
+		} elsif ($tracepoint eq "mm_page_free_direct") {
+			$perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}++;
+		} elsif ($tracepoint eq "mm_pagevec_free") {
+			$perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}++;
+		} elsif ($tracepoint eq "mm_page_pcpu_drain") {
+			$perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
+			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
+		} elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
+			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
+			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
+		} elsif ($tracepoint eq "mm_page_alloc_extfrag") {
+
+			# Extract the details of the event now
+			$details = $5;
+
+			my ($page, $pfn);
+			my ($alloc_order, $fallback_order, $pageblock_order);
+			my ($alloc_migratetype, $fallback_migratetype);
+			my ($fragmenting, $change_ownership);
+
+			if ($details !~ /$regex_fragdetails/o) {
+				print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
+				next;
+			}
+
+			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;
+			$page = $1;
+			$pfn = $2;
+			$alloc_order = $3;
+			$fallback_order = $4;
+			$pageblock_order = $5;
+			$alloc_migratetype = $6;
+			$fallback_migratetype = $7;
+			$fragmenting = $8;
+			$change_ownership = $9;
+
+			if ($fragmenting) {
+				$perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
+				if ($fallback_order <= 3) {
+					$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
+				} else {
+					$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
+				}
+			}
+			if ($change_ownership) {
+				$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
+			}
+		} else {
+			$perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
+		}
+
+		# Catch a full pcpu drain event
+		if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
+				$tracepoint ne "mm_page_pcpu_drain") {
+
+			$perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
+			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
+		}
+
+		# Catch a full pcpu refill event
+		if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
+				$tracepoint ne "mm_page_alloc_zone_locked") {
+			$perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
+			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
+		}
+
+		if ($sigint_pending) {
+			last EVENT_PROCESS;
+		}
+	}
+}
+
+sub dump_stats {
+	my $hashref = shift;
+	my %stats = %$hashref;
+
+	# Dump per-process stats
+	my $process_pid;
+	my $max_strlen = 0;
+
+	# Get the maximum process name
+	foreach $process_pid (keys %perprocesspid) {
+		my $len = length($process_pid);
+		if ($len > $max_strlen) {
+			$max_strlen = $len;
+		}
+	}
+	$max_strlen += 2;
+
+	printf("\n");
+	printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n",
+		"Process", "Pages",  "Pages",      "Pages", "Pages", "PCPU",  "PCPU",   "PCPU",    "Fragment",  "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
+	printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n",
+		"details", "allocd", "allocd",     "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing",   "Changed", "Severe", "Moderate", "");
+
+	printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n",
+		"",        "",       "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
+
+	foreach $process_pid (keys %stats) {
+		# Dump final aggregates
+		if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) {
+			$stats{$process_pid}->{HIGH_PCPU_DRAINS}++;
+			$stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
+		}
+		if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) {
+			$stats{$process_pid}->{HIGH_PCPU_REFILLS}++;
+			$stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
+		}
+
+		printf("%-" . $max_strlen . "s %8d %10d   %8d %8d   %8d %8d %8d   %8d %8d %8d %8d %8d %8d\n",
+			$process_pid,
+			$stats{$process_pid}->{MM_PAGE_ALLOC},
+			$stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
+			$stats{$process_pid}->{MM_PAGE_FREE_DIRECT},
+			$stats{$process_pid}->{MM_PAGEVEC_FREE},
+			$stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
+			$stats{$process_pid}->{HIGH_PCPU_DRAINS},
+			$stats{$process_pid}->{HIGH_PCPU_REFILLS},
+			$stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG},
+			$stats{$process_pid}->{HIGH_EXT_FRAG},
+			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED},
+			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE},
+			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE},
+			$stats{$process_pid}->{EVENT_UNKNOWN});
+	}
+}
+
+sub aggregate_perprocesspid() {
+	my $process_pid;
+	my $process;
+	undef %perprocess;
+
+	foreach $process_pid (keys %perprocesspid) {
+		$process = $process_pid;
+		$process =~ s/-([0-9])*$//;
+		if ($process eq '') {
+			$process = "NO_PROCESS_NAME";
+		}
+
+		$perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC};
+		$perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED};
+		$perprocess{$process}->{MM_PAGE_FREE_DIRECT} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT};
+		$perprocess{$process}->{MM_PAGEVEC_FREE} += $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE};
+		$perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN};
+		$perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS};
+		$perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS};
+		$perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG};
+		$perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG};
+		$perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED};
+		$perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE};
+		$perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE};
+		$perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN};
+	}
+}
+
+sub report() {
+	if (!$opt_ignorepid) {
+		dump_stats(\%perprocesspid);
+	} else {
+		aggregate_perprocesspid();
+		dump_stats(\%perprocess);
+	}
+}
+
+# Process events or signals until neither is available
+sub signal_loop() {
+	my $sigint_processed;
+	do {
+		$sigint_processed = 0;
+		process_events();
+
+		# Handle pending signals if any
+		if ($sigint_pending) {
+			my $current_time = time;
+
+			if ($sigint_exit) {
+				print "Received exit signal\n";
+				$sigint_pending = 0;
+			}
+			if ($sigint_report) {
+				if ($current_time >= $sigint_received + 2) {
+					report();
+					$sigint_report = 0;
+					$sigint_pending = 0;
+					$sigint_processed = 1;
+				}
+			}
+		}
+	} while ($sigint_pending || $sigint_processed);
+}
+
+signal_loop();
+report();
-- 
cgit v0.10.2


From bb72222086260695d71afe60fa105649c1ea9463 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:48 -0700
Subject: tracing, documentation: add a document describing how to do some
 performance analysis with tracepoints

The documentation for ftrace, events and tracepoints is pretty extensive.
Similarly, the perf PCL tools help files --help are there and the code
simple enough to figure out what much of the switches mean.  However,
pulling the discrete bits and pieces together and translating that into
"how do I solve a problem" requires a fair amount of imagination.

This patch adds a simple document intended to get someone started on the

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Ming Chun <macli@brc.ubc.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt
new file mode 100644
index 0000000..5eb4e48
--- /dev/null
+++ b/Documentation/trace/tracepoint-analysis.txt
@@ -0,0 +1,327 @@
+		Notes on Analysing Behaviour Using Events and Tracepoints
+
+			Documentation written by Mel Gorman
+		PCL information heavily based on email from Ingo Molnar
+
+1. Introduction
+===============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used without
+creating custom kernel modules to register probe functions using the event
+tracing infrastructure.
+
+Simplistically, tracepoints will represent an important event that when can
+be taken in conjunction with other tracepoints to build a "Big Picture" of
+what is going on within the system. There are a large number of methods for
+gathering and interpreting these events. Lacking any current Best Practises,
+this document describes some of the methods that can be used.
+
+This document assumes that debugfs is mounted on /sys/kernel/debug and that
+the appropriate tracing options have been configured into the kernel. It is
+assumed that the PCL tool tools/perf has been installed and is in your path.
+
+2. Listing Available Events
+===========================
+
+2.1 Standard Utilities
+----------------------
+
+All possible events are visible from /sys/kernel/debug/tracing/events. Simply
+calling
+
+  $ find /sys/kernel/debug/tracing/events -type d
+
+will give a fair indication of the number of events available.
+
+2.2 PCL
+-------
+
+Discovery and enumeration of all counters and events, including tracepoints
+are available with the perf tool. Getting a list of available events is a
+simple case of
+
+  $ perf list 2>&1 | grep Tracepoint
+  ext4:ext4_free_inode                     [Tracepoint event]
+  ext4:ext4_request_inode                  [Tracepoint event]
+  ext4:ext4_allocate_inode                 [Tracepoint event]
+  ext4:ext4_write_begin                    [Tracepoint event]
+  ext4:ext4_ordered_write_end              [Tracepoint event]
+  [ .... remaining output snipped .... ]
+
+
+2. Enabling Events
+==================
+
+2.1 System-Wide Event Enabling
+------------------------------
+
+See Documentation/trace/events.txt for a proper description on how events
+can be enabled system-wide. A short example of enabling all events related
+to page allocation would look something like
+
+  $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done
+
+2.2 System-Wide Event Enabling with SystemTap
+---------------------------------------------
+
+In SystemTap, tracepoints are accessible using the kernel.trace() function
+call. The following is an example that reports every 5 seconds what processes
+were allocating the pages.
+
+  global page_allocs
+
+  probe kernel.trace("mm_page_alloc") {
+  	page_allocs[execname()]++
+  }
+
+  function print_count() {
+  	printf ("%-25s %-s\n", "#Pages Allocated", "Process Name")
+  	foreach (proc in page_allocs-)
+  		printf("%-25d %s\n", page_allocs[proc], proc)
+  	printf ("\n")
+  	delete page_allocs
+  }
+
+  probe timer.s(5) {
+          print_count()
+  }
+
+2.3 System-Wide Event Enabling with PCL
+---------------------------------------
+
+By specifying the -a switch and analysing sleep, the system-wide events
+for a duration of time can be examined.
+
+ $ perf stat -a \
+	-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
+	-e kmem:mm_pagevec_free \
+	sleep 10
+ Performance counter stats for 'sleep 10':
+
+           9630  kmem:mm_page_alloc
+           2143  kmem:mm_page_free_direct
+           7424  kmem:mm_pagevec_free
+
+   10.002577764  seconds time elapsed
+
+Similarly, one could execute a shell and exit it as desired to get a report
+at that point.
+
+2.4 Local Event Enabling
+------------------------
+
+Documentation/trace/ftrace.txt describes how to enable events on a per-thread
+basis using set_ftrace_pid.
+
+2.5 Local Event Enablement with PCL
+-----------------------------------
+
+Events can be activate and tracked for the duration of a process on a local
+basis using PCL such as follows.
+
+  $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
+		 -e kmem:mm_pagevec_free ./hackbench 10
+  Time: 0.909
+
+    Performance counter stats for './hackbench 10':
+
+          17803  kmem:mm_page_alloc
+          12398  kmem:mm_page_free_direct
+           4827  kmem:mm_pagevec_free
+
+    0.973913387  seconds time elapsed
+
+3. Event Filtering
+==================
+
+Documentation/trace/ftrace.txt covers in-depth how to filter events in
+ftrace.  Obviously using grep and awk of trace_pipe is an option as well
+as any script reading trace_pipe.
+
+4. Analysing Event Variances with PCL
+=====================================
+
+Any workload can exhibit variances between runs and it can be important
+to know what the standard deviation in. By and large, this is left to the
+performance analyst to do it by hand. In the event that the discrete event
+occurrences are useful to the performance analyst, then perf can be used.
+
+  $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct
+			-e kmem:mm_pagevec_free ./hackbench 10
+  Time: 0.890
+  Time: 0.895
+  Time: 0.915
+  Time: 1.001
+  Time: 0.899
+
+   Performance counter stats for './hackbench 10' (5 runs):
+
+          16630  kmem:mm_page_alloc         ( +-   3.542% )
+          11486  kmem:mm_page_free_direct   ( +-   4.771% )
+           4730  kmem:mm_pagevec_free       ( +-   2.325% )
+
+    0.982653002  seconds time elapsed   ( +-   1.448% )
+
+In the event that some higher-level event is required that depends on some
+aggregation of discrete events, then a script would need to be developed.
+
+Using --repeat, it is also possible to view how events are fluctuating over
+time on a system wide basis using -a and sleep.
+
+  $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
+		-e kmem:mm_pagevec_free \
+		-a --repeat 10 \
+		sleep 1
+  Performance counter stats for 'sleep 1' (10 runs):
+
+           1066  kmem:mm_page_alloc         ( +-  26.148% )
+            182  kmem:mm_page_free_direct   ( +-   5.464% )
+            890  kmem:mm_pagevec_free       ( +-  30.079% )
+
+    1.002251757  seconds time elapsed   ( +-   0.005% )
+
+5. Higher-Level Analysis with Helper Scripts
+============================================
+
+When events are enabled the events that are triggering can be read from
+/sys/kernel/debug/tracing/trace_pipe in human-readable format although binary
+options exist as well. By post-processing the output, further information can
+be gathered on-line as appropriate. Examples of post-processing might include
+
+  o Reading information from /proc for the PID that triggered the event
+  o Deriving a higher-level event from a series of lower-level events.
+  o Calculate latencies between two events
+
+Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example
+script that can read trace_pipe from STDIN or a copy of a trace. When used
+on-line, it can be interrupted once to generate a report without existing
+and twice to exit.
+
+Simplistically, the script just reads STDIN and counts up events but it
+also can do more such as
+
+  o Derive high-level events from many low-level events. If a number of pages
+    are freed to the main allocator from the per-CPU lists, it recognises
+    that as one per-CPU drain even though there is no specific tracepoint
+    for that event
+  o It can aggregate based on PID or individual process number
+  o In the event memory is getting externally fragmented, it reports
+    on whether the fragmentation event was severe or moderate.
+  o When receiving an event about a PID, it can record who the parent was so
+    that if large numbers of events are coming from very short-lived
+    processes, the parent process responsible for creating all the helpers
+    can be identified
+
+6. Lower-Level Analysis with PCL
+================================
+
+There may also be a requirement to identify what functions with a program
+were generating events within the kernel. To begin this sort of analysis, the
+data must be recorded. At the time of writing, this required root
+
+  $ perf record -c 1 \
+	-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
+	-e kmem:mm_pagevec_free \
+	./hackbench 10
+  Time: 0.894
+  [ perf record: Captured and wrote 0.733 MB perf.data (~32010 samples) ]
+
+Note the use of '-c 1' to set the event period to sample. The default sample
+period is quite high to minimise overhead but the information collected can be
+very coarse as a result.
+
+This record outputted a file called perf.data which can be analysed using
+perf report.
+
+  $ perf report
+  # Samples: 30922
+  #
+  # Overhead    Command                     Shared Object
+  # ........  .........  ................................
+  #
+      87.27%  hackbench  [vdso]
+       6.85%  hackbench  /lib/i686/cmov/libc-2.9.so
+       2.62%  hackbench  /lib/ld-2.9.so
+       1.52%       perf  [vdso]
+       1.22%  hackbench  ./hackbench
+       0.48%  hackbench  [kernel]
+       0.02%       perf  /lib/i686/cmov/libc-2.9.so
+       0.01%       perf  /usr/bin/perf
+       0.01%       perf  /lib/ld-2.9.so
+       0.00%  hackbench  /lib/i686/cmov/libpthread-2.9.so
+  #
+  # (For more details, try: perf report --sort comm,dso,symbol)
+  #
+
+According to this, the vast majority of events occured triggered on events
+within the VDSO. With simple binaries, this will often be the case so lets
+take a slightly different example. In the course of writing this, it was
+noticed that X was generating an insane amount of page allocations so lets look
+at it
+
+  $ perf record -c 1 -f \
+		-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
+		-e kmem:mm_pagevec_free \
+		-p `pidof X`
+
+This was interrupted after a few seconds and
+
+  $ perf report
+  # Samples: 27666
+  #
+  # Overhead  Command                            Shared Object
+  # ........  .......  .......................................
+  #
+      51.95%     Xorg  [vdso]
+      47.95%     Xorg  /opt/gfx-test/lib/libpixman-1.so.0.13.1
+       0.09%     Xorg  /lib/i686/cmov/libc-2.9.so
+       0.01%     Xorg  [kernel]
+  #
+  # (For more details, try: perf report --sort comm,dso,symbol)
+  #
+
+So, almost half of the events are occuring in a library. To get an idea which
+symbol.
+
+  $ perf report --sort comm,dso,symbol
+  # Samples: 27666
+  #
+  # Overhead  Command                            Shared Object  Symbol
+  # ........  .......  .......................................  ......
+  #
+      51.95%     Xorg  [vdso]                                   [.] 0x000000ffffe424
+      47.93%     Xorg  /opt/gfx-test/lib/libpixman-1.so.0.13.1  [.] pixmanFillsse2
+       0.09%     Xorg  /lib/i686/cmov/libc-2.9.so               [.] _int_malloc
+       0.01%     Xorg  /opt/gfx-test/lib/libpixman-1.so.0.13.1  [.] pixman_region32_copy_f
+       0.01%     Xorg  [kernel]                                 [k] read_hpet
+       0.01%     Xorg  /opt/gfx-test/lib/libpixman-1.so.0.13.1  [.] get_fast_path
+       0.00%     Xorg  [kernel]                                 [k] ftrace_trace_userstack
+
+To see where within the function pixmanFillsse2 things are going wrong
+
+  $ perf annotate pixmanFillsse2
+  [ ... ]
+    0.00 :         34eeb:       0f 18 08                prefetcht0 (%eax)
+         :      }
+         :
+         :      extern __inline void __attribute__((__gnu_inline__, __always_inline__, _
+         :      _mm_store_si128 (__m128i *__P, __m128i __B) :      {
+         :        *__P = __B;
+   12.40 :         34eee:       66 0f 7f 80 40 ff ff    movdqa %xmm0,-0xc0(%eax)
+    0.00 :         34ef5:       ff
+   12.40 :         34ef6:       66 0f 7f 80 50 ff ff    movdqa %xmm0,-0xb0(%eax)
+    0.00 :         34efd:       ff
+   12.39 :         34efe:       66 0f 7f 80 60 ff ff    movdqa %xmm0,-0xa0(%eax)
+    0.00 :         34f05:       ff
+   12.67 :         34f06:       66 0f 7f 80 70 ff ff    movdqa %xmm0,-0x90(%eax)
+    0.00 :         34f0d:       ff
+   12.58 :         34f0e:       66 0f 7f 40 80          movdqa %xmm0,-0x80(%eax)
+   12.31 :         34f13:       66 0f 7f 40 90          movdqa %xmm0,-0x70(%eax)
+   12.40 :         34f18:       66 0f 7f 40 a0          movdqa %xmm0,-0x60(%eax)
+   12.31 :         34f1d:       66 0f 7f 40 b0          movdqa %xmm0,-0x50(%eax)
+
+At a glance, it looks like the time is being spent copying pixmaps to
+the card.  Further investigation would be needed to determine why pixmaps
+are being copied around so much but a starting point would be to take an
+ancient build of libpixmap out of the library path where it was totally
+forgotten about from months ago!
-- 
cgit v0.10.2


From 8fbb398f5c78832ee61e0d5ed0793fa8857bd853 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:02:49 -0700
Subject: tracing, documentation: Add a document on the kmem tracepoints

Knowing tracepoints exist is not quite the same as knowing what they
should be used for.  This patch adds a document giving a basic description
of the kmem tracepoints and why they might be useful to a performance
analyst.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Ming Chun <macli@brc.ubc.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/trace/events-kmem.txt b/Documentation/trace/events-kmem.txt
new file mode 100644
index 0000000..6ef2a86
--- /dev/null
+++ b/Documentation/trace/events-kmem.txt
@@ -0,0 +1,107 @@
+			Subsystem Trace Points: kmem
+
+The tracing system kmem captures events related to object and page allocation
+within the kernel. Broadly speaking there are four major subheadings.
+
+  o Slab allocation of small objects of unknown type (kmalloc)
+  o Slab allocation of small objects of known type
+  o Page allocation
+  o Per-CPU Allocator Activity
+  o External Fragmentation
+
+This document will describe what each of the tracepoints are and why they
+might be useful.
+
+1. Slab allocation of small objects of unknown type
+===================================================
+kmalloc		call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s
+kmalloc_node	call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d
+kfree		call_site=%lx ptr=%p
+
+Heavy activity for these events may indicate that a specific cache is
+justified, particularly if kmalloc slab pages are getting significantly
+internal fragmented as a result of the allocation pattern. By correlating
+kmalloc with kfree, it may be possible to identify memory leaks and where
+the allocation sites were.
+
+
+2. Slab allocation of small objects of known type
+=================================================
+kmem_cache_alloc	call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s
+kmem_cache_alloc_node	call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d
+kmem_cache_free		call_site=%lx ptr=%p
+
+These events are similar in usage to the kmalloc-related events except that
+it is likely easier to pin the event down to a specific cache. At the time
+of writing, no information is available on what slab is being allocated from,
+but the call_site can usually be used to extrapolate that information
+
+3. Page allocation
+==================
+mm_page_alloc		  page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s
+mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
+mm_page_free_direct	  page=%p pfn=%lu order=%d
+mm_pagevec_free		  page=%p pfn=%lu order=%d cold=%d
+
+These four events deal with page allocation and freeing. mm_page_alloc is
+a simple indicator of page allocator activity. Pages may be allocated from
+the per-CPU allocator (high performance) or the buddy allocator.
+
+If pages are allocated directly from the buddy allocator, the
+mm_page_alloc_zone_locked event is triggered. This event is important as high
+amounts of activity imply high activity on the zone->lock. Taking this lock
+impairs performance by disabling interrupts, dirtying cache lines between
+CPUs and serialising many CPUs.
+
+When a page is freed directly by the caller, the mm_page_free_direct event
+is triggered. Significant amounts of activity here could indicate that the
+callers should be batching their activities.
+
+When pages are freed using a pagevec, the mm_pagevec_free is
+triggered. Broadly speaking, pages are taken off the LRU lock in bulk and
+freed in batch with a pagevec. Significant amounts of activity here could
+indicate that the system is under memory pressure and can also indicate
+contention on the zone->lru_lock.
+
+4. Per-CPU Allocator Activity
+=============================
+mm_page_alloc_zone_locked	page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
+mm_page_pcpu_drain		page=%p pfn=%lu order=%d cpu=%d migratetype=%d
+
+In front of the page allocator is a per-cpu page allocator. It exists only
+for order-0 pages, reduces contention on the zone->lock and reduces the
+amount of writing on struct page.
+
+When a per-CPU list is empty or pages of the wrong type are allocated,
+the zone->lock will be taken once and the per-CPU list refilled. The event
+triggered is mm_page_alloc_zone_locked for each page allocated with the
+event indicating whether it is for a percpu_refill or not.
+
+When the per-CPU list is too full, a number of pages are freed, each one
+which triggers a mm_page_pcpu_drain event.
+
+The individual nature of the events are so that pages can be tracked
+between allocation and freeing. A number of drain or refill pages that occur
+consecutively imply the zone->lock being taken once. Large amounts of PCP
+refills and drains could imply an imbalance between CPUs where too much work
+is being concentrated in one place. It could also indicate that the per-CPU
+lists should be a larger size. Finally, large amounts of refills on one CPU
+and drains on another could be a factor in causing large amounts of cache
+line bounces due to writes between CPUs and worth investigating if pages
+can be allocated and freed on the same CPU through some algorithm change.
+
+5. External Fragmentation
+=========================
+mm_page_alloc_extfrag		page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d
+
+External fragmentation affects whether a high-order allocation will be
+successful or not. For some types of hardware, this is important although
+it is avoided where possible. If the system is using huge pages and needs
+to be able to resize the pool over the lifetime of the system, this value
+is important.
+
+Large numbers of this event implies that memory is fragmenting and
+high-order allocations will start failing at some time in the future. One
+means of reducing the occurange of this event is to increase the size of
+min_free_kbytes in increments of 3*pageblock_size*nr_online_nodes where
+pageblock_size is usually the size of the default hugepage size.
-- 
cgit v0.10.2


From 31a5639623a487d6db996c8138c9e53fef2e2d91 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Mon, 21 Sep 2009 17:02:50 -0700
Subject: mm: add_to_swap_cache() must not sleep

After commit 355cfa73 ("mm: modify swap_map and add SWAP_HAS_CACHE flag"),
read_swap_cache_async() will busy-wait while a entry doesn't exist in swap
cache but it has SWAP_HAS_CACHE flag.

Such entries can exist on add/delete path of swap cache.  On add path,
add_to_swap_cache() is called soon after SWAP_HAS_CACHE flag is set, and
on delete path, swapcache_free() will be called (SWAP_HAS_CACHE flag is
cleared) soon after __delete_from_swap_cache() is called.  So, the
busy-wait works well in most cases.

But this mechanism can cause soft lockup if add_to_swap_cache() sleeps and
read_swap_cache_async() tries to swap-in the same entry on the same cpu.

This patch calls radix_tree_preload() before swapcache_prepare() and
divides add_to_swap_cache() into two part: radix_tree_preload() part and
radix_tree_insert() part(define it as __add_to_swap_cache()).

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5ae6b8b..b076a1a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -67,10 +67,10 @@ void show_swap_cache_info(void)
 }
 
 /*
- * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
@@ -78,28 +78,37 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	VM_BUG_ON(PageSwapCache(page));
 	VM_BUG_ON(!PageSwapBacked(page));
 
+	page_cache_get(page);
+	SetPageSwapCache(page);
+	set_page_private(page, entry.val);
+
+	spin_lock_irq(&swapper_space.tree_lock);
+	error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
+	if (likely(!error)) {
+		total_swapcache_pages++;
+		__inc_zone_page_state(page, NR_FILE_PAGES);
+		INC_CACHE_INFO(add_total);
+	}
+	spin_unlock_irq(&swapper_space.tree_lock);
+
+	if (unlikely(error)) {
+		set_page_private(page, 0UL);
+		ClearPageSwapCache(page);
+		page_cache_release(page);
+	}
+
+	return error;
+}
+
+
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+{
+	int error;
+
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
-		page_cache_get(page);
-		SetPageSwapCache(page);
-		set_page_private(page, entry.val);
-
-		spin_lock_irq(&swapper_space.tree_lock);
-		error = radix_tree_insert(&swapper_space.page_tree,
-						entry.val, page);
-		if (likely(!error)) {
-			total_swapcache_pages++;
-			__inc_zone_page_state(page, NR_FILE_PAGES);
-			INC_CACHE_INFO(add_total);
-		}
-		spin_unlock_irq(&swapper_space.tree_lock);
+		error = __add_to_swap_cache(page, entry);
 		radix_tree_preload_end();
-
-		if (unlikely(error)) {
-			set_page_private(page, 0UL);
-			ClearPageSwapCache(page);
-			page_cache_release(page);
-		}
 	}
 	return error;
 }
@@ -290,13 +299,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		}
 
 		/*
+		 * call radix_tree_preload() while we can wait.
+		 */
+		err = radix_tree_preload(gfp_mask & GFP_KERNEL);
+		if (err)
+			break;
+
+		/*
 		 * Swap entry may have been freed since our caller observed it.
 		 */
 		err = swapcache_prepare(entry);
-		if (err == -EEXIST) /* seems racy */
+		if (err == -EEXIST) {	/* seems racy */
+			radix_tree_preload_end();
 			continue;
-		if (err)           /* swp entry is obsolete ? */
+		}
+		if (err) {		/* swp entry is obsolete ? */
+			radix_tree_preload_end();
 			break;
+		}
 
 		/*
 		 * Associate the page with swap entry in the swap cache.
@@ -308,8 +328,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 */
 		__set_page_locked(new_page);
 		SetPageSwapBacked(new_page);
-		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
+		err = __add_to_swap_cache(new_page, entry);
 		if (likely(!err)) {
+			radix_tree_preload_end();
 			/*
 			 * Initiate read into locked page and return.
 			 */
@@ -317,6 +338,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			swap_readpage(new_page);
 			return new_page;
 		}
+		radix_tree_preload_end();
 		ClearPageSwapBacked(new_page);
 		__clear_page_locked(new_page);
 		swapcache_free(entry, NULL);
-- 
cgit v0.10.2


From 2ca4532a49be92d7b2766c3244b30fa8bfb0114d Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Mon, 21 Sep 2009 17:02:52 -0700
Subject: mm: add_to_swap_cache() does not return -EEXIST

After commit 355cfa73 ("mm: modify swap_map and add SWAP_HAS_CACHE flag"),
only the context which have set SWAP_HAS_CACHE flag by swapcache_prepare()
or get_swap_page() would call add_to_swap_cache().  So add_to_swap_cache()
doesn't return -EEXIST any more.

Even though it doesn't return -EEXIST, it's not good behavior conceptually
to call swapcache_prepare() in the -EEXIST case, because it means clearing
SWAP_HAS_CACHE flag while the entry is on swap cache.

This patch removes redundant codes and comments from callers of it, and
adds VM_BUG_ON() in error path of add_to_swap_cache() and some comments.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/shmem.c b/mm/shmem.c
index bd20f8b..376d8f0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1097,6 +1097,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	shmem_swp_unmap(entry);
 unlock:
 	spin_unlock(&info->lock);
+	/*
+	 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
+	 * clear SWAP_HAS_CACHE flag.
+	 */
 	swapcache_free(swap, NULL);
 redirty:
 	set_page_dirty(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b076a1a..6d1daeb 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -92,6 +92,12 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 	spin_unlock_irq(&swapper_space.tree_lock);
 
 	if (unlikely(error)) {
+		/*
+		 * Only the context which have set SWAP_HAS_CACHE flag
+		 * would call add_to_swap_cache().
+		 * So add_to_swap_cache() doesn't returns -EEXIST.
+		 */
+		VM_BUG_ON(error == -EEXIST);
 		set_page_private(page, 0UL);
 		ClearPageSwapCache(page);
 		page_cache_release(page);
@@ -146,38 +152,34 @@ int add_to_swap(struct page *page)
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(!PageUptodate(page));
 
-	for (;;) {
-		entry = get_swap_page();
-		if (!entry.val)
-			return 0;
+	entry = get_swap_page();
+	if (!entry.val)
+		return 0;
 
+	/*
+	 * Radix-tree node allocations from PF_MEMALLOC contexts could
+	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
+	 * stops emergency reserves from being allocated.
+	 *
+	 * TODO: this could cause a theoretical memory reclaim
+	 * deadlock in the swap out path.
+	 */
+	/*
+	 * Add it to the swap cache and mark it dirty
+	 */
+	err = add_to_swap_cache(page, entry,
+			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
+
+	if (!err) {	/* Success */
+		SetPageDirty(page);
+		return 1;
+	} else {	/* -ENOMEM radix-tree allocation failure */
 		/*
-		 * Radix-tree node allocations from PF_MEMALLOC contexts could
-		 * completely exhaust the page allocator. __GFP_NOMEMALLOC
-		 * stops emergency reserves from being allocated.
-		 *
-		 * TODO: this could cause a theoretical memory reclaim
-		 * deadlock in the swap out path.
-		 */
-		/*
-		 * Add it to the swap cache and mark it dirty
+		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
+		 * clear SWAP_HAS_CACHE flag.
 		 */
-		err = add_to_swap_cache(page, entry,
-				__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
-
-		switch (err) {
-		case 0:				/* Success */
-			SetPageDirty(page);
-			return 1;
-		case -EEXIST:
-			/* Raced with "speculative" read_swap_cache_async */
-			swapcache_free(entry, NULL);
-			continue;
-		default:
-			/* -ENOMEM radix-tree allocation failure */
-			swapcache_free(entry, NULL);
-			return 0;
-		}
+		swapcache_free(entry, NULL);
+		return 0;
 	}
 }
 
@@ -318,14 +320,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			break;
 		}
 
-		/*
-		 * Associate the page with swap entry in the swap cache.
-		 * May fail (-EEXIST) if there is already a page associated
-		 * with this entry in the swap cache: added by a racing
-		 * read_swap_cache_async, or add_to_swap or shmem_writepage
-		 * re-using the just freed swap entry for an existing page.
-		 * May fail (-ENOMEM) if radix-tree node allocation failed.
-		 */
+		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
 		__set_page_locked(new_page);
 		SetPageSwapBacked(new_page);
 		err = __add_to_swap_cache(new_page, entry);
@@ -341,6 +336,10 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		radix_tree_preload_end();
 		ClearPageSwapBacked(new_page);
 		__clear_page_locked(new_page);
+		/*
+		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
+		 * clear SWAP_HAS_CACHE flag.
+		 */
 		swapcache_free(entry, NULL);
 	} while (err != -ENOMEM);
 
-- 
cgit v0.10.2


From cff397e6b3c5bd67e26946792e81ab064c9acc1f Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Mon, 21 Sep 2009 17:02:53 -0700
Subject: mm: includecheck fix for mm/shmem.c

Fix the following 'make includecheck' warning:

  mm/shmem.c: linux/vfs.h is included more than once.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/shmem.c b/mm/shmem.c
index 376d8f0..25ba75d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -49,7 +49,6 @@ static struct vfsmount *shm_mnt;
 #include <linux/backing-dev.h>
 #include <linux/shmem_fs.h>
 #include <linux/writeback.h>
-#include <linux/vfs.h>
 #include <linux/blkdev.h>
 #include <linux/security.h>
 #include <linux/swapops.h>
-- 
cgit v0.10.2


From 72ff13b7036bc7923e0f2b5f4a724ca260d49aab Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Mon, 21 Sep 2009 17:02:53 -0700
Subject: mm: includecheck fix for mm/nommu.c

Fix the following 'make includecheck' warning:

  mm/nommu.c: internal.h is included more than once.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/nommu.c b/mm/nommu.c
index 66e81e7..3b90086 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -56,8 +56,6 @@ void no_printk(const char *fmt, ...)
 	no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
 #endif
 
-#include "internal.h"
-
 void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
-- 
cgit v0.10.2


From bbba809e96539672f775a3d70102657d05816a5b Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 21 Sep 2009 17:02:55 -0700
Subject: md: avoid use of broken kzalloc mempool

The kzalloc mempool does not re-zero items that have been used and then
returned to the pool.  Manually zero the allocated multipath_bh instead.

Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Sage Weil <sage@newdream.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 89e7681..d2d3fd5 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -150,6 +150,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
 	}
 
 	mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
+	memset(mp_bh, 0, sizeof(*mp_bh));
 
 	mp_bh->master_bio = bio;
 	mp_bh->mddev = mddev;
@@ -493,7 +494,7 @@ static int multipath_run (mddev_t *mddev)
 	}
 	mddev->degraded = conf->raid_disks - conf->working_disks;
 
-	conf->pool = mempool_create_kzalloc_pool(NR_RESERVED_BUFS,
+	conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
 						 sizeof(struct multipath_bh));
 	if (conf->pool == NULL) {
 		printk(KERN_ERR 
-- 
cgit v0.10.2


From bba78819548a59a52e60f0b259997bbd011164ae Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 21 Sep 2009 17:02:56 -0700
Subject: mm: remove broken 'kzalloc' mempool

The kzalloc mempool zeros items when they are initially allocated, but
does not rezero used items that are returned to the pool.  Consequently
mempool_alloc()s may return non-zeroed memory.

Since there are/were only two in-tree users for
mempool_create_kzalloc_pool(), and 'fixing' this in a way that will
re-zero used (but not new) items before first use is non-trivial, just
remove it.

Signed-off-by: Sage Weil <sage@newdream.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 9be484d..7c08052 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -47,22 +47,16 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
 }
 
 /*
- * 2 mempool_alloc_t's and a mempool_free_t to kmalloc/kzalloc and kfree
- * the amount of memory specified by pool_data
+ * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the
+ * amount of memory specified by pool_data
  */
 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data);
-void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data);
 void mempool_kfree(void *element, void *pool_data);
 static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
 {
 	return mempool_create(min_nr, mempool_kmalloc, mempool_kfree,
 			      (void *) size);
 }
-static inline mempool_t *mempool_create_kzalloc_pool(int min_nr, size_t size)
-{
-	return mempool_create(min_nr, mempool_kzalloc, mempool_kfree,
-			      (void *) size);
-}
 
 /*
  * A mempool_alloc_t and mempool_free_t for a simple page allocator that
diff --git a/mm/mempool.c b/mm/mempool.c
index 32e75d4..1a3bc3d 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -308,13 +308,6 @@ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
 }
 EXPORT_SYMBOL(mempool_kmalloc);
 
-void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
-{
-	size_t size = (size_t)pool_data;
-	return kzalloc(size, gfp_mask);
-}
-EXPORT_SYMBOL(mempool_kzalloc);
-
 void mempool_kfree(void *element, void *pool_data)
 {
 	kfree(element);
-- 
cgit v0.10.2


From b7c46d151cb82856a429709d1227ba1648028232 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 21 Sep 2009 17:02:56 -0700
Subject: mm: drop unneeded double negations

Remove double negations where the operand is already boolean.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fd4529d..9b10d87 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -648,7 +648,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	int nid = z->zone_pgdat->node_id;
 	int zid = zone_idx(z);
 	struct mem_cgroup_per_zone *mz;
-	int lru = LRU_FILE * !!file + !!active;
+	int lru = LRU_FILE * file + active;
 	int ret;
 
 	BUG_ON(!mem_cont);
diff --git a/mm/memory.c b/mm/memory.c
index 05feaa1..3cbeaab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -599,7 +599,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (page) {
 		get_page(page);
 		page_dup_rmap(page);
-		rss[!!PageAnon(page)]++;
+		rss[PageAnon(page)]++;
 	}
 
 out_set_pte:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e219b47..cad5d52 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -971,7 +971,7 @@ static unsigned long isolate_pages_global(unsigned long nr,
 	if (file)
 		lru += LRU_FILE;
 	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
-								mode, !!file);
+								mode, file);
 }
 
 /*
@@ -1209,7 +1209,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			lru = page_lru(page);
 			add_page_to_lru_list(zone, page, lru);
 			if (is_active_lru(lru)) {
-				int file = !!is_file_lru(lru);
+				int file = is_file_lru(lru);
 				reclaim_stat->recent_rotated[file]++;
 			}
 			if (!pagevec_add(&pvec, page)) {
@@ -1319,7 +1319,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	if (scanning_global_lru(sc)) {
 		zone->pages_scanned += pgscanned;
 	}
-	reclaim_stat->recent_scanned[!!file] += nr_taken;
+	reclaim_stat->recent_scanned[file] += nr_taken;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	if (file)
@@ -1372,7 +1372,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * helps balance scan pressure between file and anonymous pages in
 	 * get_scan_ratio.
 	 */
-	reclaim_stat->recent_rotated[!!file] += nr_rotated;
+	reclaim_stat->recent_rotated[file] += nr_rotated;
 
 	move_active_pages_to_lru(zone, &l_active,
 						LRU_ACTIVE + file * LRU_FILE);
-- 
cgit v0.10.2


From 401a8e1c1670085b8177330ca47d4f7c4ac88761 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 21 Sep 2009 17:02:58 -0700
Subject: mm: introduce page_lru_base_type()

Instead of abusing page_is_file_cache() for LRU list index arithmetic, add
another helper with a more appropriate name and convert the non-boolean
users of page_is_file_cache() accordingly.

This new helper gives the LRU base type a page is supposed to live on,
inactive anon or inactive file.

[hugh.dickins@tiscali.co.uk: convert del_page_from_lru() also]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 7fbb972..99977ff 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -39,21 +39,36 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 	mem_cgroup_del_lru_list(page, l);
 }
 
+/**
+ * page_lru_base_type - which LRU list type should a page be on?
+ * @page: the page to test
+ *
+ * Used for LRU list index arithmetic.
+ *
+ * Returns the base LRU type - file or anon - @page should be on.
+ */
+static inline enum lru_list page_lru_base_type(struct page *page)
+{
+	if (page_is_file_cache(page))
+		return LRU_INACTIVE_FILE;
+	return LRU_INACTIVE_ANON;
+}
+
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-	enum lru_list l = LRU_BASE;
+	enum lru_list l;
 
 	list_del(&page->lru);
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
 		l = LRU_UNEVICTABLE;
 	} else {
+		l = page_lru_base_type(page);
 		if (PageActive(page)) {
 			__ClearPageActive(page);
 			l += LRU_ACTIVE;
 		}
-		l += page_is_file_cache(page);
 	}
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 	mem_cgroup_del_lru_list(page, l);
@@ -68,14 +83,14 @@ del_page_from_lru(struct zone *zone, struct page *page)
  */
 static inline enum lru_list page_lru(struct page *page)
 {
-	enum lru_list lru = LRU_BASE;
+	enum lru_list lru;
 
 	if (PageUnevictable(page))
 		lru = LRU_UNEVICTABLE;
 	else {
+		lru = page_lru_base_type(page);
 		if (PageActive(page))
 			lru += LRU_ACTIVE;
-		lru += page_is_file_cache(page);
 	}
 
 	return lru;
diff --git a/mm/swap.c b/mm/swap.c
index cb29ae5..168d53e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -118,7 +118,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 			spin_lock(&zone->lru_lock);
 		}
 		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-			int lru = page_is_file_cache(page);
+			int lru = page_lru_base_type(page);
 			list_move_tail(&page->lru, &zone->lru[lru].list);
 			pgmoved++;
 		}
@@ -181,7 +181,7 @@ void activate_page(struct page *page)
 	spin_lock_irq(&zone->lru_lock);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		int file = page_is_file_cache(page);
-		int lru = LRU_BASE + file;
+		int lru = page_lru_base_type(page);
 		del_page_from_lru_list(zone, page, lru);
 
 		SetPageActive(page);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cad5d52..30e56ee 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -531,7 +531,7 @@ redo:
 		 * unevictable page on [in]active list.
 		 * We know how to handle that.
 		 */
-		lru = active + page_is_file_cache(page);
+		lru = active + page_lru_base_type(page);
 		lru_cache_add_lru(page, lru);
 	} else {
 		/*
@@ -986,7 +986,7 @@ static unsigned long clear_active_flags(struct list_head *page_list,
 	struct page *page;
 
 	list_for_each_entry(page, page_list, lru) {
-		lru = page_is_file_cache(page);
+		lru = page_lru_base_type(page);
 		if (PageActive(page)) {
 			lru += LRU_ACTIVE;
 			ClearPageActive(page);
@@ -2652,7 +2652,7 @@ static void check_move_unevictable_page(struct page *page, struct zone *zone)
 retry:
 	ClearPageUnevictable(page);
 	if (page_evictable(page, NULL)) {
-		enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+		enum lru_list l = page_lru_base_type(page);
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
 		list_move(&page->lru, &zone->lru[l].list);
-- 
cgit v0.10.2


From 6c0b13519d1c755d874e82c8fb8a6dcef0ee402c Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 21 Sep 2009 17:02:59 -0700
Subject: mm: return boolean from page_is_file_cache()

page_is_file_cache() has been used for both boolean checks and LRU
arithmetic, which was always a bit weird.

Now that page_lru_base_type() exists for LRU arithmetic, make
page_is_file_cache() a real predicate function and adjust the
boolean-using callsites to drop those pesky double negations.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 99977ff..8835b87 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,7 +5,7 @@
  * page_is_file_cache - should the page be on a file LRU or anon LRU?
  * @page: the page to test
  *
- * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
+ * Returns 1 if @page is page cache page backed by a regular filesystem,
  * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
  * Used by functions that manipulate the LRU lists, to sort a page
  * onto the right LRU list.
@@ -16,11 +16,7 @@
  */
 static inline int page_is_file_cache(struct page *page)
 {
-	if (PageSwapBacked(page))
-		return 0;
-
-	/* The page is page cache backed by a normal filesystem. */
-	return LRU_FILE;
+	return !PageSwapBacked(page);
 }
 
 static inline void
diff --git a/mm/migrate.c b/mm/migrate.c
index b535a2c..e97e513 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -68,7 +68,7 @@ int putback_lru_pages(struct list_head *l)
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
-				    !!page_is_file_cache(page));
+				page_is_file_cache(page));
 		putback_lru_page(page);
 		count++;
 	}
@@ -701,7 +701,7 @@ unlock:
  		 */
  		list_del(&page->lru);
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
-				    !!page_is_file_cache(page));
+				page_is_file_cache(page));
 		putback_lru_page(page);
 	}
 
@@ -751,7 +751,7 @@ int migrate_pages(struct list_head *from,
 	local_irq_save(flags);
 	list_for_each_entry(page, from, lru)
 		__inc_zone_page_state(page, NR_ISOLATED_ANON +
-				      !!page_is_file_cache(page));
+				page_is_file_cache(page));
 	local_irq_restore(flags);
 
 	if (!swapwrite)
diff --git a/mm/swap.c b/mm/swap.c
index 168d53e..4a8a59e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -189,7 +189,7 @@ void activate_page(struct page *page)
 		add_page_to_lru_list(zone, page, lru);
 		__count_vm_event(PGACTIVATE);
 
-		update_page_reclaim_stat(zone, page, !!file, 1);
+		update_page_reclaim_stat(zone, page, file, 1);
 	}
 	spin_unlock_irq(&zone->lru_lock);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 30e56ee..172119c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -821,7 +821,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
 		return ret;
 
-	if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
+	if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
 		return ret;
 
 	/*
-- 
cgit v0.10.2


From edcf4748cd56adcdf0856cc99ef108a4ea3ac7fe Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 21 Sep 2009 17:02:59 -0700
Subject: mm: return boolean from page_has_private()

Make page_has_private() return a true boolean value and remove the double
negations from the two callsites using it for arithmetic.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d07c0bb..13de789 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -402,8 +402,8 @@ static inline void __ClearPageTail(struct page *page)
  */
 #define PAGE_FLAGS_CHECK_AT_PREP	((1 << NR_PAGEFLAGS) - 1)
 
-#endif /* !__GENERATING_BOUNDS_H */
-
+#define PAGE_FLAGS_PRIVATE				\
+	(1 << PG_private | 1 << PG_private_2)
 /**
  * page_has_private - Determine if page has private stuff
  * @page: The page to be checked
@@ -411,8 +411,11 @@ static inline void __ClearPageTail(struct page *page)
  * Determine if a page has private stuff, indicating that release routines
  * should be invoked upon it.
  */
-#define page_has_private(page)			\
-	((page)->flags & ((1 << PG_private) |	\
-			  (1 << PG_private_2)))
+static inline int page_has_private(struct page *page)
+{
+	return !!(page->flags & PAGE_FLAGS_PRIVATE);
+}
+
+#endif /* !__GENERATING_BOUNDS_H */
 
 #endif	/* PAGE_FLAGS_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index e97e513..16052e8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -272,7 +272,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
  					page_index(page));
 
-	expected_count = 2 + !!page_has_private(page);
+	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
 			(struct page *)radix_tree_deref_slot(pslot) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 172119c..f5b5f02 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -286,7 +286,7 @@ static inline int page_mapping_inuse(struct page *page)
 
 static inline int is_page_cache_freeable(struct page *page)
 {
-	return page_count(page) - !!page_has_private(page) == 2;
+	return page_count(page) - page_has_private(page) == 2;
 }
 
 static int may_write_to_queue(struct backing_dev_info *bdi)
-- 
cgit v0.10.2


From ceddc3a52d783fabbf1ba623601419b9d6337194 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 21 Sep 2009 17:03:00 -0700
Subject: mm: document is_page_cache_freeable()

Enlighten the reader of this code about what reference count makes a page
cache page freeable.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index f5b5f02..5be8107 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -286,6 +286,11 @@ static inline int page_mapping_inuse(struct page *page)
 
 static inline int is_page_cache_freeable(struct page *page)
 {
+	/*
+	 * A freeable page cache page is referenced only by the caller
+	 * that isolated the page, the page cache radix tree and
+	 * optional buffer heads at page->private.
+	 */
 	return page_count(page) - page_has_private(page) == 2;
 }
 
-- 
cgit v0.10.2


From 78986a678f6ec3759a01976749f4437d8bf2d6c3 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:03:02 -0700
Subject: page-allocator: limit the number of MIGRATE_RESERVE pageblocks per
 zone

After anti-fragmentation was merged, a bug was reported whereby devices
that depended on high-order atomic allocations were failing.  The solution
was to preserve a property in the buddy allocator which tended to keep the
minimum number of free pages in the zone at the lower physical addresses
and contiguous.  To preserve this property, MIGRATE_RESERVE was introduced
and a number of pageblocks at the start of a zone would be marked
"reserve", the number of which depended on min_free_kbytes.

Anti-fragmentation works by avoiding the mixing of page migratetypes
within the same pageblock.  One way of helping this is to increase
min_free_kbytes because it becomes less like that it will be necessary to
place pages of of MIGRATE_RESERVE is unbounded, the free memory is kept
there in large contiguous blocks instead of helping anti-fragmentation as
much as it should.  With the page-allocator tracepoint patches applied, it
was found during anti-fragmentation tests that the number of
fragmentation-related events were far higher than expected even with
min_free_kbytes at higher values.

This patch limits the number of MIGRATE_RESERVE blocks that exist per zone
to two.  For example, with a sufficient min_free_kbytes, 4MB of memory
will be kept aside on an x86-64 and remain more or less free and
contiguous for the systems uptime.  This should be sufficient for devices
depending on high-order atomic allocations while helping fragmentation
control when min_free_kbytes is tuned appropriately.  As side-effect of
this patch is that the reserve variable is converted to int as unsigned
long was the wrong type to use when ensuring that only the required number
of reserve blocks are created.

With the patches applied, fragmentation-related events as measured by the
page allocator tracepoints were significantly reduced when running some
fragmentation stress-tests on systems with min_free_kbytes tuned to a
value appropriate for hugepage allocations at runtime.  On x86, the events
recorded were reduced by 99.8%, on x86-64 by 99.72% and on ppc64 by
99.83%.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4c847cc..33b1a47 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2836,7 +2836,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 {
 	unsigned long start_pfn, pfn, end_pfn;
 	struct page *page;
-	unsigned long reserve, block_migratetype;
+	unsigned long block_migratetype;
+	int reserve;
 
 	/* Get the start pfn, end pfn and the number of blocks to reserve */
 	start_pfn = zone->zone_start_pfn;
@@ -2844,6 +2845,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
 
+	/*
+	 * Reserve blocks are generally in place to help high-order atomic
+	 * allocations that are short-lived. A min_free_kbytes value that
+	 * would result in more than 2 reserve blocks for atomic allocations
+	 * is assumed to be in place to help anti-fragmentation for the
+	 * future allocation of hugepages at runtime.
+	 */
+	reserve = min(2, reserve);
+
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
 			continue;
-- 
cgit v0.10.2


From 4738e1b9cf8f9e28d7de080a5e6ce5d0095ea18f Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Mon, 21 Sep 2009 17:03:03 -0700
Subject: memory hotplug: fix updating of num_physpages for hot plugged memory

Sizing of memory allocations shouldn't depend on the number of physical
pages found in a system, as that generally includes (perhaps a huge amount
of) non-RAM pages.  The amount of what actually is usable as storage
should instead be used as a basis here.

In line with that, the memory hotplug code should update num_physpages in
a way that it retains its original (post-boot) meaning; in particular,
decreasing the value should at best be done with great care - this patch
doesn't try to ever decrease this value at all as it doesn't really seem
meaningful to do so.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 616236e..efe3e0e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -339,8 +339,11 @@ EXPORT_SYMBOL_GPL(__remove_pages);
 
 void online_page(struct page *page)
 {
+	unsigned long pfn = page_to_pfn(page);
+
 	totalram_pages++;
-	num_physpages++;
+	if (pfn >= num_physpages)
+		num_physpages = pfn + 1;
 
 #ifdef CONFIG_HIGHMEM
 	if (PageHighMem(page))
@@ -832,7 +835,6 @@ repeat:
 	zone->present_pages -= offlined_pages;
 	zone->zone_pgdat->node_present_pages -= offlined_pages;
 	totalram_pages -= offlined_pages;
-	num_physpages -= offlined_pages;
 
 	setup_per_zone_wmarks();
 	calculate_zone_inactive_ratio(zone);
-- 
cgit v0.10.2


From 4481374ce88ba8f460c8b89f2572027bd27057d0 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Mon, 21 Sep 2009 17:03:05 -0700
Subject: mm: replace various uses of num_physpages by totalram_pages

Sizing of memory allocations shouldn't depend on the number of physical
pages found in a system, as that generally includes (perhaps a huge amount
of) non-RAM pages.  The amount of what actually is usable as storage
should instead be used as a basis here.

Some of the calculations (i.e.  those not intending to use high memory)
should likely even use (totalram_pages - totalhigh_pages).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 0db7969..378e9a8 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 {
 	ssize_t ret = -EINVAL;
 
-	if ((len >> PAGE_SHIFT) > num_physpages) {
-		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+	if ((len >> PAGE_SHIFT) > totalram_pages) {
+		pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
 		return ret;
 	}
 
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index ad87753..a56ca08 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -114,9 +114,9 @@ static int agp_find_max(void)
 	long memory, index, result;
 
 #if PAGE_SHIFT < 20
-	memory = num_physpages >> (20 - PAGE_SHIFT);
+	memory = totalram_pages >> (20 - PAGE_SHIFT);
 #else
-	memory = num_physpages << (PAGE_SHIFT - 20);
+	memory = totalram_pages << (PAGE_SHIFT - 20);
 #endif
 	index = 1;
 
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index a45b0c0..a6b4a5a 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1266,7 +1266,7 @@ ccio_ioc_init(struct ioc *ioc)
 	** Hot-Plug/Removal of PCI cards. (aka PCI OLARD).
 	*/
 
-	iova_space_size = (u32) (num_physpages / count_parisc_driver(&ccio_driver));
+	iova_space_size = (u32) (totalram_pages / count_parisc_driver(&ccio_driver));
 
 	/* limit IOVA space size to 1MB-1GB */
 
@@ -1305,7 +1305,7 @@ ccio_ioc_init(struct ioc *ioc)
 
 	DBG_INIT("%s() hpa 0x%p mem %luMB IOV %dMB (%d bits)\n",
 			__func__, ioc->ioc_regs,
-			(unsigned long) num_physpages >> (20 - PAGE_SHIFT),
+			(unsigned long) totalram_pages >> (20 - PAGE_SHIFT),
 			iova_space_size>>20,
 			iov_order + PAGE_SHIFT);
 
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 123d8fe..57a6d19 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1390,7 +1390,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
 	** for DMA hints - ergo only 30 bits max.
 	*/
 
-	iova_space_size = (u32) (num_physpages/global_ioc_cnt);
+	iova_space_size = (u32) (totalram_pages/global_ioc_cnt);
 
 	/* limit IOVA space size to 1MB-1GB */
 	if (iova_space_size < (1 << (20 - PAGE_SHIFT))) {
@@ -1415,7 +1415,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
 	DBG_INIT("%s() hpa 0x%lx mem %ldMB IOV %dMB (%d bits)\n",
 			__func__,
 			ioc->ioc_hpa,
-			(unsigned long) num_physpages >> (20 - PAGE_SHIFT),
+			(unsigned long) totalram_pages >> (20 - PAGE_SHIFT),
 			iova_space_size>>20,
 			iov_order + PAGE_SHIFT);
 
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f5bbd9e..1b7123e 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -96,11 +96,7 @@ static struct balloon_stats balloon_stats;
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
-/* VM /proc information for memory */
-extern unsigned long totalram_pages;
-
 #ifdef CONFIG_HIGHMEM
-extern unsigned long totalhigh_pages;
 #define inc_totalhigh_pages() (totalhigh_pages++)
 #define dec_totalhigh_pages() (totalhigh_pages--)
 #else
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index cd0be3f..a44b14c 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
 		return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
 		/* return (void *)__get_free_page(gfp_mask); */
 	}
-	if (likely(size >> PAGE_SHIFT < num_physpages))
+	if (likely((size >> PAGE_SHIFT) < totalram_pages))
 		return __vmalloc(size, gfp_mask, PAGE_KERNEL);
 	return NULL;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d808cf8..19ff81c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -25,6 +25,7 @@ extern unsigned long max_mapnr;
 #endif
 
 extern unsigned long num_physpages;
+extern unsigned long totalram_pages;
 extern void * high_memory;
 extern int page_cluster;
 
diff --git a/init/main.c b/init/main.c
index 34971be..2c48c31 100644
--- a/init/main.c
+++ b/init/main.c
@@ -668,12 +668,12 @@ asmlinkage void __init start_kernel(void)
 #endif
 	thread_info_cache_init();
 	cred_init();
-	fork_init(num_physpages);
+	fork_init(totalram_pages);
 	proc_caches_init();
 	buffer_init();
 	key_init();
 	security_init();
-	vfs_caches_init(num_physpages);
+	vfs_caches_init(totalram_pages);
 	radix_tree_init();
 	signals_init();
 	/* rootfs populating might need page-writeback */
diff --git a/mm/slab.c b/mm/slab.c
index 7b5d4de..7dfa481 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1384,7 +1384,7 @@ void __init kmem_cache_init(void)
 	 * Fragmentation resistance on low memory - only use bigger
 	 * page orders on machines with more than 32MB of memory.
 	 */
-	if (num_physpages > (32 << 20) >> PAGE_SHIFT)
+	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
 		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
 
 	/* Bootstrap is tricky, because several objects are allocated
diff --git a/mm/swap.c b/mm/swap.c
index 4a8a59e..308e57d 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -496,7 +496,7 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
  */
 void __init swap_setup(void)
 {
-	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
+	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
 
 #ifdef CONFIG_SWAP
 	bdi_init(swapper_space.backing_dev_info);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9216b25..5535da1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1386,7 +1386,7 @@ void *vmap(struct page **pages, unsigned int count,
 
 	might_sleep();
 
-	if (count > num_physpages)
+	if (count > totalram_pages)
 		return NULL;
 
 	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
@@ -1493,7 +1493,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 	unsigned long real_size = size;
 
 	size = PAGE_ALIGN(size);
-	if (!size || (size >> PAGE_SHIFT) > num_physpages)
+	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
 		return NULL;
 
 	area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
diff --git a/net/core/sock.c b/net/core/sock.c
index 30d5446..524712a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1206,12 +1206,12 @@ EXPORT_SYMBOL_GPL(sk_setup_caps);
 
 void __init sk_init(void)
 {
-	if (num_physpages <= 4096) {
+	if (totalram_pages <= 4096) {
 		sysctl_wmem_max = 32767;
 		sysctl_rmem_max = 32767;
 		sysctl_wmem_default = 32767;
 		sysctl_rmem_default = 32767;
-	} else if (num_physpages >= 131072) {
+	} else if (totalram_pages >= 131072) {
 		sysctl_wmem_max = 131071;
 		sysctl_rmem_max = 131071;
 	}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 923db06..bc44670 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1049,10 +1049,10 @@ static int __init dccp_init(void)
 	 *
 	 * The methodology is similar to that of the buffer cache.
 	 */
-	if (num_physpages >= (128 * 1024))
-		goal = num_physpages >> (21 - PAGE_SHIFT);
+	if (totalram_pages >= (128 * 1024))
+		goal = totalram_pages >> (21 - PAGE_SHIFT);
 	else
-		goal = num_physpages >> (23 - PAGE_SHIFT);
+		goal = totalram_pages >> (23 - PAGE_SHIFT);
 
 	if (thash_entries)
 		goal = (thash_entries *
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 9383d3e..57662ca 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1750,7 +1750,7 @@ void __init dn_route_init(void)
 	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
 	add_timer(&dn_route_timer);
 
-	goal = num_physpages >> (26 - PAGE_SHIFT);
+	goal = totalram_pages >> (26 - PAGE_SHIFT);
 
 	for(order = 0; (1UL << order) < goal; order++)
 		/* NOTHING */;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 91867d3..df93473 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3414,7 +3414,7 @@ int __init ip_rt_init(void)
 		alloc_large_system_hash("IP route cache",
 					sizeof(struct rt_hash_bucket),
 					rhash_entries,
-					(num_physpages >= 128 * 1024) ?
+					(totalram_pages >= 128 * 1024) ?
 					15 : 17,
 					0,
 					&rt_hash_log,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 19a0612..21387eb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2862,7 +2862,7 @@ void __init tcp_init(void)
 		alloc_large_system_hash("TCP established",
 					sizeof(struct inet_ehash_bucket),
 					thash_entries,
-					(num_physpages >= 128 * 1024) ?
+					(totalram_pages >= 128 * 1024) ?
 					13 : 15,
 					0,
 					&tcp_hashinfo.ehash_size,
@@ -2879,7 +2879,7 @@ void __init tcp_init(void)
 		alloc_large_system_hash("TCP bind",
 					sizeof(struct inet_bind_hashbucket),
 					tcp_hashinfo.ehash_size,
-					(num_physpages >= 128 * 1024) ?
+					(totalram_pages >= 128 * 1024) ?
 					13 : 15,
 					0,
 					&tcp_hashinfo.bhash_size,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b371098..7c9ec3d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1245,9 +1245,9 @@ static int nf_conntrack_init_init_net(void)
 	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
 	if (!nf_conntrack_htable_size) {
 		nf_conntrack_htable_size
-			= (((num_physpages << PAGE_SHIFT) / 16384)
+			= (((totalram_pages << PAGE_SHIFT) / 16384)
 			   / sizeof(struct hlist_head));
-		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
 			nf_conntrack_htable_size = 16384;
 		if (nf_conntrack_htable_size < 32)
 			nf_conntrack_htable_size = 32;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a6ac83a..f01955c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -617,7 +617,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
 	int cpu;
 
 	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
-	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
+	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
 		return NULL;
 
 	newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 219dcdb..dd16e40 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -194,9 +194,9 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 	if (minfo->cfg.size)
 		size = minfo->cfg.size;
 	else {
-		size = ((num_physpages << PAGE_SHIFT) / 16384) /
+		size = ((totalram_pages << PAGE_SHIFT) / 16384) /
 		       sizeof(struct list_head);
-		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
 			size = 8192;
 		if (size < 16)
 			size = 16;
@@ -266,9 +266,9 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
 	if (minfo->cfg.size) {
 		size = minfo->cfg.size;
 	} else {
-		size = (num_physpages << PAGE_SHIFT) / 16384 /
+		size = (totalram_pages << PAGE_SHIFT) / 16384 /
 		       sizeof(struct list_head);
-		if (num_physpages > 1024 * 1024 * 1024 / PAGE_SIZE)
+		if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
 			size = 8192;
 		if (size < 16)
 			size = 16;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c5aab6a..55180b9 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2091,10 +2091,10 @@ static int __init netlink_proto_init(void)
 	if (!nl_table)
 		goto panic;
 
-	if (num_physpages >= (128 * 1024))
-		limit = num_physpages >> (21 - PAGE_SHIFT);
+	if (totalram_pages >= (128 * 1024))
+		limit = totalram_pages >> (21 - PAGE_SHIFT);
 	else
-		limit = num_physpages >> (23 - PAGE_SHIFT);
+		limit = totalram_pages >> (23 - PAGE_SHIFT);
 
 	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
 	limit = (1UL << order) / sizeof(struct hlist_head);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c557f1f..612dc87 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1184,10 +1184,10 @@ SCTP_STATIC __init int sctp_init(void)
 	/* Size and allocate the association hash table.
 	 * The methodology is similar to that of the tcp hash tables.
 	 */
-	if (num_physpages >= (128 * 1024))
-		goal = num_physpages >> (22 - PAGE_SHIFT);
+	if (totalram_pages >= (128 * 1024))
+		goal = totalram_pages >> (22 - PAGE_SHIFT);
 	else
-		goal = num_physpages >> (24 - PAGE_SHIFT);
+		goal = totalram_pages >> (24 - PAGE_SHIFT);
 
 	for (order = 0; (1UL << order) < goal; order++)
 		;
-- 
cgit v0.10.2


From 3c1596efe167322dae87f8390d36f91ce2d7f936 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Mon, 21 Sep 2009 17:03:06 -0700
Subject: mm: don't use alloc_bootmem_low() where not strictly needed

Since alloc_bootmem() will never return inaccessible (via virtual
addressing) memory anyway, using the ..._low() variant only makes sense
when the physical address range of the allocated memory must fulfill
further constraints, espacially since on 64-bits (or more generally in all
cases where the pools the two variants allocate from are than the full
available range.

Probably the use in alloc_tce_table() could also be eliminated (based on
code inspection of pci-calgary_64.c), but that seems too risky given I
know nothing about that hardware and have no way to test it.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a3210ce..85419bb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1331,7 +1331,7 @@ void __init e820_reserve_resources(void)
 	struct resource *res;
 	u64 end;
 
-	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
 	e820_res = res;
 	for (i = 0; i < e820.nr_map; i++) {
 		end = e820.map[i].addr + e820.map[i].size - 1;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 95e877f..b49b4f6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 #ifdef CONFIG_X86_PAE
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		if (after_bootmem)
-			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+			pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
 		else
 			pmd_table = (pmd_t *)alloc_low_page();
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 #endif
 			if (!page_table)
 				page_table =
-				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+				(pte_t *)alloc_bootmem_pages(PAGE_SIZE);
 		} else
 			page_table = (pte_t *)alloc_low_page();
 
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index d5ea8a6..56f9234 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -164,7 +164,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = alloc_bootmem_low(sizeof(struct firmware_map_entry));
+	entry = alloc_bootmem(sizeof(struct firmware_map_entry));
 	if (WARN_ON(!entry))
 		return -ENOMEM;
 
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 97955b0..36cb168 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -619,7 +619,7 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
 		BUG_ON(!region);
 	} else
 		/* This allocation cannot fail */
-		region = alloc_bootmem_low(sizeof(struct nosave_region));
+		region = alloc_bootmem(sizeof(struct nosave_region));
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);
-- 
cgit v0.10.2


From 2c85f51d222ccdd8c401d77a36b723a89156810d Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Mon, 21 Sep 2009 17:03:07 -0700
Subject: mm: also use alloc_large_system_hash() for the PID hash table

This is being done by allowing boot time allocations to specify that they
may want a sub-page sized amount of memory.

Overall this seems more consistent with the other hash table allocations,
and allows making two supposedly mm-only variables really mm-only
(nr_{kernel,all}_pages).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index bc3ab70..dd97fb8 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -132,9 +132,6 @@ static inline void *alloc_remap(int nid, unsigned long size)
 }
 #endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
 
-extern unsigned long __meminitdata nr_kernel_pages;
-extern unsigned long __meminitdata nr_all_pages;
-
 extern void *alloc_large_system_hash(const char *tablename,
 				     unsigned long bucketsize,
 				     unsigned long numentries,
@@ -145,6 +142,8 @@ extern void *alloc_large_system_hash(const char *tablename,
 				     unsigned long limit);
 
 #define HASH_EARLY	0x00000001	/* Allocating during early boot? */
+#define HASH_SMALL	0x00000002	/* sub-page allocation allowed, min
+					 * shift passed via *_hash_shift */
 
 /* Only NUMA needs hash distribution. 64bit NUMA architectures have
  * sufficient vmalloc space.
diff --git a/kernel/pid.c b/kernel/pid.c
index 31310b5..d3f722d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -40,7 +40,7 @@
 #define pid_hashfn(nr, ns)	\
 	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
 static struct hlist_head *pid_hash;
-static int pidhash_shift;
+static unsigned int pidhash_shift = 4;
 struct pid init_struct_pid = INIT_STRUCT_PID;
 
 int pid_max = PID_MAX_DEFAULT;
@@ -499,19 +499,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 void __init pidhash_init(void)
 {
 	int i, pidhash_size;
-	unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
 
-	pidhash_shift = max(4, fls(megabytes * 4));
-	pidhash_shift = min(12, pidhash_shift);
+	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
+					   HASH_EARLY | HASH_SMALL,
+					   &pidhash_shift, NULL, 4096);
 	pidhash_size = 1 << pidhash_shift;
 
-	printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
-		pidhash_size, pidhash_shift,
-		pidhash_size * sizeof(struct hlist_head));
-
-	pid_hash = alloc_bootmem(pidhash_size *	sizeof(*(pid_hash)));
-	if (!pid_hash)
-		panic("Could not alloc pidhash!\n");
 	for (i = 0; i < pidhash_size; i++)
 		INIT_HLIST_HEAD(&pid_hash[i]);
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 33b1a47..770f011 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -124,8 +124,8 @@ static char * const zone_names[MAX_NR_ZONES] = {
 
 int min_free_kbytes = 1024;
 
-unsigned long __meminitdata nr_kernel_pages;
-unsigned long __meminitdata nr_all_pages;
+static unsigned long __meminitdata nr_kernel_pages;
+static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
@@ -4821,7 +4821,14 @@ void *__init alloc_large_system_hash(const char *tablename,
 			numentries <<= (PAGE_SHIFT - scale);
 
 		/* Make sure we've got at least a 0-order allocation.. */
-		if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+		if (unlikely(flags & HASH_SMALL)) {
+			/* Makes no sense without HASH_EARLY */
+			WARN_ON(!(flags & HASH_EARLY));
+			if (!(numentries >> *_hash_shift)) {
+				numentries = 1UL << *_hash_shift;
+				BUG_ON(!numentries);
+			}
+		} else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
 			numentries = PAGE_SIZE / bucketsize;
 	}
 	numentries = roundup_pow_of_two(numentries);
-- 
cgit v0.10.2


From 0b21767637c3c99890a248fe47ac414e51cf5eb7 Mon Sep 17 00:00:00 2001
From: Vincent Li <macli@brc.ubc.ca>
Date: Mon, 21 Sep 2009 17:03:09 -0700
Subject: mm/vmscan: rename zone_nr_pages() to zone_nr_lru_pages()

The name `zone_nr_pages' can be mis-read as zone's (total) number pages,
but it actually returns zone's LRU list number pages.

Signed-off-by: Vincent Li <macli@brc.ubc.ca>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5be8107..5432c23 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -148,8 +148,8 @@ static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
 	return &zone->reclaim_stat;
 }
 
-static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
-				   enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct zone *zone,
+				struct scan_control *sc, enum lru_list lru)
 {
 	if (!scanning_global_lru(sc))
 		return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
@@ -1489,10 +1489,10 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	anon  = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
-		zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
-	file  = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
-		zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
+	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
 
 	if (scanning_global_lru(sc)) {
 		free  = zone_page_state(zone, NR_FREE_PAGES);
@@ -1600,7 +1600,7 @@ static void shrink_zone(int priority, struct zone *zone,
 		int file = is_file_lru(l);
 		unsigned long scan;
 
-		scan = zone_nr_pages(zone, sc, l);
+		scan = zone_nr_lru_pages(zone, sc, l);
 		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
-- 
cgit v0.10.2


From 1a8670a29b5277cbe601f74ab63d2c5211fb3005 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:03:09 -0700
Subject: oom: move oom_killer_enable()/oom_killer_disable to where they belong

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c32bfa8..f53e9b8 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -335,18 +335,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(void);
 void drain_local_pages(void *dummy);
 
-extern bool oom_killer_disabled;
-
-static inline void oom_killer_disable(void)
-{
-	oom_killer_disabled = true;
-}
-
-static inline void oom_killer_enable(void)
-{
-	oom_killer_disabled = false;
-}
-
 extern gfp_t gfp_allowed_mask;
 
 static inline void set_gfp_allowed_mask(gfp_t mask)
diff --git a/include/linux/oom.h b/include/linux/oom.h
index a7979ba..6aac5fe 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -30,5 +30,16 @@ extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
+extern bool oom_killer_disabled;
+
+static inline void oom_killer_disable(void)
+{
+	oom_killer_disabled = true;
+}
+
+static inline void oom_killer_enable(void)
+{
+	oom_killer_disabled = false;
+}
 #endif /* __KERNEL__*/
 #endif /* _INCLUDE_LINUX_OOM_H */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index da2072d..cc2e553 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -9,6 +9,7 @@
 #undef DEBUG
 
 #include <linux/interrupt.h>
+#include <linux/oom.h>
 #include <linux/suspend.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
-- 
cgit v0.10.2


From f86296317434b21585e229f6c49a33cb9ebab4d3 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 21 Sep 2009 17:03:11 -0700
Subject: mm: do batched scans for mem_cgroup

For mem_cgroup, shrink_zone() may call shrink_list() with nr_to_scan=1, in
which case shrink_list() _still_ calls isolate_pages() with the much
larger SWAP_CLUSTER_MAX.  It effectively scales up the inactive list scan
rate by up to 32 times.

For example, with 16k inactive pages and DEF_PRIORITY=12, (16k >> 12)=4.
So when shrink_zone() expects to scan 4 pages in the active/inactive list,
the active list will be scanned 4 pages, while the inactive list will be
(over) scanned SWAP_CLUSTER_MAX=32 pages in effect.  And that could break
the balance between the two lists.

It can further impact the scan of anon active list, due to the anon
active/inactive ratio rebalance logic in balance_pgdat()/shrink_zone():

inactive anon list over scanned => inactive_anon_is_low() == TRUE
                                => shrink_active_list()
                                => active anon list over scanned

So the end result may be

- anon inactive  => over scanned
- anon active    => over scanned (maybe not as much)
- file inactive  => over scanned
- file active    => under scanned (relatively)

The accesses to nr_saved_scan are not lock protected and so not 100%
accurate, however we can tolerate small errors and the resulted small
imbalanced scan rates between zones.

Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9c50309..c188ea6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -273,6 +273,11 @@ struct zone_reclaim_stat {
 	 */
 	unsigned long		recent_rotated[2];
 	unsigned long		recent_scanned[2];
+
+	/*
+	 * accumulated for batching
+	 */
+	unsigned long		nr_saved_scan[NR_LRU_LISTS];
 };
 
 struct zone {
@@ -327,7 +332,6 @@ struct zone {
 	spinlock_t		lru_lock;	
 	struct zone_lru {
 		struct list_head list;
-		unsigned long nr_saved_scan;	/* accumulated for batching */
 	} lru[NR_LRU_LISTS];
 
 	struct zone_reclaim_stat reclaim_stat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 770f011..84d9da1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3809,7 +3809,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone_pcp_init(zone);
 		for_each_lru(l) {
 			INIT_LIST_HEAD(&zone->lru[l].list);
-			zone->lru[l].nr_saved_scan = 0;
+			zone->reclaim_stat.nr_saved_scan[l] = 0;
 		}
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5432c23..0e7f5e4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1586,6 +1586,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	int noswap = 0;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
@@ -1605,12 +1606,9 @@ static void shrink_zone(int priority, struct zone *zone,
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc))
-			nr[l] = nr_scan_try_batch(scan,
-						  &zone->lru[l].nr_saved_scan,
-						  swap_cluster_max);
-		else
-			nr[l] = scan;
+		nr[l] = nr_scan_try_batch(scan,
+					  &reclaim_stat->nr_saved_scan[l],
+					  swap_cluster_max);
 	}
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2220,6 +2218,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 {
 	struct zone *zone;
 	unsigned long nr_reclaimed = 0;
+	struct zone_reclaim_stat *reclaim_stat;
 
 	for_each_populated_zone(zone) {
 		enum lru_list l;
@@ -2236,11 +2235,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
-			if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
+			reclaim_stat = get_reclaim_stat(zone, sc);
+			reclaim_stat->nr_saved_scan[l] +=
+						(lru_pages >> prio) + 1;
+			if (reclaim_stat->nr_saved_scan[l]
+						>= nr_pages || pass > 3) {
 				unsigned long nr_to_scan;
 
-				zone->lru[l].nr_saved_scan = 0;
+				reclaim_stat->nr_saved_scan[l] = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
-- 
cgit v0.10.2


From f168e1b6390e2d79cf57e48e6ae6d9b0a9e2851a Mon Sep 17 00:00:00 2001
From: Vincent Li <macli@brc.ubc.ca>
Date: Mon, 21 Sep 2009 17:03:12 -0700
Subject: mm/vmscan: remove page_queue_congested() comment

Commit 084f71ae5c(kill page_queue_congested()) removed
page_queue_congested().  Remove the page_queue_congested() comment in
vmscan pageout() too.

Signed-off-by: Vincent Li <macli@brc.ubc.ca>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0e7f5e4..613e89f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -366,7 +366,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	 * block, for some throttling. This happens by accident, because
 	 * swap_backing_dev_info is bust: it doesn't reflect the
 	 * congestion state of the swapdevs.  Easy to fix, if needed.
-	 * See swapfile.c:page_queue_congested().
 	 */
 	if (!is_page_cache_freeable(page))
 		return PAGE_KEEP;
-- 
cgit v0.10.2


From 28b83c5193e7ab951e402252278f2cc79dc4d298 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:03:13 -0700
Subject: oom: move oom_adj value from task_struct to signal_struct

Currently, OOM logic callflow is here.

    __out_of_memory()
        select_bad_process()            for each task
            badness()                   calculate badness of one task
                oom_kill_process()      search child
                    oom_kill_task()     kill target task and mm shared tasks with it

example, process-A have two thread, thread-A and thread-B and it have very
fat memory and each thread have following oom_adj and oom_score.

     thread-A: oom_adj = OOM_DISABLE, oom_score = 0
     thread-B: oom_adj = 0,           oom_score = very-high

Then, select_bad_process() select thread-B, but oom_kill_task() refuse
kill the task because thread-A have OOM_DISABLE.  Thus __out_of_memory()
call select_bad_process() again.  but select_bad_process() select the same
task.  It mean kernel fall in livelock.

The fact is, select_bad_process() must select killable task.  otherwise
OOM logic go into livelock.

And root cause is, oom_adj shouldn't be per-thread value.  it should be
per-process value because OOM-killer kill a process, not thread.  Thus
This patch moves oomkilladj (now more appropriately named oom_adj) from
struct task_struct to struct signal_struct.  it naturally prevent
select_bad_process() choose wrong task.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6..81cfff8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	char buffer[PROC_NUMBUF];
 	size_t len;
-	int oom_adjust;
+	int oom_adjust = OOM_DISABLE;
+	unsigned long flags;
 
 	if (!task)
 		return -ESRCH;
-	oom_adjust = task->oomkilladj;
+
+	if (lock_task_sighand(task, &flags)) {
+		oom_adjust = task->signal->oom_adj;
+		unlock_task_sighand(task, &flags);
+	}
+
 	put_task_struct(task);
 
 	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1017,6 +1023,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	struct task_struct *task;
 	char buffer[PROC_NUMBUF], *end;
 	int oom_adjust;
+	unsigned long flags;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
@@ -1032,11 +1039,20 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
-	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+	if (!lock_task_sighand(task, &flags)) {
+		put_task_struct(task);
+		return -ESRCH;
+	}
+
+	if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
+		unlock_task_sighand(task, &flags);
 		put_task_struct(task);
 		return -EACCES;
 	}
-	task->oomkilladj = oom_adjust;
+
+	task->signal->oom_adj = oom_adjust;
+
+	unlock_task_sighand(task, &flags);
 	put_task_struct(task);
 	if (end - buffer == 0)
 		return -EIO;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 899d730..17e9a8e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -639,6 +639,8 @@ struct signal_struct {
 	unsigned audit_tty;
 	struct tty_audit_buf *tty_audit_buf;
 #endif
+
+	int oom_adj;	/* OOM kill score adjustment (bit shift) */
 };
 
 /* Context switch must be unlocked if interrupts are to be enabled */
@@ -1221,7 +1223,6 @@ struct task_struct {
 	 * a short time
 	 */
 	unsigned char fpu_counter;
-	s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 73a442b..1020977 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -880,6 +880,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 	tty_audit_fork(sig);
 
+	sig->oom_adj = current->signal->oom_adj;
+
 	return 0;
 }
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index da4c342..630b77f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,6 +58,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	unsigned long points, cpu_time, run_time;
 	struct mm_struct *mm;
 	struct task_struct *child;
+	int oom_adj = p->signal->oom_adj;
+
+	if (oom_adj == OOM_DISABLE)
+		return 0;
 
 	task_lock(p);
 	mm = p->mm;
@@ -148,15 +152,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 		points /= 8;
 
 	/*
-	 * Adjust the score by oomkilladj.
+	 * Adjust the score by oom_adj.
 	 */
-	if (p->oomkilladj) {
-		if (p->oomkilladj > 0) {
+	if (oom_adj) {
+		if (oom_adj > 0) {
 			if (!points)
 				points = 1;
-			points <<= p->oomkilladj;
+			points <<= oom_adj;
 		} else
-			points >>= -(p->oomkilladj);
+			points >>= -(oom_adj);
 	}
 
 #ifdef DEBUG
@@ -251,7 +255,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 			*ppoints = ULONG_MAX;
 		}
 
-		if (p->oomkilladj == OOM_DISABLE)
+		if (p->signal->oom_adj == OOM_DISABLE)
 			continue;
 
 		points = badness(p, uptime.tv_sec);
@@ -304,7 +308,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
 		}
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
 		       p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
-		       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
+		       get_mm_rss(mm), (int)task_cpu(p), p->signal->oom_adj,
 		       p->comm);
 		task_unlock(p);
 	} while_each_thread(g, p);
@@ -359,18 +363,9 @@ static int oom_kill_task(struct task_struct *p)
 	 * change to NULL at any time since we do not hold task_lock(p).
 	 * However, this is of no concern to us.
 	 */
-
-	if (mm == NULL)
+	if (!mm || p->signal->oom_adj == OOM_DISABLE)
 		return 1;
 
-	/*
-	 * Don't kill the process if any threads are set to OOM_DISABLE
-	 */
-	do_each_thread(g, q) {
-		if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
-			return 1;
-	} while_each_thread(g, q);
-
 	__oom_kill_task(p, 1);
 
 	/*
@@ -394,8 +389,9 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 
 	if (printk_ratelimit()) {
 		printk(KERN_WARNING "%s invoked oom-killer: "
-			"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
-			current->comm, gfp_mask, order, current->oomkilladj);
+			"gfp_mask=0x%x, order=%d, oom_adj=%d\n",
+			current->comm, gfp_mask, order,
+			current->signal->oom_adj);
 		task_lock(current);
 		cpuset_print_task_mems_allowed(current);
 		task_unlock(current);
-- 
cgit v0.10.2


From 495789a51a91cb8c015d8d77fecbac1caf20b186 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:03:14 -0700
Subject: oom: make oom_score to per-process value

oom-killer kills a process, not task.  Then oom_score should be calculated
as per-process too.  it makes consistency more and makes speed up
select_bad_process().

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ae7f8bb..75988ba 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1205,7 +1205,7 @@ The following heuristics are then applied:
  * if the task was reniced, its score doubles
  * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
  	or CAP_SYS_RAWIO) have their score divided by 4
- * if oom condition happened in one cpuset and checked task does not belong
+ * if oom condition happened in one cpuset and checked process does not belong
  	to it, its score is divided by 8
  * the resulting score is multiplied by two to the power of oom_adj, i.e.
 	points <<= oom_adj when it is positive and
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 81cfff8..71a3425 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	read_lock(&tasklist_lock);
-	points = badness(task, uptime.tv_sec);
+	points = badness(task->group_leader, uptime.tv_sec);
 	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 630b77f..3726922 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -34,6 +34,23 @@ int sysctl_oom_dump_tasks;
 static DEFINE_SPINLOCK(zone_scan_lock);
 /* #define DEBUG */
 
+/*
+ * Is all threads of the target process nodes overlap ours?
+ */
+static int has_intersects_mems_allowed(struct task_struct *tsk)
+{
+	struct task_struct *t;
+
+	t = tsk;
+	do {
+		if (cpuset_mems_allowed_intersects(current, t))
+			return 1;
+		t = next_thread(t);
+	} while (t != tsk);
+
+	return 0;
+}
+
 /**
  * badness - calculate a numeric value for how bad this task has been
  * @p: task struct of which task we should calculate
@@ -59,6 +76,9 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	struct mm_struct *mm;
 	struct task_struct *child;
 	int oom_adj = p->signal->oom_adj;
+	struct task_cputime task_time;
+	unsigned long utime;
+	unsigned long stime;
 
 	if (oom_adj == OOM_DISABLE)
 		return 0;
@@ -106,8 +126,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
          * of seconds. There is no particular reason for this other than
          * that it turned out to work very well in practice.
 	 */
-	cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime))
-		>> (SHIFT_HZ + 3);
+	thread_group_cputime(p, &task_time);
+	utime = cputime_to_jiffies(task_time.utime);
+	stime = cputime_to_jiffies(task_time.stime);
+	cpu_time = (utime + stime) >> (SHIFT_HZ + 3);
+
 
 	if (uptime >= p->start_time.tv_sec)
 		run_time = (uptime - p->start_time.tv_sec) >> 10;
@@ -148,7 +171,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * because p may have allocated or otherwise mapped memory on
 	 * this node before. However it will be less likely.
 	 */
-	if (!cpuset_mems_allowed_intersects(current, p))
+	if (!has_intersects_mems_allowed(p))
 		points /= 8;
 
 	/*
@@ -204,13 +227,13 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 static struct task_struct *select_bad_process(unsigned long *ppoints,
 						struct mem_cgroup *mem)
 {
-	struct task_struct *g, *p;
+	struct task_struct *p;
 	struct task_struct *chosen = NULL;
 	struct timespec uptime;
 	*ppoints = 0;
 
 	do_posix_clock_monotonic_gettime(&uptime);
-	do_each_thread(g, p) {
+	for_each_process(p) {
 		unsigned long points;
 
 		/*
@@ -263,7 +286,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 			chosen = p;
 			*ppoints = points;
 		}
-	} while_each_thread(g, p);
+	}
 
 	return chosen;
 }
-- 
cgit v0.10.2


From 8c5cd6f3a1721085652da204d454af4f8b92eda2 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:03:15 -0700
Subject: oom: oom_kill doesn't kill vfork parent (or child)

Current oom_kill doesn't only kill the victim process, but also kill all
thas shread the same mm.  it mean vfork parent will be killed.

This is definitely incorrect.  another process have another oom_adj.  we
shouldn't ignore their oom_adj (it might have OOM_DISABLE).

following caller hit the minefield.

===============================
        switch (constraint) {
        case CONSTRAINT_MEMORY_POLICY:
                oom_kill_process(current, gfp_mask, order, 0, NULL,
                                "No available memory (MPOL_BIND)");
                break;

Note: force_sig(SIGKILL) send SIGKILL to all thread in the process.
We don't need to care multi thread in here.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3726922..ea2147d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -373,11 +373,6 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
 
 static int oom_kill_task(struct task_struct *p)
 {
-	struct mm_struct *mm;
-	struct task_struct *g, *q;
-
-	mm = p->mm;
-
 	/* WARNING: mm may not be dereferenced since we did not obtain its
 	 * value from get_task_mm(p).  This is OK since all we need to do is
 	 * compare mm to q->mm below.
@@ -386,21 +381,11 @@ static int oom_kill_task(struct task_struct *p)
 	 * change to NULL at any time since we do not hold task_lock(p).
 	 * However, this is of no concern to us.
 	 */
-	if (!mm || p->signal->oom_adj == OOM_DISABLE)
+	if (!p->mm || p->signal->oom_adj == OOM_DISABLE)
 		return 1;
 
 	__oom_kill_task(p, 1);
 
-	/*
-	 * kill all processes that share the ->mm (i.e. all threads),
-	 * but are in a different thread group. Don't let them have access
-	 * to memory reserves though, otherwise we might deplete all memory.
-	 */
-	do_each_thread(g, q) {
-		if (q->mm == mm && !same_thread_group(q, p))
-			force_sig(SIGKILL, q);
-	} while_each_thread(g, q);
-
 	return 0;
 }
 
-- 
cgit v0.10.2


From 5d863b89688e5811cd9e5bd0082cb38abe03adf3 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 21 Sep 2009 17:03:16 -0700
Subject: oom: fix oom_adjust_write() input sanity check

Andrew Morton pointed out oom_adjust_write() has very strange EIO
and new line handling. this patch fixes it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 71a3425..55c4c80 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1021,21 +1021,24 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
-	int oom_adjust;
+	char buffer[PROC_NUMBUF];
+	long oom_adjust;
 	unsigned long flags;
+	int err;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	oom_adjust = simple_strtol(buffer, &end, 0);
+
+	err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
+	if (err)
+		return -EINVAL;
 	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
 	     oom_adjust != OOM_DISABLE)
 		return -EINVAL;
-	if (*end == '\n')
-		end++;
+
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
@@ -1054,9 +1057,8 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 
 	unlock_task_sighand(task, &flags);
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 static const struct file_operations proc_oom_adjust_operations = {
-- 
cgit v0.10.2


From 5f8dcc21211a3d4e3a7a5ca366b469fb88117f61 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:03:19 -0700
Subject: page-allocator: split per-cpu list into one-list-per-migrate-type

The following two patches remove searching in the page allocator fast-path
by maintaining multiple free-lists in the per-cpu structure.  At the time
the search was introduced, increasing the per-cpu structures would waste a
lot of memory as per-cpu structures were statically allocated at
compile-time.  This is no longer the case.

The patches are as follows. They are based on mmotm-2009-08-27.

Patch 1 adds multiple lists to struct per_cpu_pages, one per
	migratetype that can be stored on the PCP lists.

Patch 2 notes that the pcpu drain path check empty lists multiple times. The
	patch reduces the number of checks by maintaining a count of free
	lists encountered. Lists containing pages will then free multiple
	pages in batch

The patches were tested with kernbench, netperf udp/tcp, hackbench and
sysbench.  The netperf tests were not bound to any CPU in particular and
were run such that the results should be 99% confidence that the reported
results are within 1% of the estimated mean.  sysbench was run with a
postgres background and read-only tests.  Similar to netperf, it was run
multiple times so that it's 99% confidence results are within 1%.  The
patches were tested on x86, x86-64 and ppc64 as

x86:	Intel Pentium D 3GHz with 8G RAM (no-brand machine)
	kernbench	- No significant difference, variance well within noise
	netperf-udp	- 1.34% to 2.28% gain
	netperf-tcp	- 0.45% to 1.22% gain
	hackbench	- Small variances, very close to noise
	sysbench	- Very small gains

x86-64:	AMD Phenom 9950 1.3GHz with 8G RAM (no-brand machine)
	kernbench	- No significant difference, variance well within noise
	netperf-udp	- 1.83% to 10.42% gains
	netperf-tcp	- No conclusive until buffer >= PAGE_SIZE
				4096	+15.83%
				8192	+ 0.34% (not significant)
				16384	+ 1%
	hackbench	- Small gains, very close to noise
	sysbench	- 0.79% to 1.6% gain

ppc64:	PPC970MP 2.5GHz with 10GB RAM (it's a terrasoft powerstation)
	kernbench	- No significant difference, variance well within noise
	netperf-udp	- 2-3% gain for almost all buffer sizes tested
	netperf-tcp	- losses on small buffers, gains on larger buffers
			  possibly indicates some bad caching effect.
	hackbench	- No significant difference
	sysbench	- 2-4% gain

This patch:

Currently the per-cpu page allocator searches the PCP list for pages of
the correct migrate-type to reduce the possibility of pages being
inappropriate placed from a fragmentation perspective.  This search is
potentially expensive in a fast-path and undesirable.  Splitting the
per-cpu list into multiple lists increases the size of a per-cpu structure
and this was potentially a major problem at the time the search was
introduced.  These problem has been mitigated as now only the necessary
number of structures is allocated for the running system.

This patch replaces a list search in the per-cpu allocator with one list
per migrate type.  The potential snag with this approach is when bulk
freeing pages.  We round-robin free pages based on migrate type which has
little bearing on the cache hotness of the page and potentially checks
empty lists repeatedly in the event the majority of PCP pages are of one
type.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Nick Piggin <npiggin@suse.de>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c188ea6..652ef01 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -38,6 +38,7 @@
 #define MIGRATE_UNMOVABLE     0
 #define MIGRATE_RECLAIMABLE   1
 #define MIGRATE_MOVABLE       2
+#define MIGRATE_PCPTYPES      3 /* the number of types on the pcp lists */
 #define MIGRATE_RESERVE       3
 #define MIGRATE_ISOLATE       4 /* can't allocate from here */
 #define MIGRATE_TYPES         5
@@ -169,7 +170,9 @@ struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
-	struct list_head list;	/* the list of pages */
+
+	/* Lists of pages, one per migrate type stored on the pcp-lists */
+	struct list_head lists[MIGRATE_PCPTYPES];
 };
 
 struct per_cpu_pageset {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 84d9da1..1b1c39e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -511,7 +511,7 @@ static inline int free_pages_check(struct page *page)
 }
 
 /*
- * Frees a list of pages. 
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -521,23 +521,36 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pages_bulk(struct zone *zone, int count,
-					struct list_head *list, int order)
+static void free_pcppages_bulk(struct zone *zone, int count,
+					struct per_cpu_pages *pcp)
 {
+	int migratetype = 0;
+
 	spin_lock(&zone->lock);
 	zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, count << order);
+	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
 	while (count--) {
 		struct page *page;
+		struct list_head *list;
+
+		/*
+		 * Remove pages from lists in a round-robin fashion. This spinning
+		 * around potentially empty lists is bloody awful, alternatives that
+		 * don't suck are welcome
+		 */
+		do {
+			if (++migratetype == MIGRATE_PCPTYPES)
+				migratetype = 0;
+			list = &pcp->lists[migratetype];
+		} while (list_empty(list));
 
-		VM_BUG_ON(list_empty(list));
 		page = list_entry(list->prev, struct page, lru);
 		/* have to delete it as __free_one_page list manipulates */
 		list_del(&page->lru);
-		trace_mm_page_pcpu_drain(page, order, page_private(page));
-		__free_one_page(page, zone, order, page_private(page));
+		trace_mm_page_pcpu_drain(page, 0, migratetype);
+		__free_one_page(page, zone, 0, migratetype);
 	}
 	spin_unlock(&zone->lock);
 }
@@ -953,7 +966,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 		to_drain = pcp->batch;
 	else
 		to_drain = pcp->count;
-	free_pages_bulk(zone, to_drain, &pcp->list, 0);
+	free_pcppages_bulk(zone, to_drain, pcp);
 	pcp->count -= to_drain;
 	local_irq_restore(flags);
 }
@@ -979,7 +992,7 @@ static void drain_pages(unsigned int cpu)
 
 		pcp = &pset->pcp;
 		local_irq_save(flags);
-		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
 	}
@@ -1045,6 +1058,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
+	int migratetype;
 	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, 0);
@@ -1062,21 +1076,39 @@ static void free_hot_cold_page(struct page *page, int cold)
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp;
-	set_page_private(page, get_pageblock_migratetype(page));
+	migratetype = get_pageblock_migratetype(page);
+	set_page_private(page, migratetype);
 	local_irq_save(flags);
 	if (unlikely(wasMlocked))
 		free_page_mlock(page);
 	__count_vm_event(PGFREE);
 
+	/*
+	 * We only track unmovable, reclaimable and movable on pcp lists.
+	 * Free ISOLATE pages back to the allocator because they are being
+	 * offlined but treat RESERVE as movable pages so we can get those
+	 * areas back if necessary. Otherwise, we may have to free
+	 * excessively into the page allocator
+	 */
+	if (migratetype >= MIGRATE_PCPTYPES) {
+		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+			free_one_page(zone, page, 0, migratetype);
+			goto out;
+		}
+		migratetype = MIGRATE_MOVABLE;
+	}
+
 	if (cold)
-		list_add_tail(&page->lru, &pcp->list);
+		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
-		list_add(&page->lru, &pcp->list);
+		list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
-		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->batch, pcp);
 		pcp->count -= pcp->batch;
 	}
+
+out:
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -1134,46 +1166,24 @@ again:
 	cpu  = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
+		struct list_head *list;
 
 		pcp = &zone_pcp(zone, cpu)->pcp;
+		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
-		if (!pcp->count) {
-			pcp->count = rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
-					migratetype, cold);
-			if (unlikely(!pcp->count))
-				goto failed;
-		}
-
-		/* Find a page of the appropriate migrate type */
-		if (cold) {
-			list_for_each_entry_reverse(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		} else {
-			list_for_each_entry(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		}
-
-		/* Allocate more to the pcp list if necessary */
-		if (unlikely(&page->lru == &pcp->list)) {
-			int get_one_page = 0;
-
+		if (list_empty(list)) {
 			pcp->count += rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
+					pcp->batch, list,
 					migratetype, cold);
-			list_for_each_entry(page, &pcp->list, lru) {
-				if (get_pageblock_migratetype(page) !=
-					    MIGRATE_ISOLATE) {
-					get_one_page = 1;
-					break;
-				}
-			}
-			if (!get_one_page)
+			if (unlikely(list_empty(list)))
 				goto failed;
 		}
 
+		if (cold)
+			page = list_entry(list->prev, struct page, lru);
+		else
+			page = list_entry(list->next, struct page, lru);
+
 		list_del(&page->lru);
 		pcp->count--;
 	} else {
@@ -3024,6 +3034,7 @@ static int zone_batchsize(struct zone *zone)
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
+	int migratetype;
 
 	memset(p, 0, sizeof(*p));
 
@@ -3031,7 +3042,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 	pcp->count = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
+		INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
 
 /*
@@ -3223,7 +3235,7 @@ static int __zone_pcp_update(void *data)
 		pcp = &pset->pcp;
 
 		local_irq_save(flags);
-		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->count, pcp);
 		setup_pageset(pset, batch);
 		local_irq_restore(flags);
 	}
-- 
cgit v0.10.2


From a6f9edd65beaef24836e8934c8912c1e974dd45c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Mon, 21 Sep 2009 17:03:20 -0700
Subject: page-allocator: maintain rolling count of pages to free from the PCP

When round-robin freeing pages from the PCP lists, empty lists may be
encountered.  In the event one of the lists has more pages than another,
there may be numerous checks for list_empty() which is undesirable.  This
patch maintains a count of pages to free which is incremented when empty
lists are encountered.  The intention is that more pages will then be
freed from fuller lists than the empty ones reducing the number of empty
list checks in the free path.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1b1c39e..6877e22 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -525,32 +525,38 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 					struct per_cpu_pages *pcp)
 {
 	int migratetype = 0;
+	int batch_free = 0;
 
 	spin_lock(&zone->lock);
 	zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
 	zone->pages_scanned = 0;
 
 	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
-	while (count--) {
+	while (count) {
 		struct page *page;
 		struct list_head *list;
 
 		/*
-		 * Remove pages from lists in a round-robin fashion. This spinning
-		 * around potentially empty lists is bloody awful, alternatives that
-		 * don't suck are welcome
+		 * Remove pages from lists in a round-robin fashion. A
+		 * batch_free count is maintained that is incremented when an
+		 * empty list is encountered.  This is so more pages are freed
+		 * off fuller lists instead of spinning excessively around empty
+		 * lists
 		 */
 		do {
+			batch_free++;
 			if (++migratetype == MIGRATE_PCPTYPES)
 				migratetype = 0;
 			list = &pcp->lists[migratetype];
 		} while (list_empty(list));
 
-		page = list_entry(list->prev, struct page, lru);
-		/* have to delete it as __free_one_page list manipulates */
-		list_del(&page->lru);
-		trace_mm_page_pcpu_drain(page, 0, migratetype);
-		__free_one_page(page, zone, 0, migratetype);
+		do {
+			page = list_entry(list->prev, struct page, lru);
+			/* must delete as __free_one_page list manipulates */
+			list_del(&page->lru);
+			__free_one_page(page, zone, 0, migratetype);
+			trace_mm_page_pcpu_drain(page, 0, migratetype);
+		} while (--count && --batch_free && !list_empty(list));
 	}
 	spin_unlock(&zone->lock);
 }
-- 
cgit v0.10.2


From 5d3bc2709114b416cab588c577e02c2470e40a6c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan.kim@gmail.com>
Date: Mon, 21 Sep 2009 17:03:21 -0700
Subject: mm: fix NUMA accounting in numastat.txt

In Documentation/numastat.txt, it confused me.  For example, there are
nodes [0,1] in system.

barrios:~$ cat /proc/zoneinfo | egrep 'numa|zone'
Node 0, zone	DMA
	numa_hit	33226
	numa_miss	1739
	numa_foreign	27978
	..
	..
Node 1, zone	DMA
	numa_hit	307
	numa_miss	46900
	numa_foreign	0

1) In node 0,  NUMA_MISS means it wanted to allocate page
in node 1 but ended up with page in node 0

2) In node 0, NUMA_FOREIGN means it wanted to allocate page
in node 0 but ended up with page from Node 1.

But now, numastat explains it oppositely about (MISS, FOREIGN).
Let's fix up with viewpoint of zone.

Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/numastat.txt b/Documentation/numastat.txt
index 80133ac..9fcc9a6 100644
--- a/Documentation/numastat.txt
+++ b/Documentation/numastat.txt
@@ -7,10 +7,10 @@ All units are pages. Hugepages have separate counters.
 
 numa_hit			A process wanted to allocate memory from this node,
 					and succeeded.
-numa_miss			A process wanted to allocate memory from this node,
-					but ended up with memory from another.
-numa_foreign		A process wanted to allocate on another node,
-				    but ended up with memory from this one.
+numa_miss			A process wanted to allocate memory from another node,
+					but ended up with memory from this node.
+numa_foreign		A process wanted to allocate on this node,
+				    but ended up with memory from another one.
 local_node			A process ran on this node and got memory from it.
 other_node			A process ran on this node and got memory from another node.
 interleave_hit 		Interleaving wanted to allocate from this node
-- 
cgit v0.10.2


From 408e82b78bcc9f1b47c76e833c3df97f675947de Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:23 -0700
Subject: mm: munlock use follow_page

Hiroaki Wakabayashi points out that when mlock() has been interrupted
by SIGKILL, the subsequent munlock() takes unnecessarily long because
its use of __get_user_pages() insists on faulting in all the pages
which mlock() never reached.

It's worse than slowness if mlock() is terminated by Out Of Memory kill:
the munlock_vma_pages_all() in exit_mmap() insists on faulting in all the
pages which mlock() could not find memory for; so innocent bystanders are
killed too, and perhaps the system hangs.

__get_user_pages() does a lot that's silly for munlock(): so remove the
munlock option from __mlock_vma_pages_range(), and use a simple loop of
follow_page()s in munlock_vma_pages_range() instead; ignoring absent
pages, and not marking present pages as accessed or dirty.

(Change munlock() to only go so far as mlock() reached?  That does not
work out, given the convention that mlock() claims complete success even
when it has to give up early - in part so that an underlying file can be
extended later, and those pages locked which earlier would give SIGBUS.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: <stable@kernel.org>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Hiroaki Wakabayashi <primulaelatior@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mlock.c b/mm/mlock.c
index 45eb650..e13918d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -139,49 +139,36 @@ static void munlock_vma_page(struct page *page)
 }
 
 /**
- * __mlock_vma_pages_range() -  mlock/munlock a range of pages in the vma.
+ * __mlock_vma_pages_range() -  mlock a range of pages in the vma.
  * @vma:   target vma
  * @start: start address
  * @end:   end address
- * @mlock: 0 indicate munlock, otherwise mlock.
  *
- * If @mlock == 0, unlock an mlocked range;
- * else mlock the range of pages.  This takes care of making the pages present ,
- * too.
+ * This takes care of making the pages present too.
  *
  * return 0 on success, negative error code on error.
  *
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end,
-				   int mlock)
+				    unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
 	struct page *pages[16]; /* 16 gives a reasonable batch */
 	int nr_pages = (end - start) / PAGE_SIZE;
 	int ret = 0;
-	int gup_flags = 0;
+	int gup_flags;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(end   & ~PAGE_MASK);
 	VM_BUG_ON(start < vma->vm_start);
 	VM_BUG_ON(end   > vma->vm_end);
-	VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
-		  (atomic_read(&mm->mm_users) != 0));
-
-	/*
-	 * mlock:   don't page populate if vma has PROT_NONE permission.
-	 * munlock: always do munlock although the vma has PROT_NONE
-	 *          permission, or SIGKILL is pending.
-	 */
-	if (!mlock)
-		gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
-			     GUP_FLAGS_IGNORE_SIGKILL;
+	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
+	gup_flags = 0;
 	if (vma->vm_flags & VM_WRITE)
-		gup_flags |= GUP_FLAGS_WRITE;
+		gup_flags = GUP_FLAGS_WRITE;
 
 	while (nr_pages > 0) {
 		int i;
@@ -201,19 +188,10 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		 * This can happen for, e.g., VM_NONLINEAR regions before
 		 * a page has been allocated and mapped at a given offset,
 		 * or for addresses that map beyond end of a file.
-		 * We'll mlock the the pages if/when they get faulted in.
+		 * We'll mlock the pages if/when they get faulted in.
 		 */
 		if (ret < 0)
 			break;
-		if (ret == 0) {
-			/*
-			 * We know the vma is there, so the only time
-			 * we cannot get a single page should be an
-			 * error (ret < 0) case.
-			 */
-			WARN_ON(1);
-			break;
-		}
 
 		lru_add_drain();	/* push cached pages to LRU */
 
@@ -224,28 +202,22 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 			/*
 			 * Because we lock page here and migration is blocked
 			 * by the elevated reference, we need only check for
-			 * page truncation (file-cache only).
+			 * file-cache page truncation.  This page->mapping
+			 * check also neatly skips over the ZERO_PAGE(),
+			 * though if that's common we'd prefer not to lock it.
 			 */
-			if (page->mapping) {
-				if (mlock)
-					mlock_vma_page(page);
-				else
-					munlock_vma_page(page);
-			}
+			if (page->mapping)
+				mlock_vma_page(page);
 			unlock_page(page);
-			put_page(page);		/* ref from get_user_pages() */
-
-			/*
-			 * here we assume that get_user_pages() has given us
-			 * a list of virtually contiguous pages.
-			 */
-			addr += PAGE_SIZE;	/* for next get_user_pages() */
-			nr_pages--;
+			put_page(page);	/* ref from get_user_pages() */
 		}
+
+		addr += ret * PAGE_SIZE;
+		nr_pages -= ret;
 		ret = 0;
 	}
 
-	return ret;	/* count entire vma as locked_vm */
+	return ret;	/* 0 or negative error code */
 }
 
 /*
@@ -289,7 +261,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
 
-		__mlock_vma_pages_range(vma, start, end, 1);
+		__mlock_vma_pages_range(vma, start, end);
 
 		/* Hide errors from mmap() and other callers */
 		return 0;
@@ -310,7 +282,6 @@ no_mlock:
 	return nr_pages;		/* error or pages NOT mlocked */
 }
 
-
 /*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
@@ -330,10 +301,24 @@ no_mlock:
  * free them.  This will result in freeing mlocked pages.
  */
 void munlock_vma_pages_range(struct vm_area_struct *vma,
-			   unsigned long start, unsigned long end)
+			     unsigned long start, unsigned long end)
 {
+	unsigned long addr;
+
+	lru_add_drain();
 	vma->vm_flags &= ~VM_LOCKED;
-	__mlock_vma_pages_range(vma, start, end, 0);
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		struct page *page = follow_page(vma, addr, FOLL_GET);
+		if (page) {
+			lock_page(page);
+			if (page->mapping)
+				munlock_vma_page(page);
+			unlock_page(page);
+			put_page(page);
+		}
+		cond_resched();
+	}
 }
 
 /*
@@ -400,18 +385,14 @@ success:
 	 * It's okay if try_to_unmap_one unmaps a page just after we
 	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
 	 */
-	vma->vm_flags = newflags;
 
 	if (lock) {
-		ret = __mlock_vma_pages_range(vma, start, end, 1);
-
-		if (ret > 0) {
-			mm->locked_vm -= ret;
-			ret = 0;
-		} else
-			ret = __mlock_posix_error_return(ret); /* translate if needed */
+		vma->vm_flags = newflags;
+		ret = __mlock_vma_pages_range(vma, start, end);
+		if (ret < 0)
+			ret = __mlock_posix_error_return(ret);
 	} else {
-		__mlock_vma_pages_range(vma, start, end, 0);
+		munlock_vma_pages_range(vma, start, end);
 	}
 
 out:
-- 
cgit v0.10.2


From 1c3aff1ceec2cc86810e2690e67873ff0c505862 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:24 -0700
Subject: mm: remove unused GUP flags

GUP_FLAGS_IGNORE_VMA_PERMISSIONS and GUP_FLAGS_IGNORE_SIGKILL were
flags added solely to prevent __get_user_pages() from doing some of
what it usually does, in the munlock case: we can now remove them.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/internal.h b/mm/internal.h
index f290c4d..166765c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -250,10 +250,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 }
 #endif /* CONFIG_SPARSEMEM */
 
-#define GUP_FLAGS_WRITE                  0x1
-#define GUP_FLAGS_FORCE                  0x2
-#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
-#define GUP_FLAGS_IGNORE_SIGKILL         0x8
+#define GUP_FLAGS_WRITE		0x01
+#define GUP_FLAGS_FORCE		0x02
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int len, int flags,
diff --git a/mm/memory.c b/mm/memory.c
index 3cbeaab..4b5200f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1217,8 +1217,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	unsigned int vm_flags = 0;
 	int write = !!(flags & GUP_FLAGS_WRITE);
 	int force = !!(flags & GUP_FLAGS_FORCE);
-	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
-	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
 
 	if (nr_pages <= 0)
 		return 0;
@@ -1244,7 +1242,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			pte_t *pte;
 
 			/* user gate pages are read-only */
-			if (!ignore && write)
+			if (write)
 				return i ? : -EFAULT;
 			if (pg > TASK_SIZE)
 				pgd = pgd_offset_k(pg);
@@ -1278,7 +1276,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 		if (!vma ||
 		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
-		    (!ignore && !(vm_flags & vma->vm_flags)))
+		    !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
 		if (is_vm_hugetlb_page(vma)) {
@@ -1298,13 +1296,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 			/*
 			 * If we have a pending SIGKILL, don't keep faulting
-			 * pages and potentially allocating memory, unless
-			 * current is handling munlock--e.g., on exit. In
-			 * that case, we are not allocating memory.  Rather,
-			 * we're only unlocking already resident/mapped pages.
+			 * pages and potentially allocating memory.
 			 */
-			if (unlikely(!ignore_sigkill &&
-					fatal_signal_pending(current)))
+			if (unlikely(fatal_signal_pending(current)))
 				return i ? i : -ERESTARTSYS;
 
 			if (write)
diff --git a/mm/nommu.c b/mm/nommu.c
index 3b90086..386443e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -176,7 +176,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	int i;
 	int write = !!(flags & GUP_FLAGS_WRITE);
 	int force = !!(flags & GUP_FLAGS_FORCE);
-	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
 	/* calculate required read or write permissions.
 	 * - if 'force' is set, we only require the "MAY" flags.
@@ -190,8 +189,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			goto finish_or_fault;
 
 		/* protect what we can, including chardevs */
-		if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
-		    (!ignore && !(vm_flags & vma->vm_flags)))
+		if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+		    !(vm_flags & vma->vm_flags))
 			goto finish_or_fault;
 
 		if (pages) {
@@ -210,7 +209,6 @@ finish_or_fault:
 	return i ? : -EFAULT;
 }
 
-
 /*
  * get a list of pages in an address range belonging to the specified process
  * and indicate the VMA that covers each page
-- 
cgit v0.10.2


From f3e8fccd06d27773186a0094371daf2d84c79469 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:25 -0700
Subject: mm: add get_dump_page

In preparation for the next patch, add a simple get_dump_page(addr)
interface for the CONFIG_ELF_CORE dumpers to use, instead of calling
get_user_pages() directly.  They're not interested in errors: they
just want to use holes as much as possible, to save space and make
sure that the data is aligned where the headers said it would be.

Oh, and don't use that horrid DUMP_SEEK(off) macro!

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7c1e65d..442d94f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1280,9 +1280,6 @@ static int writenote(struct memelfnote *men, struct file *file,
 #define DUMP_WRITE(addr, nr)	\
 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
 		goto end_coredump;
-#define DUMP_SEEK(off)	\
-	if (!dump_seek(file, (off))) \
-		goto end_coredump;
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
 			    u16 machine, u32 flags, u8 osabi)
@@ -2016,7 +2013,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 		goto end_coredump;
 
 	/* Align to page */
-	DUMP_SEEK(dataoff - foffset);
+	if (!dump_seek(file, dataoff - foffset))
+		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
@@ -2027,33 +2025,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 
 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
 			struct page *page;
-			struct vm_area_struct *tmp_vma;
-
-			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
-						&page, &tmp_vma) <= 0) {
-				DUMP_SEEK(PAGE_SIZE);
-			} else {
-				if (page == ZERO_PAGE(0)) {
-					if (!dump_seek(file, PAGE_SIZE)) {
-						page_cache_release(page);
-						goto end_coredump;
-					}
-				} else {
-					void *kaddr;
-					flush_cache_page(tmp_vma, addr,
-							 page_to_pfn(page));
-					kaddr = kmap(page);
-					if ((size += PAGE_SIZE) > limit ||
-					    !dump_write(file, kaddr,
-					    PAGE_SIZE)) {
-						kunmap(page);
-						page_cache_release(page);
-						goto end_coredump;
-					}
-					kunmap(page);
-				}
+			int stop;
+
+			page = get_dump_page(addr);
+			if (page) {
+				void *kaddr = kmap(page);
+				stop = ((size += PAGE_SIZE) > limit) ||
+					!dump_write(file, kaddr, PAGE_SIZE);
+				kunmap(page);
 				page_cache_release(page);
-			}
+			} else
+				stop = !dump_seek(file, PAGE_SIZE);
+			if (stop)
+				goto end_coredump;
 		}
 	}
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 20fbece..7628547 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1325,9 +1325,6 @@ static int writenote(struct memelfnote *men, struct file *file)
 #define DUMP_WRITE(addr, nr)	\
 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
 		goto end_coredump;
-#define DUMP_SEEK(off)	\
-	if (!dump_seek(file, (off))) \
-		goto end_coredump;
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
 {
@@ -1518,6 +1515,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
 			   unsigned long *limit, unsigned long mm_flags)
 {
 	struct vm_area_struct *vma;
+	int err = 0;
 
 	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
 		unsigned long addr;
@@ -1525,43 +1523,26 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
 		if (!maydump(vma, mm_flags))
 			continue;
 
-		for (addr = vma->vm_start;
-		     addr < vma->vm_end;
-		     addr += PAGE_SIZE
-		     ) {
-			struct vm_area_struct *vma;
-			struct page *page;
-
-			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
-					   &page, &vma) <= 0) {
-				DUMP_SEEK(file->f_pos + PAGE_SIZE);
-			}
-			else if (page == ZERO_PAGE(0)) {
-				page_cache_release(page);
-				DUMP_SEEK(file->f_pos + PAGE_SIZE);
-			}
-			else {
-				void *kaddr;
-
-				flush_cache_page(vma, addr, page_to_pfn(page));
-				kaddr = kmap(page);
-				if ((*size += PAGE_SIZE) > *limit ||
-				    !dump_write(file, kaddr, PAGE_SIZE)
-				    ) {
-					kunmap(page);
-					page_cache_release(page);
-					return -EIO;
-				}
+		for (addr = vma->vm_start; addr < vma->vm_end;
+							addr += PAGE_SIZE) {
+			struct page *page = get_dump_page(addr);
+			if (page) {
+				void *kaddr = kmap(page);
+				*size += PAGE_SIZE;
+				if (*size > *limit)
+					err = -EFBIG;
+				else if (!dump_write(file, kaddr, PAGE_SIZE))
+					err = -EIO;
 				kunmap(page);
 				page_cache_release(page);
-			}
+			} else if (!dump_seek(file, file->f_pos + PAGE_SIZE))
+				err = -EFBIG;
+			if (err)
+				goto out;
 		}
 	}
-
-	return 0;
-
-end_coredump:
-	return -EFBIG;
+out:
+	return err;
 }
 #endif
 
@@ -1802,7 +1783,8 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 				goto end_coredump;
 	}
 
-	DUMP_SEEK(dataoff);
+	if (!dump_seek(file, dataoff))
+		goto end_coredump;
 
 	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
 		goto end_coredump;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 19ff81c..e41795b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -817,6 +817,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
+struct page *get_dump_page(unsigned long addr);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned long offset);
diff --git a/mm/memory.c b/mm/memory.c
index 4b5200f..a8430ff 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1423,9 +1423,40 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
 }
-
 EXPORT_SYMBOL(get_user_pages);
 
+/**
+ * get_dump_page() - pin user page in memory while writing it to core dump
+ * @addr: user address
+ *
+ * Returns struct page pointer of user page pinned for dump,
+ * to be freed afterwards by page_cache_release() or put_page().
+ *
+ * Returns NULL on any kind of failure - a hole must then be inserted into
+ * the corefile, to preserve alignment with its headers; and also returns
+ * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
+ * allowing a hole to be left in the corefile to save diskspace.
+ *
+ * Called without mmap_sem, but after all other threads have been killed.
+ */
+#ifdef CONFIG_ELF_CORE
+struct page *get_dump_page(unsigned long addr)
+{
+	struct vm_area_struct *vma;
+	struct page *page;
+
+	if (__get_user_pages(current, current->mm, addr, 1,
+				GUP_FLAGS_FORCE, &page, &vma) < 1)
+		return NULL;
+	if (page == ZERO_PAGE(0)) {
+		page_cache_release(page);
+		return NULL;
+	}
+	flush_cache_page(vma, addr, page_to_pfn(page));
+	return page;
+}
+#endif /* CONFIG_ELF_CORE */
+
 pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
 			spinlock_t **ptl)
 {
-- 
cgit v0.10.2


From 8e4b9a60718970bbc02dfd3abd0b956ab65af231 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:26 -0700
Subject: mm: FOLL_DUMP replace FOLL_ANON

The "FOLL_ANON optimization" and its use_zero_page() test have caused
confusion and bugs: why does it test VM_SHARED? for the very good but
unsatisfying reason that VMware crashed without.  As we look to maybe
reinstating anonymous use of the ZERO_PAGE, we need to sort this out.

Easily done: it's silly for __get_user_pages() and follow_page() to
be guessing whether it's safe to assume that they're being used for
a coredump (which can take a shortcut snapshot where other uses must
handle a fault) - just tell them with GUP_FLAGS_DUMP and FOLL_DUMP.

get_dump_page() doesn't even want a ZERO_PAGE: an error suits fine.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e41795b..45ee5b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1231,7 +1231,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
 #define FOLL_GET	0x04	/* do get_page on page */
-#define FOLL_ANON	0x08	/* give ZERO_PAGE if no pgtable */
+#define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/internal.h b/mm/internal.h
index 166765c..d414750 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -252,6 +252,7 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 
 #define GUP_FLAGS_WRITE		0x01
 #define GUP_FLAGS_FORCE		0x02
+#define GUP_FLAGS_DUMP		0x04
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int len, int flags,
diff --git a/mm/memory.c b/mm/memory.c
index a8430ff..532a55b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1174,41 +1174,22 @@ no_page:
 	pte_unmap_unlock(ptep, ptl);
 	if (!pte_none(pte))
 		return page;
-	/* Fall through to ZERO_PAGE handling */
+
 no_page_table:
 	/*
 	 * When core dumping an enormous anonymous area that nobody
-	 * has touched so far, we don't want to allocate page tables.
+	 * has touched so far, we don't want to allocate unnecessary pages or
+	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
+	 * then get_dump_page() will return NULL to leave a hole in the dump.
+	 * But we can only make this optimization where a hole would surely
+	 * be zero-filled if handle_mm_fault() actually did handle it.
 	 */
-	if (flags & FOLL_ANON) {
-		page = ZERO_PAGE(0);
-		if (flags & FOLL_GET)
-			get_page(page);
-		BUG_ON(flags & FOLL_WRITE);
-	}
+	if ((flags & FOLL_DUMP) &&
+	    (!vma->vm_ops || !vma->vm_ops->fault))
+		return ERR_PTR(-EFAULT);
 	return page;
 }
 
-/* Can we do the FOLL_ANON optimization? */
-static inline int use_zero_page(struct vm_area_struct *vma)
-{
-	/*
-	 * We don't want to optimize FOLL_ANON for make_pages_present()
-	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
-	 * we want to get the page from the page tables to make sure
-	 * that we serialize and update with any other user of that
-	 * mapping.
-	 */
-	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
-		return 0;
-	/*
-	 * And if we have a fault routine, it's not an anonymous region.
-	 */
-	return !vma->vm_ops || !vma->vm_ops->fault;
-}
-
-
-
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int nr_pages, int flags,
 		     struct page **pages, struct vm_area_struct **vmas)
@@ -1288,8 +1269,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
-		if (!write && use_zero_page(vma))
-			foll_flags |= FOLL_ANON;
+		if (flags & GUP_FLAGS_DUMP)
+			foll_flags |= FOLL_DUMP;
 
 		do {
 			struct page *page;
@@ -1446,7 +1427,7 @@ struct page *get_dump_page(unsigned long addr)
 	struct page *page;
 
 	if (__get_user_pages(current, current->mm, addr, 1,
-				GUP_FLAGS_FORCE, &page, &vma) < 1)
+			GUP_FLAGS_FORCE | GUP_FLAGS_DUMP, &page, &vma) < 1)
 		return NULL;
 	if (page == ZERO_PAGE(0)) {
 		page_cache_release(page);
-- 
cgit v0.10.2


From 2a15efc953b26ad57d7d38b9e6782d57e53b4ab2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:27 -0700
Subject: mm: follow_hugetlb_page flags

follow_hugetlb_page() shouldn't be guessing about the coredump case
either: pass the foll_flags down to it, instead of just the write bit.

Remove that obscure huge_zeropage_ok() test.  The decision is easy,
though unlike the non-huge case - here vm_ops->fault is always set.
But we know that a fault would serve up zeroes, unless there's
already a hugetlbfs pagecache page to back the range.

(Alternatively, since hugetlb pages aren't swapped out under pressure,
you could save more dump space by arguing that a page not yet faulted
into this process cannot be relevant to the dump; but that would be
more surprising.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 16cdb75..e7f0fab 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
 int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
+int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
+			struct page **, struct vm_area_struct **,
+			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
 			unsigned long, unsigned long, struct page *);
 void __unmap_hugepage_range(struct vm_area_struct *,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c001f84..6b41f70 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
 	return find_lock_page(mapping, idx);
 }
 
+/* Return whether there is a pagecache page to back given address within VMA */
+static bool hugetlbfs_backed(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	struct address_space *mapping;
+	pgoff_t idx;
+	struct page *page;
+
+	mapping = vma->vm_file->f_mapping;
+	idx = vma_hugecache_offset(h, vma, address);
+
+	page = find_get_page(mapping, idx);
+	if (page)
+		put_page(page);
+	return page != NULL;
+}
+
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, unsigned int flags)
 {
@@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
-static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
-{
-	if (!ptep || write || shared)
-		return 0;
-	else
-		return huge_pte_none(huge_ptep_get(ptep));
-}
-
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
-			int write)
+			unsigned int flags)
 {
 	unsigned long pfn_offset;
 	unsigned long vaddr = *position;
 	int remainder = *length;
 	struct hstate *h = hstate_vma(vma);
-	int zeropage_ok = 0;
-	int shared = vma->vm_flags & VM_SHARED;
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
+		int absent;
 		struct page *page;
 
 		/*
 		 * Some archs (sparc64, sh*) have multiple pte_ts to
-		 * each hugepage.  We have to make * sure we get the
+		 * each hugepage.  We have to make sure we get the
 		 * first, for the page indexing below to work.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
-		if (huge_zeropage_ok(pte, write, shared))
-			zeropage_ok = 1;
+		absent = !pte || huge_pte_none(huge_ptep_get(pte));
+
+		/*
+		 * When coredumping, it suits get_dump_page if we just return
+		 * an error if there's a hole and no huge pagecache to back it.
+		 */
+		if (absent &&
+		    ((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
+			remainder = 0;
+			break;
+		}
 
-		if (!pte ||
-		    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
-		    (write && !pte_write(huge_ptep_get(pte)))) {
+		if (absent ||
+		    ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
 			spin_unlock(&mm->page_table_lock);
-			ret = hugetlb_fault(mm, vma, vaddr, write);
+			ret = hugetlb_fault(mm, vma, vaddr,
+				(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
 			spin_lock(&mm->page_table_lock);
 			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
 			remainder = 0;
-			if (!i)
-				i = -EFAULT;
 			break;
 		}
 
@@ -2266,10 +2281,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
-			if (zeropage_ok)
-				pages[i] = ZERO_PAGE(0);
-			else
-				pages[i] = mem_map_offset(page, pfn_offset);
+			pages[i] = mem_map_offset(page, pfn_offset);
 			get_page(pages[i]);
 		}
 
@@ -2293,7 +2305,7 @@ same_page:
 	*length = remainder;
 	*position = vaddr;
 
-	return i;
+	return i ? i : -EFAULT;
 }
 
 void hugetlb_change_protection(struct vm_area_struct *vma,
diff --git a/mm/memory.c b/mm/memory.c
index 532a55b..6359a4f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		    !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
-		if (is_vm_hugetlb_page(vma)) {
-			i = follow_hugetlb_page(mm, vma, pages, vmas,
-						&start, &nr_pages, i, write);
-			continue;
-		}
-
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
 		if (flags & GUP_FLAGS_DUMP)
 			foll_flags |= FOLL_DUMP;
+		if (write)
+			foll_flags |= FOLL_WRITE;
+
+		if (is_vm_hugetlb_page(vma)) {
+			i = follow_hugetlb_page(mm, vma, pages, vmas,
+					&start, &nr_pages, i, foll_flags);
+			continue;
+		}
 
 		do {
 			struct page *page;
-- 
cgit v0.10.2


From 1ac0cb5d0e22d5e483f56b2bc12172dec1cf7536 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:29 -0700
Subject: mm: fix anonymous dirtying

do_anonymous_page() has been wrong to dirty the pte regardless.
If it's not going to mark the pte writable, then it won't help
to mark it dirty here, and clogs up memory with pages which will
need swap instead of being thrown away.  Especially wrong if no
overcommit is chosen, and this vma is not yet VM_ACCOUNTed -
we could exceed the limit and OOM despite no overcommit.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: <stable@kernel.org>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memory.c b/mm/memory.c
index 6359a4f..fc38d4e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2653,7 +2653,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto oom_free_page;
 
 	entry = mk_pte(page, vma->vm_page_prot);
-	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	if (vma->vm_flags & VM_WRITE)
+		entry = pte_mkwrite(pte_mkdirty(entry));
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (!pte_none(*page_table))
-- 
cgit v0.10.2


From a13ea5b759645a0779edc6dbfec9abfd83220844 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:30 -0700
Subject: mm: reinstate ZERO_PAGE

KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
using in 2.6.24.  And there were a couple of regression reports on LKML.

Following suggestions from Linus, reinstate do_anonymous_page() use of
the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
with (map)count updates - let vm_normal_page() regard it as abnormal.

Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
most powerpc): that's not essential, but minimizes additional branches
(keeping them in the unlikely pte_special case); and incidentally
excludes mips (some models of which needed eight colours of ZERO_PAGE
to avoid costly exceptions).

Don't be fanatical about avoiding ZERO_PAGE updates: get_user_pages()
callers won't want to make exceptions for it, so increment its count
there.  Changes to mlock and migration? happily seems not needed.

In most places it's quicker to check pfn than struct page address:
prepare a __read_mostly zero_pfn for that.  Does get_dump_page()
still need its ZERO_PAGE check? probably not, but keep it anyway.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memory.c b/mm/memory.c
index fc38d4e..c8b5b94 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -108,6 +108,17 @@ static int __init disable_randmaps(char *s)
 }
 __setup("norandmaps", disable_randmaps);
 
+static unsigned long zero_pfn __read_mostly;
+
+/*
+ * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
+ */
+static int __init init_zero_pfn(void)
+{
+	zero_pfn = page_to_pfn(ZERO_PAGE(0));
+	return 0;
+}
+core_initcall(init_zero_pfn);
 
 /*
  * If a p?d_bad entry is found while walking page tables, report
@@ -499,7 +510,9 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 	if (HAVE_PTE_SPECIAL) {
 		if (likely(!pte_special(pte)))
 			goto check_pfn;
-		if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
+		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+			return NULL;
+		if (pfn != zero_pfn)
 			print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
@@ -1144,9 +1157,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
+
 	page = vm_normal_page(vma, address, pte);
-	if (unlikely(!page))
-		goto bad_page;
+	if (unlikely(!page)) {
+		if ((flags & FOLL_DUMP) ||
+		    pte_pfn(pte) != zero_pfn)
+			goto bad_page;
+		page = pte_page(pte);
+	}
 
 	if (flags & FOLL_GET)
 		get_page(page);
@@ -2084,10 +2102,19 @@ gotten:
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	VM_BUG_ON(old_page == ZERO_PAGE(0));
-	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-	if (!new_page)
-		goto oom;
+
+	if (pte_pfn(orig_pte) == zero_pfn) {
+		new_page = alloc_zeroed_user_highpage_movable(vma, address);
+		if (!new_page)
+			goto oom;
+	} else {
+		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+		if (!new_page)
+			goto oom;
+		cow_user_page(new_page, old_page, address, vma);
+	}
+	__SetPageUptodate(new_page);
+
 	/*
 	 * Don't let another task, with possibly unlocked vma,
 	 * keep the mlocked page.
@@ -2097,8 +2124,6 @@ gotten:
 		clear_page_mlock(old_page);
 		unlock_page(old_page);
 	}
-	cow_user_page(new_page, old_page, address, vma);
-	__SetPageUptodate(new_page);
 
 	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
 		goto oom_free_new;
@@ -2639,6 +2664,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	pte_t entry;
 
+	if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
+		entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
+		ptl = pte_lockptr(mm, pmd);
+		spin_lock(ptl);
+		if (!pte_none(*page_table))
+			goto unlock;
+		goto setpte;
+	}
+
 	/* Allocate our own private page. */
 	pte_unmap(page_table);
 
@@ -2662,6 +2696,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	inc_mm_counter(mm, anon_rss);
 	page_add_new_anon_rmap(page, vma, address);
+setpte:
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
-- 
cgit v0.10.2


From 58fa879e1e640a1856f736b418984ebeccee1c95 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:31 -0700
Subject: mm: FOLL flags for GUP flags

__get_user_pages() has been taking its own GUP flags, then processing
them into FOLL flags for follow_page().  Though oddly named, the FOLL
flags are more widely used, so pass them to __get_user_pages() now.
Sorry, VM flags, VM_FAULT flags and FAULT_FLAGs are still distinct.

(The patch to __get_user_pages() looks peculiar, with both gup_flags
and foll_flags: the gup_flags remain constant; but as before there's
an exceptional case, out of scope of the patch, in which foll_flags
per page have FOLL_WRITE masked off.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 45ee5b5..5409ece 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1232,6 +1232,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_TOUCH	0x02	/* mark page accessed */
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
+#define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/internal.h b/mm/internal.h
index d414750..7559657 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -250,12 +250,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 }
 #endif /* CONFIG_SPARSEMEM */
 
-#define GUP_FLAGS_WRITE		0x01
-#define GUP_FLAGS_FORCE		0x02
-#define GUP_FLAGS_DUMP		0x04
-
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long start, int len, int flags,
+		     unsigned long start, int len, unsigned int foll_flags,
 		     struct page **pages, struct vm_area_struct **vmas);
 
 #define ZONE_RECLAIM_NOSCAN	-2
diff --git a/mm/memory.c b/mm/memory.c
index c8b5b94..5c694f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1209,27 +1209,29 @@ no_page_table:
 }
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long start, int nr_pages, int flags,
+		     unsigned long start, int nr_pages, unsigned int gup_flags,
 		     struct page **pages, struct vm_area_struct **vmas)
 {
 	int i;
-	unsigned int vm_flags = 0;
-	int write = !!(flags & GUP_FLAGS_WRITE);
-	int force = !!(flags & GUP_FLAGS_FORCE);
+	unsigned long vm_flags;
 
 	if (nr_pages <= 0)
 		return 0;
+
+	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
+
 	/* 
 	 * Require read or write permissions.
-	 * If 'force' is set, we only require the "MAY" flags.
+	 * If FOLL_FORCE is set, we only require the "MAY" flags.
 	 */
-	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
-	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+	vm_flags  = (gup_flags & FOLL_WRITE) ?
+			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+	vm_flags &= (gup_flags & FOLL_FORCE) ?
+			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
 	i = 0;
 
 	do {
 		struct vm_area_struct *vma;
-		unsigned int foll_flags;
 
 		vma = find_extend_vma(mm, start);
 		if (!vma && in_gate_area(tsk, start)) {
@@ -1241,7 +1243,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			pte_t *pte;
 
 			/* user gate pages are read-only */
-			if (write)
+			if (gup_flags & FOLL_WRITE)
 				return i ? : -EFAULT;
 			if (pg > TASK_SIZE)
 				pgd = pgd_offset_k(pg);
@@ -1278,22 +1280,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		    !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
-		foll_flags = FOLL_TOUCH;
-		if (pages)
-			foll_flags |= FOLL_GET;
-		if (flags & GUP_FLAGS_DUMP)
-			foll_flags |= FOLL_DUMP;
-		if (write)
-			foll_flags |= FOLL_WRITE;
-
 		if (is_vm_hugetlb_page(vma)) {
 			i = follow_hugetlb_page(mm, vma, pages, vmas,
-					&start, &nr_pages, i, foll_flags);
+					&start, &nr_pages, i, gup_flags);
 			continue;
 		}
 
 		do {
 			struct page *page;
+			unsigned int foll_flags = gup_flags;
 
 			/*
 			 * If we have a pending SIGKILL, don't keep faulting
@@ -1302,9 +1297,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			if (unlikely(fatal_signal_pending(current)))
 				return i ? i : -ERESTARTSYS;
 
-			if (write)
-				foll_flags |= FOLL_WRITE;
-
 			cond_resched();
 			while (!(page = follow_page(vma, start, foll_flags))) {
 				int ret;
@@ -1415,12 +1407,14 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int nr_pages, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
 {
-	int flags = 0;
+	int flags = FOLL_TOUCH;
 
+	if (pages)
+		flags |= FOLL_GET;
 	if (write)
-		flags |= GUP_FLAGS_WRITE;
+		flags |= FOLL_WRITE;
 	if (force)
-		flags |= GUP_FLAGS_FORCE;
+		flags |= FOLL_FORCE;
 
 	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
 }
@@ -1447,7 +1441,7 @@ struct page *get_dump_page(unsigned long addr)
 	struct page *page;
 
 	if (__get_user_pages(current, current->mm, addr, 1,
-			GUP_FLAGS_FORCE | GUP_FLAGS_DUMP, &page, &vma) < 1)
+			FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
 		return NULL;
 	if (page == ZERO_PAGE(0)) {
 		page_cache_release(page);
diff --git a/mm/mlock.c b/mm/mlock.c
index e13918d..22041aa 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -166,9 +166,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	VM_BUG_ON(end   > vma->vm_end);
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	gup_flags = 0;
+	gup_flags = FOLL_TOUCH | FOLL_GET;
 	if (vma->vm_flags & VM_WRITE)
-		gup_flags = GUP_FLAGS_WRITE;
+		gup_flags |= FOLL_WRITE;
 
 	while (nr_pages > 0) {
 		int i;
diff --git a/mm/nommu.c b/mm/nommu.c
index 386443e..2d02ca1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -168,20 +168,20 @@ unsigned int kobjsize(const void *objp)
 }
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long start, int nr_pages, int flags,
+		     unsigned long start, int nr_pages, int foll_flags,
 		     struct page **pages, struct vm_area_struct **vmas)
 {
 	struct vm_area_struct *vma;
 	unsigned long vm_flags;
 	int i;
-	int write = !!(flags & GUP_FLAGS_WRITE);
-	int force = !!(flags & GUP_FLAGS_FORCE);
 
 	/* calculate required read or write permissions.
-	 * - if 'force' is set, we only require the "MAY" flags.
+	 * If FOLL_FORCE is set, we only require the "MAY" flags.
 	 */
-	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
-	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+	vm_flags  = (foll_flags & FOLL_WRITE) ?
+			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+	vm_flags &= (foll_flags & FOLL_FORCE) ?
+			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
 
 	for (i = 0; i < nr_pages; i++) {
 		vma = find_vma(mm, start);
@@ -223,9 +223,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	int flags = 0;
 
 	if (write)
-		flags |= GUP_FLAGS_WRITE;
+		flags |= FOLL_WRITE;
 	if (force)
-		flags |= GUP_FLAGS_FORCE;
+		flags |= FOLL_FORCE;
 
 	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
 }
-- 
cgit v0.10.2


From 6e919717c82c5773ac671816c8392c70d261685f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:32 -0700
Subject: mm: m(un)lock avoid ZERO_PAGE

I'm still reluctant to clutter __get_user_pages() with another flag, just
to avoid touching ZERO_PAGE count in mlock(); though we can add that later
if it shows up as an issue in practice.

But when mlocking, we can test page->mapping slightly earlier, to avoid
the potentially bouncy rescheduling of lock_page on ZERO_PAGE - mlock
didn't lock_page in olden ZERO_PAGE days, so we might have regressed.

And when munlocking, it turns out that FOLL_DUMP coincidentally does
what's needed to avoid all updates to ZERO_PAGE, so use that here also.
Plus add comment suggested by KAMEZAWA Hiroyuki.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mlock.c b/mm/mlock.c
index 22041aa..bd6f0e4 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -198,17 +198,26 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		for (i = 0; i < ret; i++) {
 			struct page *page = pages[i];
 
-			lock_page(page);
-			/*
-			 * Because we lock page here and migration is blocked
-			 * by the elevated reference, we need only check for
-			 * file-cache page truncation.  This page->mapping
-			 * check also neatly skips over the ZERO_PAGE(),
-			 * though if that's common we'd prefer not to lock it.
-			 */
-			if (page->mapping)
-				mlock_vma_page(page);
-			unlock_page(page);
+			if (page->mapping) {
+				/*
+				 * That preliminary check is mainly to avoid
+				 * the pointless overhead of lock_page on the
+				 * ZERO_PAGE: which might bounce very badly if
+				 * there is contention.  However, we're still
+				 * dirtying its cacheline with get/put_page:
+				 * we'll add another __get_user_pages flag to
+				 * avoid it if that case turns out to matter.
+				 */
+				lock_page(page);
+				/*
+				 * Because we lock page here and migration is
+				 * blocked by the elevated reference, we need
+				 * only check for file-cache page truncation.
+				 */
+				if (page->mapping)
+					mlock_vma_page(page);
+				unlock_page(page);
+			}
 			put_page(page);	/* ref from get_user_pages() */
 		}
 
@@ -309,9 +318,23 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 	vma->vm_flags &= ~VM_LOCKED;
 
 	for (addr = start; addr < end; addr += PAGE_SIZE) {
-		struct page *page = follow_page(vma, addr, FOLL_GET);
-		if (page) {
+		struct page *page;
+		/*
+		 * Although FOLL_DUMP is intended for get_dump_page(),
+		 * it just so happens that its special treatment of the
+		 * ZERO_PAGE (returning an error instead of doing get_page)
+		 * suits munlock very well (and if somehow an abnormal page
+		 * has sneaked into the range, we won't oops here: great).
+		 */
+		page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
+		if (page && !IS_ERR(page)) {
 			lock_page(page);
+			/*
+			 * Like in __mlock_vma_pages_range(),
+			 * because we lock page here and migration is
+			 * blocked by the elevated reference, we need
+			 * only check for file-cache page truncation.
+			 */
 			if (page->mapping)
 				munlock_vma_page(page);
 			unlock_page(page);
-- 
cgit v0.10.2


From 3ae77f43b1118a76ea37952d444319c15e002c03 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:33 -0700
Subject: mm: hugetlbfs_pagecache_present

Rename hugetlbfs_backed() to hugetlbfs_pagecache_present()
and add more comments, as suggested by Mel Gorman.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6b41f70..815dbd4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2016,8 +2016,11 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
 	return find_lock_page(mapping, idx);
 }
 
-/* Return whether there is a pagecache page to back given address within VMA */
-static bool hugetlbfs_backed(struct hstate *h,
+/*
+ * Return whether there is a pagecache page to back given address within VMA.
+ * Caller follow_hugetlb_page() holds page_table_lock so we cannot lock_page.
+ */
+static bool hugetlbfs_pagecache_present(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long address)
 {
 	struct address_space *mapping;
@@ -2254,10 +2257,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 		/*
 		 * When coredumping, it suits get_dump_page if we just return
-		 * an error if there's a hole and no huge pagecache to back it.
+		 * an error where there's an empty slot with no huge pagecache
+		 * to back it.  This way, we avoid allocating a hugepage, and
+		 * the sparse dumpfile avoids allocating disk blocks, but its
+		 * huge holes still show up with zeroes where they need to be.
 		 */
-		if (absent &&
-		    ((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
+		if (absent && (flags & FOLL_DUMP) &&
+		    !hugetlbfs_pagecache_present(h, vma, vaddr)) {
 			remainder = 0;
 			break;
 		}
-- 
cgit v0.10.2


From 62eede62dafb4a6633eae7ffbeb34c60dba5e7b1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:34 -0700
Subject: mm: ZERO_PAGE without PTE_SPECIAL

Reinstate anonymous use of ZERO_PAGE to all architectures, not just to
those which __HAVE_ARCH_PTE_SPECIAL: as suggested by Nick Piggin.

Contrary to how I'd imagined it, there's nothing ugly about this, just a
zero_pfn test built into one or another block of vm_normal_page().

But the MIPS ZERO_PAGE-of-many-colours case demands is_zero_pfn() and
my_zero_pfn() inlines.  Reinstate its mremap move_pte() shuffling of
ZERO_PAGEs we did from 2.6.17 to 2.6.19?  Not unless someone shouts for
that: it would have to take vm_flags to weed out some cases.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 1a9f9b2..d6eb613 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -76,6 +76,16 @@ extern unsigned long zero_page_mask;
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
 
+#define is_zero_pfn is_zero_pfn
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	extern unsigned long zero_pfn;
+	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+}
+
+#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
+
 extern void paging_init(void);
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 5c694f2..9bdbd10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -108,7 +108,7 @@ static int __init disable_randmaps(char *s)
 }
 __setup("norandmaps", disable_randmaps);
 
-static unsigned long zero_pfn __read_mostly;
+unsigned long zero_pfn __read_mostly;
 
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
@@ -455,6 +455,20 @@ static inline int is_cow_mapping(unsigned int flags)
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+#ifndef is_zero_pfn
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	return pfn == zero_pfn;
+}
+#endif
+
+#ifndef my_zero_pfn
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	return zero_pfn;
+}
+#endif
+
 /*
  * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
@@ -512,7 +526,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			goto check_pfn;
 		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
-		if (pfn != zero_pfn)
+		if (!is_zero_pfn(pfn))
 			print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
@@ -534,6 +548,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		}
 	}
 
+	if (is_zero_pfn(pfn))
+		return NULL;
 check_pfn:
 	if (unlikely(pfn > highest_memmap_pfn)) {
 		print_bad_pte(vma, addr, pte, NULL);
@@ -1161,7 +1177,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	page = vm_normal_page(vma, address, pte);
 	if (unlikely(!page)) {
 		if ((flags & FOLL_DUMP) ||
-		    pte_pfn(pte) != zero_pfn)
+		    !is_zero_pfn(pte_pfn(pte)))
 			goto bad_page;
 		page = pte_page(pte);
 	}
@@ -1443,10 +1459,6 @@ struct page *get_dump_page(unsigned long addr)
 	if (__get_user_pages(current, current->mm, addr, 1,
 			FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
 		return NULL;
-	if (page == ZERO_PAGE(0)) {
-		page_cache_release(page);
-		return NULL;
-	}
 	flush_cache_page(vma, addr, page_to_pfn(page));
 	return page;
 }
@@ -1629,7 +1641,8 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	 * If we don't have pte special, then we have to use the pfn_valid()
 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
 	 * refcount the page if pfn_valid is true (hence insert_page rather
-	 * than insert_pfn).
+	 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
+	 * without pte special, it would there be refcounted as a normal page.
 	 */
 	if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
 		struct page *page;
@@ -2097,7 +2110,7 @@ gotten:
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 
-	if (pte_pfn(orig_pte) == zero_pfn) {
+	if (is_zero_pfn(pte_pfn(orig_pte))) {
 		new_page = alloc_zeroed_user_highpage_movable(vma, address);
 		if (!new_page)
 			goto oom;
@@ -2658,8 +2671,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
-		entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
+	if (!(flags & FAULT_FLAG_WRITE)) {
+		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
+						vma->vm_page_prot));
 		ptl = pte_lockptr(mm, pmd);
 		spin_lock(ptl);
 		if (!pte_none(*page_table))
-- 
cgit v0.10.2


From 03f6462a3ae78f36eb1f0ee8b4d5ae2f7859c1d5 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:35 -0700
Subject: mm: move highest_memmap_pfn

Move highest_memmap_pfn __read_mostly from page_alloc.c next to zero_pfn
__read_mostly in memory.c: to help them share a cacheline, since they're
very often tested together in vm_normal_page().

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/internal.h b/mm/internal.h
index 7559657..22ec8d2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -37,6 +37,8 @@ static inline void __put_page(struct page *page)
 	atomic_dec(&page->_count);
 }
 
+extern unsigned long highest_memmap_pfn;
+
 /*
  * in mm/vmscan.c:
  */
@@ -46,7 +48,6 @@ extern void putback_lru_page(struct page *page);
 /*
  * in mm/page_alloc.c
  */
-extern unsigned long highest_memmap_pfn;
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 
diff --git a/mm/memory.c b/mm/memory.c
index 9bdbd10..b1443ac07 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -109,6 +109,7 @@ static int __init disable_randmaps(char *s)
 __setup("norandmaps", disable_randmaps);
 
 unsigned long zero_pfn __read_mostly;
+unsigned long highest_memmap_pfn __read_mostly;
 
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6877e22..5717f27 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -72,7 +72,6 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
-- 
cgit v0.10.2


From cdf7b3418ad5a8783efe8f9124023d9b869fec0f Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie8@gmail.com>
Date: Mon, 21 Sep 2009 17:03:36 -0700
Subject: mmap: remove unnecessary code

If (flags & MAP_LOCKED) is true, it means vm_flags has already contained
the bit VM_LOCKED which is set by calc_vm_flag_bits().

So there is no need to reset it again, just remove it.

Signed-off-by: Huang Shijie <shijie8@gmail.com>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mmap.c b/mm/mmap.c
index 6eed98c..56eb871 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -963,11 +963,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
-	if (flags & MAP_LOCKED) {
+	if (flags & MAP_LOCKED)
 		if (!can_do_mlock())
 			return -EPERM;
-		vm_flags |= VM_LOCKED;
-	}
 
 	/* mlock MCL_FUTURE? */
 	if (vm_flags & VM_LOCKED) {
-- 
cgit v0.10.2


From 3f96b79ad96263cc0ece7bb340cddf9b2ddfb1b3 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Mon, 21 Sep 2009 17:03:37 -0700
Subject: tmpfs: depend on shmem

CONFIG_SHMEM off gives you (ramfs masquerading as) tmpfs, even when
CONFIG_TMPFS is off: that's a little anomalous, and I'd intended to make
more sense of it by removing CONFIG_TMPFS altogether, always enabling its
code when CONFIG_SHMEM; but so many defconfigs have CONFIG_SHMEM on
CONFIG_TMPFS off that we'd better leave that as is.

But there is no point in asking for CONFIG_TMPFS if CONFIG_SHMEM is off:
make TMPFS depend on SHMEM, which also prevents TMPFS_POSIX_ACL
shmem_acl.o being pointlessly built into the kernel when SHMEM is off.

And a selfish change, to prevent the world from being rebuilt when I
switch between CONFIG_SHMEM on and off: the only CONFIG_SHMEM in the
header files is mm.h shmem_lock() - give that a shmem.c stub instead.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/Kconfig b/fs/Kconfig
index 455aa20..d4bf8ca 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -109,6 +109,7 @@ source "fs/sysfs/Kconfig"
 
 config TMPFS
 	bool "Virtual memory file system support (former shm fs)"
+	depends on SHMEM
 	help
 	  Tmpfs is a file system which keeps all files in virtual memory.
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5409ece..5946e2f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -702,17 +702,8 @@ extern void pagefault_out_of_memory(void);
 
 extern void show_free_areas(void);
 
-#ifdef CONFIG_SHMEM
-extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
-#else
-static inline int shmem_lock(struct file *file, int lock,
-			    struct user_struct *user)
-{
-	return 0;
-}
-#endif
+int shmem_lock(struct file *file, int lock, struct user_struct *user);
 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
-
 int shmem_zero_setup(struct vm_area_struct *);
 
 #ifndef CONFIG_MMU
diff --git a/mm/shmem.c b/mm/shmem.c
index 25ba75d..b4b56fd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2593,6 +2593,11 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	return 0;
 }
 
+int shmem_lock(struct file *file, int lock, struct user_struct *user)
+{
+	return 0;
+}
+
 #define shmem_vm_ops				generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
 #define shmem_get_inode(sb, mode, dev, flags)	ramfs_get_inode(sb, mode, dev)
-- 
cgit v0.10.2


From 252c5f94d944487e9f50ece7942b0fbf659c5c31 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Date: Mon, 21 Sep 2009 17:03:40 -0700
Subject: mmap: avoid unnecessary anon_vma lock acquisition in vma_adjust()

We noticed very erratic behavior [throughput] with the AIM7 shared
workload running on recent distro [SLES11] and mainline kernels on an
8-socket, 32-core, 256GB x86_64 platform.  On the SLES11 kernel
[2.6.27.19+] with Barcelona processors, as we increased the load [10s of
thousands of tasks], the throughput would vary between two "plateaus"--one
at ~65K jobs per minute and one at ~130K jpm.  The simple patch below
causes the results to smooth out at the ~130k plateau.

But wait, there's more:

We do not see this behavior on smaller platforms--e.g., 4 socket/8 core.
This could be the result of the larger number of cpus on the larger
platform--a scalability issue--or it could be the result of the larger
number of interconnect "hops" between some nodes in this platform and how
the tasks for a given load end up distributed over the nodes' cpus and
memories--a stochastic NUMA effect.

The variability in the results are less pronounced [on the same platform]
with Shanghai processors and with mainline kernels.  With 31-rc6 on
Shanghai processors and 288 file systems on 288 fibre attached storage
volumes, the curves [jpm vs load] are both quite flat with the patched
kernel consistently producing ~3.9% better throughput [~80K jpm vs ~77K
jpm] than the unpatched kernel.

Profiling indicated that the "slow" runs were incurring high[er]
contention on an anon_vma lock in vma_adjust(), apparently called from the
sbrk() system call.

The patch:

A comment in mm/mmap.c:vma_adjust() suggests that we don't really need the
anon_vma lock when we're only adjusting the end of a vma, as is the case
for brk().  The comment questions whether it's worth while to optimize for
this case.  Apparently, on the newer, larger x86_64 platforms, with
interesting NUMA topologies, it is worth while--especially considering
that the patch [if correct!] is quite simple.

We can detect this condition--no overlap with next vma--by noting a NULL
"importer".  The anon_vma pointer will also be NULL in this case, so
simply avoid loading vma->anon_vma to avoid the lock.

However, we DO need to take the anon_vma lock when we're inserting a vma
['insert' non-NULL] even when we have no overlap [NULL "importer"], so we
need to check for 'insert', as well.  And Hugh points out that we should
also take it when adjusting vm_start (so that rmap.c can rely upon
vma_address() while it holds the anon_vma lock).

akpm: Zhang Yanmin reprts a 150% throughput improvement with aim7, so it
might be -stable material even though thiss isn't a regression: "this
issue is not clear on dual socket Nehalem machine (2*4*2 cpu), but is
severe on large machine (4*8*2 cpu)"

[hugh.dickins@tiscali.co.uk: test vma start too]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Eric Whitney <eric.whitney@hp.com>
Tested-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mmap.c b/mm/mmap.c
index 56eb871..b6d74b3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -570,9 +570,9 @@ again:			remove_next = 1 + (end > next->vm_end);
 
 	/*
 	 * When changing only vma->vm_end, we don't really need
-	 * anon_vma lock: but is that case worth optimizing out?
+	 * anon_vma lock.
 	 */
-	if (vma->anon_vma)
+	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
 		anon_vma = vma->anon_vma;
 	if (anon_vma) {
 		spin_lock(&anon_vma->lock);
-- 
cgit v0.10.2


From f8dbf0a7a4c5d98e8b70da9f7f4f6a89f3b7a7bb Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie8@gmail.com>
Date: Mon, 21 Sep 2009 17:03:41 -0700
Subject: mmap: save some cycles for the shared anonymous mapping

shmem_zero_setup() does not change vm_start, pgoff or vm_flags, only some
drivers change them (such as /driver/video/bfin-t350mcqb-fb.c).

Move these codes to a more proper place to save cycles for shared
anonymous mapping.

Signed-off-by: Huang Shijie <shijie8@gmail.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mmap.c b/mm/mmap.c
index b6d74b3..1aeef66 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1191,21 +1191,21 @@ munmap_back:
 			goto unmap_and_free_vma;
 		if (vm_flags & VM_EXECUTABLE)
 			added_exe_file_vma(mm);
+
+		/* Can addr have changed??
+		 *
+		 * Answer: Yes, several device drivers can do it in their
+		 *         f_op->mmap method. -DaveM
+		 */
+		addr = vma->vm_start;
+		pgoff = vma->vm_pgoff;
+		vm_flags = vma->vm_flags;
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
 			goto free_vma;
 	}
 
-	/* Can addr have changed??
-	 *
-	 * Answer: Yes, several device drivers can do it in their
-	 *         f_op->mmap method. -DaveM
-	 */
-	addr = vma->vm_start;
-	pgoff = vma->vm_pgoff;
-	vm_flags = vma->vm_flags;
-
 	if (vma_wants_writenotify(vma))
 		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
 
-- 
cgit v0.10.2


From 6bfde05bf5c9682e255c6a2c669dc80f91af6296 Mon Sep 17 00:00:00 2001
From: Eric B Munson <ebmunson@us.ibm.com>
Date: Mon, 21 Sep 2009 17:03:43 -0700
Subject: hugetlbfs: allow the creation of files suitable for MAP_PRIVATE on
 the vfs internal mount

This patchset adds a flag to mmap that allows the user to request that an
anonymous mapping be backed with huge pages.  This mapping will borrow
functionality from the huge page shm code to create a file on the kernel
internal mount and use it to approximate an anonymous mapping.  The
MAP_HUGETLB flag is a modifier to MAP_ANONYMOUS and will not work without
both flags being preset.

A new flag is necessary because there is no other way to hook into huge
pages without creating a file on a hugetlbfs mount which wouldn't be
MAP_ANONYMOUS.

To userspace, this mapping will behave just like an anonymous mapping
because the file is not accessible outside of the kernel.

This patchset is meant to simplify the programming model.  Presently there
is a large chunk of boiler platecode, contained in libhugetlbfs, required
to create private, hugepage backed mappings.  This patch set would allow
use of hugepages without linking to libhugetlbfs or having hugetblfs
mounted.

Unification of the VM code would provide these same benefits, but it has
been resisted each time that it has been suggested for several reasons: it
would break PAGE_SIZE assumptions across the kernel, it makes page-table
abstractions really expensive, and it does not provide any benefit on
architectures that do not support huge pages, incurring fast path
penalties without providing any benefit on these architectures.

This patch:

There are two means of creating mappings backed by huge pages:

        1. mmap() a file created on hugetlbfs
        2. Use shm which creates a file on an internal mount which essentially
           maps it MAP_SHARED

The internal mount is only used for shared mappings but there is very
little that stops it being used for private mappings. This patch extends
hugetlbfs_file_setup() to deal with the creation of files that will be
mapped MAP_PRIVATE on the internal hugetlbfs mount. This extended API is
used in a subsequent patch to implement the MAP_HUGETLB mmap() flag.

Signed-off-by: Eric Munson <ebmunson@us.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a93b885..06b7c26 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -507,6 +507,13 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		INIT_LIST_HEAD(&inode->i_mapping->private_list);
 		info = HUGETLBFS_I(inode);
+		/*
+		 * The policy is initialized here even if we are creating a
+		 * private inode because initialization simply creates an
+		 * an empty rb tree and calls spin_lock_init(), later when we
+		 * call mpol_free_shared_policy() it will just return because
+		 * the rb tree will still be empty.
+		 */
 		mpol_shared_policy_init(&info->policy, NULL);
 		switch (mode & S_IFMT) {
 		default:
@@ -931,13 +938,19 @@ static struct file_system_type hugetlbfs_fs_type = {
 
 static struct vfsmount *hugetlbfs_vfsmount;
 
-static int can_do_hugetlb_shm(void)
+static int can_do_hugetlb_shm(int creat_flags)
 {
-	return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
+	if (creat_flags != HUGETLB_SHMFS_INODE)
+		return 0;
+	if (capable(CAP_IPC_LOCK))
+		return 1;
+	if (in_group_p(sysctl_hugetlb_shm_group))
+		return 1;
+	return 0;
 }
 
 struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
-						struct user_struct **user)
+				struct user_struct **user, int creat_flags)
 {
 	int error = -ENOMEM;
 	struct file *file;
@@ -949,7 +962,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 	if (!hugetlbfs_vfsmount)
 		return ERR_PTR(-ENOENT);
 
-	if (!can_do_hugetlb_shm()) {
+	if (!can_do_hugetlb_shm(creat_flags)) {
 		*user = current_user();
 		if (user_shm_lock(size, *user)) {
 			WARN_ONCE(1,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e7f0fab..f6505ad 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -112,6 +112,14 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 
 #endif /* !CONFIG_HUGETLB_PAGE */
 
+enum {
+	/*
+	 * The file will be used as an shm file so shmfs accounting rules
+	 * apply
+	 */
+	HUGETLB_SHMFS_INODE     = 1,
+};
+
 #ifdef CONFIG_HUGETLBFS
 struct hugetlbfs_config {
 	uid_t   uid;
@@ -150,7 +158,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
 extern const struct file_operations hugetlbfs_file_operations;
 extern struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, int acct,
-						struct user_struct **user);
+				struct user_struct **user, int creat_flags);
 int hugetlb_get_quota(struct address_space *mapping, long delta);
 void hugetlb_put_quota(struct address_space *mapping, long delta);
 
@@ -172,7 +180,7 @@ static inline void set_file_hugepages(struct file *file)
 
 #define is_file_hugepages(file)			0
 #define set_file_hugepages(file)		BUG()
-#define hugetlb_file_setup(name,size,acct,user)	ERR_PTR(-ENOSYS)
+#define hugetlb_file_setup(name,size,acct,user,creat)	ERR_PTR(-ENOSYS)
 
 #endif /* !CONFIG_HUGETLBFS */
 
diff --git a/ipc/shm.c b/ipc/shm.c
index 30162a5..9eb1488 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -370,7 +370,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		if (shmflg & SHM_NORESERVE)
 			acctflag = VM_NORESERVE;
 		file = hugetlb_file_setup(name, size, acctflag,
-							&shp->mlock_user);
+					&shp->mlock_user, HUGETLB_SHMFS_INODE);
 	} else {
 		/*
 		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
-- 
cgit v0.10.2


From 90f72aa58bbf076b68e289fbd71eb829bc505923 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 21 Sep 2009 17:03:45 -0700
Subject: mm: add MAP_HUGETLB for mmaping pseudo-anonymous huge page regions

Add a flag for mmap that will be used to request a huge page region that
will look like anonymous memory to user space.  This is accomplished by
using a file on the internal vfsmount.  MAP_HUGETLB is a modifier of
MAP_ANONYMOUS and so must be specified with it.  The region will behave
the same as a MAP_ANONYMOUS region using small pages.

The patch also adds the MAP_STACK flag, which was previously defined only
on some architectures but not on others.  Since MAP_STACK is meant to be a
hint only, architectures can define it without assigning a specific
meaning to it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Eric B Munson <ebmunson@us.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: David Rientjes <rientjes@google.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h
index c77c557..99c56d4 100644
--- a/arch/alpha/include/asm/mman.h
+++ b/arch/alpha/include/asm/mman.h
@@ -28,6 +28,8 @@
 #define MAP_NORESERVE	0x10000		/* don't check for reservations */
 #define MAP_POPULATE	0x20000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x40000		/* do not block on IO */
+#define MAP_STACK	0x80000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x100000	/* create a huge page mapping */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_SYNC		2		/* synchronous memory sync */
diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h
index fc26976..6464d47 100644
--- a/arch/arm/include/asm/mman.h
+++ b/arch/arm/include/asm/mman.h
@@ -10,6 +10,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/avr32/include/asm/mman.h b/arch/avr32/include/asm/mman.h
index 9a92b15..38cea1b 100644
--- a/arch/avr32/include/asm/mman.h
+++ b/arch/avr32/include/asm/mman.h
@@ -10,6 +10,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/asm/mman.h
index b7f0afb..de6b903 100644
--- a/arch/cris/include/asm/mman.h
+++ b/arch/cris/include/asm/mman.h
@@ -12,6 +12,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/frv/include/asm/mman.h b/arch/frv/include/asm/mman.h
index 58c1d11..1939343 100644
--- a/arch/frv/include/asm/mman.h
+++ b/arch/frv/include/asm/mman.h
@@ -10,6 +10,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/asm/mman.h
index cf35f0a..eacacd0 100644
--- a/arch/h8300/include/asm/mman.h
+++ b/arch/h8300/include/asm/mman.h
@@ -10,6 +10,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
index 48cf8b9..cf55884 100644
--- a/arch/ia64/include/asm/mman.h
+++ b/arch/ia64/include/asm/mman.h
@@ -18,6 +18,8 @@
 #define MAP_NORESERVE	0x04000		/* don't check for reservations */
 #define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/asm/mman.h
index 04a5f40..d191089 100644
--- a/arch/m32r/include/asm/mman.h
+++ b/arch/m32r/include/asm/mman.h
@@ -10,6 +10,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h
index 9f5c4c4..c421fef 100644
--- a/arch/m68k/include/asm/mman.h
+++ b/arch/m68k/include/asm/mman.h
@@ -10,6 +10,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h
index f15554d..a2250f3 100644
--- a/arch/mips/include/asm/mman.h
+++ b/arch/mips/include/asm/mman.h
@@ -46,6 +46,8 @@
 #define MAP_LOCKED	0x8000		/* pages are locked */
 #define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
+#define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x80000		/* create a huge page mapping */
 
 /*
  * Flags for msync
diff --git a/arch/mn10300/include/asm/mman.h b/arch/mn10300/include/asm/mman.h
index d04fac1..94611c3 100644
--- a/arch/mn10300/include/asm/mman.h
+++ b/arch/mn10300/include/asm/mman.h
@@ -21,6 +21,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h
index a12d9d4..9749c8a 100644
--- a/arch/parisc/include/asm/mman.h
+++ b/arch/parisc/include/asm/mman.h
@@ -22,6 +22,8 @@
 #define MAP_GROWSDOWN	0x8000		/* stack-like segment */
 #define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
+#define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x80000		/* create a huge page mapping */
 
 #define MS_SYNC		1		/* synchronous memory sync */
 #define MS_ASYNC	2		/* sync memory asynchronously */
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 7b1c498..d4a7f64 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -25,6 +25,8 @@
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #ifdef __KERNEL__
 #ifdef CONFIG_PPC64
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index f63fe7b..22714ca 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -18,6 +18,8 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h
index 988192e..c3029ad 100644
--- a/arch/sparc/include/asm/mman.h
+++ b/arch/sparc/include/asm/mman.h
@@ -20,6 +20,8 @@
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h
index 6e55b4d..fca4db4 100644
--- a/arch/xtensa/include/asm/mman.h
+++ b/arch/xtensa/include/asm/mman.h
@@ -53,6 +53,8 @@
 #define MAP_LOCKED	0x8000		/* pages are locked */
 #define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
+#define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x80000		/* create a huge page mapping */
 
 /*
  * Flags for msync
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
index 7cab4de..32c8bd6 100644
--- a/include/asm-generic/mman.h
+++ b/include/asm-generic/mman.h
@@ -11,6 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
-- 
cgit v0.10.2


From 4e52780d41a741fb4861ae1df2413dd816ec11b1 Mon Sep 17 00:00:00 2001
From: Eric B Munson <ebmunson@us.ibm.com>
Date: Mon, 21 Sep 2009 17:03:47 -0700
Subject: hugetlb: add MAP_HUGETLB for mmaping pseudo-anonymous huge page
 regions

Add a flag for mmap that will be used to request a huge page region that
will look like anonymous memory to userspace.  This is accomplished by
using a file on the internal vfsmount.  MAP_HUGETLB is a modifier of
MAP_ANONYMOUS and so must be specified with it.  The region will behave
the same as a MAP_ANONYMOUS region using small pages.

[akpm@linux-foundation.org: fix arch definitions of MAP_HUGETLB]
Signed-off-by: Eric B Munson <ebmunson@us.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f6505ad..176e7ee 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -112,12 +112,19 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 
 #endif /* !CONFIG_HUGETLB_PAGE */
 
+#define HUGETLB_ANON_FILE "anon_hugepage"
+
 enum {
 	/*
 	 * The file will be used as an shm file so shmfs accounting rules
 	 * apply
 	 */
 	HUGETLB_SHMFS_INODE     = 1,
+	/*
+	 * The file is being created on the internal vfs mount and shmfs
+	 * accounting rules do not apply
+	 */
+	HUGETLB_ANONHUGE_INODE  = 2,
 };
 
 #ifdef CONFIG_HUGETLBFS
diff --git a/mm/mmap.c b/mm/mmap.c
index 1aeef66..21d4029 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -949,6 +949,24 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	if (flags & MAP_HUGETLB) {
+		struct user_struct *user = NULL;
+		if (file)
+			return -EINVAL;
+
+		/*
+		 * VM_NORESERVE is used because the reservations will be
+		 * taken when vm_ops->mmap() is called
+		 * A dummy user value is used because we are not locking
+		 * memory so no accounting is necessary
+		 */
+		len = ALIGN(len, huge_page_size(&default_hstate));
+		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+						&user, HUGETLB_ANONHUGE_INODE);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+	}
+
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
-- 
cgit v0.10.2


From 94bf5ceac095c7d4cb5e4d40fa7e2dd81d722b75 Mon Sep 17 00:00:00 2001
From: Eric B Munson <ebmunson@us.ibm.com>
Date: Mon, 21 Sep 2009 17:03:48 -0700
Subject: hugetlb: add MAP_HUGETLB example

Add an example of how to use the MAP_HUGETLB flag to the vm documentation
directory and a reference to the example in hugetlbpage.txt.

Signed-off-by: Eric B Munson <ebmunson@us.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index f80a449..e57d6a9 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -22,3 +22,5 @@ slabinfo.c
 	- source code for a tool to get reports about slabs.
 slub.txt
 	- a short users guide for SLUB.
+map_hugetlb.c
+	- an example program that uses the MAP_HUGETLB mmap flag.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 3a167be7..82a7bd1 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -187,12 +187,14 @@ Regular chown, chgrp, and chmod commands (with right permissions) could be
 used to change the file attributes on hugetlbfs.
 
 Also, it is important to note that no such mount command is required if the
-applications are going to use only shmat/shmget system calls.  Users who
-wish to use hugetlb page via shared memory segment should be a member of
-a supplementary group and system admin needs to configure that gid into
-/proc/sys/vm/hugetlb_shm_group.  It is possible for same or different
-applications to use any combination of mmaps and shm* calls, though the
-mount of filesystem will be required for using mmap calls.
+applications are going to use only shmat/shmget system calls or mmap with
+MAP_HUGETLB.  Users who wish to use hugetlb page via shared memory segment
+should be a member of a supplementary group and system admin needs to
+configure that gid into /proc/sys/vm/hugetlb_shm_group.  It is possible for
+same or different applications to use any combination of mmaps and shm*
+calls, though the mount of filesystem will be required for using mmap calls
+without MAP_HUGETLB.  For an example of how to use mmap with MAP_HUGETLB see
+map_hugetlb.c.
 
 *******************************************************************
 
diff --git a/Documentation/vm/map_hugetlb.c b/Documentation/vm/map_hugetlb.c
new file mode 100644
index 0000000..e2bdae3
--- /dev/null
+++ b/Documentation/vm/map_hugetlb.c
@@ -0,0 +1,77 @@
+/*
+ * Example of using hugepage memory in a user application using the mmap
+ * system call with MAP_HUGETLB flag.  Before running this program make
+ * sure the administrator has allocated enough default sized huge pages
+ * to cover the 256 MB allocation.
+ *
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified.  Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40
+#endif
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define ADDR (void *)(0x0UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+void check_bytes(char *addr)
+{
+	printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+void write_bytes(char *addr)
+{
+	unsigned long i;
+
+	for (i = 0; i < LENGTH; i++)
+		*(addr + i) = (char)i;
+}
+
+void read_bytes(char *addr)
+{
+	unsigned long i;
+
+	check_bytes(addr);
+	for (i = 0; i < LENGTH; i++)
+		if (*(addr + i) != (char)i) {
+			printf("Mismatch at %lu\n", i);
+			break;
+		}
+}
+
+int main(void)
+{
+	void *addr;
+
+	addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	printf("Returned address is %p\n", addr);
+	check_bytes(addr);
+	write_bytes(addr);
+	read_bytes(addr);
+
+	munmap(addr, LENGTH);
+
+	return 0;
+}
-- 
cgit v0.10.2


From 6e17b17f1fc7b2f24383a693d63550d9e1460081 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 21 Sep 2009 17:03:48 -0700
Subject: mm: remove duplicate asm/mman.h files

A number of architectures have identical asm/mman.h files so they can all
be merged by using the new generic file.

The remaining asm/mman.h files are substantially different from each
other.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h
index 6464d47..8eebf89 100644
--- a/arch/arm/include/asm/mman.h
+++ b/arch/arm/include/asm/mman.h
@@ -1,19 +1 @@
-#ifndef __ARM_MMAN_H__
-#define __ARM_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __ARM_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/avr32/include/asm/mman.h b/arch/avr32/include/asm/mman.h
index 38cea1b..8eebf89 100644
--- a/arch/avr32/include/asm/mman.h
+++ b/arch/avr32/include/asm/mman.h
@@ -1,19 +1 @@
-#ifndef __ASM_AVR32_MMAN_H__
-#define __ASM_AVR32_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __ASM_AVR32_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/asm/mman.h
index de6b903..8eebf89 100644
--- a/arch/cris/include/asm/mman.h
+++ b/arch/cris/include/asm/mman.h
@@ -1,21 +1 @@
-#ifndef __CRIS_MMAN_H__
-#define __CRIS_MMAN_H__
-
-/* verbatim copy of asm-i386/ version */
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __CRIS_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/frv/include/asm/mman.h b/arch/frv/include/asm/mman.h
index 1939343..8eebf89 100644
--- a/arch/frv/include/asm/mman.h
+++ b/arch/frv/include/asm/mman.h
@@ -1,20 +1 @@
-#ifndef __ASM_MMAN_H__
-#define __ASM_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __ASM_MMAN_H__ */
-
+#include <asm-generic/mman.h>
diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/asm/mman.h
index eacacd0..8eebf89 100644
--- a/arch/h8300/include/asm/mman.h
+++ b/arch/h8300/include/asm/mman.h
@@ -1,19 +1 @@
-#ifndef __H8300_MMAN_H__
-#define __H8300_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __H8300_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
index cf55884..4459028 100644
--- a/arch/ia64/include/asm/mman.h
+++ b/arch/ia64/include/asm/mman.h
@@ -8,21 +8,9 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
  */
 
-#include <asm-generic/mman-common.h>
+#include <asm-generic/mman.h>
 
-#define MAP_GROWSDOWN	0x00100		/* stack-like segment */
-#define MAP_GROWSUP	0x00200		/* register stack-like segment */
-#define MAP_DENYWRITE	0x00800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x01000		/* mark it as an executable */
-#define MAP_LOCKED	0x02000		/* pages are locked */
-#define MAP_NORESERVE	0x04000		/* don't check for reservations */
-#define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
+#define MAP_GROWSUP	0x0200		/* register stack-like segment */
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/asm/mman.h
index d191089..8eebf89 100644
--- a/arch/m32r/include/asm/mman.h
+++ b/arch/m32r/include/asm/mman.h
@@ -1,19 +1 @@
-#ifndef __M32R_MMAN_H__
-#define __M32R_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __M32R_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h
index c421fef..8eebf89 100644
--- a/arch/m68k/include/asm/mman.h
+++ b/arch/m68k/include/asm/mman.h
@@ -1,19 +1 @@
-#ifndef __M68K_MMAN_H__
-#define __M68K_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __M68K_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/mn10300/include/asm/mman.h b/arch/mn10300/include/asm/mman.h
index 94611c3..8eebf89 100644
--- a/arch/mn10300/include/asm/mman.h
+++ b/arch/mn10300/include/asm/mman.h
@@ -1,30 +1 @@
-/* MN10300 Constants for mmap and co.
- *
- * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * - Derived from asm-x86/mman.h
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_MMAN_H
-#define _ASM_MMAN_H
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* _ASM_MMAN_H */
+#include <asm-generic/mman.h>
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index 22714ca..4e9c8ae 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -9,20 +9,7 @@
 #ifndef __S390_MMAN_H__
 #define __S390_MMAN_H__
 
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
+#include <asm-generic/mman.h>
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
 int s390_mmap_check(unsigned long addr, unsigned long len);
-- 
cgit v0.10.2


From 425fbf047cc70bb30dff368a6da02c8c2d229318 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Mon, 21 Sep 2009 17:03:50 -0700
Subject: shmem: initialize struct shmem_sb_info to zero

Fixes the following kmemcheck false positive (the compiler is using
a 32-bit mov to load the 16-bit sbinfo->mode in shmem_fill_super):

[    0.337000] Total of 1 processors activated (3088.38 BogoMIPS).
[    0.352000] CPU0 attaching NULL sched-domain.
[    0.360000] WARNING: kmemcheck: Caught 32-bit read from uninitialized
memory (9f8020fc)
[    0.361000]
a44240820000000041f6998100000000000000000000000000000000ff030000
[    0.368000]  i i i i i i i i i i i i i i i i u u u u i i i i i i i i i i u
u
[    0.375000]                                                          ^
[    0.376000]
[    0.377000] Pid: 9, comm: khelper Not tainted (2.6.31-tip #206) P4DC6
[    0.378000] EIP: 0060:[<810a3a95>] EFLAGS: 00010246 CPU: 0
[    0.379000] EIP is at shmem_fill_super+0xb5/0x120
[    0.380000] EAX: 00000000 EBX: 9f845400 ECX: 824042a4 EDX: 8199f641
[    0.381000] ESI: 9f8020c0 EDI: 9f845400 EBP: 9f81af68 ESP: 81cd6eec
[    0.382000]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[    0.383000] CR0: 8005003b CR2: 9f806200 CR3: 01ccd000 CR4: 000006d0
[    0.384000] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[    0.385000] DR6: ffff4ff0 DR7: 00000400
[    0.386000]  [<810c25fc>] get_sb_nodev+0x3c/0x80
[    0.388000]  [<810a3514>] shmem_get_sb+0x14/0x20
[    0.390000]  [<810c207f>] vfs_kern_mount+0x4f/0x120
[    0.392000]  [<81b2849e>] init_tmpfs+0x7e/0xb0
[    0.394000]  [<81b11597>] do_basic_setup+0x17/0x30
[    0.396000]  [<81b11907>] kernel_init+0x57/0xa0
[    0.398000]  [<810039b7>] kernel_thread_helper+0x7/0x10
[    0.400000]  [<ffffffff>] 0xffffffff
[    0.402000] khelper used greatest stack depth: 2820 bytes left
[    0.407000] calling  init_mmap_min_addr+0x0/0x10 @ 1
[    0.408000] initcall init_mmap_min_addr+0x0/0x10 returned 0 after 0 usecs

Reported-by: Ingo Molnar <mingo@elte.hu>
Analysed-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/shmem.c b/mm/shmem.c
index b4b56fd..b206a7a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2309,17 +2309,14 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 	int err = -ENOMEM;
 
 	/* Round up to L1_CACHE_BYTES to resist false sharing */
-	sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info),
+	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
 				L1_CACHE_BYTES), GFP_KERNEL);
 	if (!sbinfo)
 		return -ENOMEM;
 
-	sbinfo->max_blocks = 0;
-	sbinfo->max_inodes = 0;
 	sbinfo->mode = S_IRWXUGO | S_ISVTX;
 	sbinfo->uid = current_fsuid();
 	sbinfo->gid = current_fsgid();
-	sbinfo->mpol = NULL;
 	sb->s_fs_info = sbinfo;
 
 #ifdef CONFIG_TMPFS
-- 
cgit v0.10.2


From 3d2d827f5ca5e32816194119d5c980c7e04474a6 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 21 Sep 2009 17:03:51 -0700
Subject: mm: move use_mm/unuse_mm from aio.c to mm/

Anyone who wants to do copy to/from user from a kernel thread, needs
use_mm (like what fs/aio has).  Move that into mm/, to make reusing and
exporting easier down the line, and make aio use it.  Next intended user,
besides aio, will be vhost-net.

Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/aio.c b/fs/aio.c
index d065b2c..fc21c23 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -24,6 +24,7 @@
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/mmu_context.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/aio.h>
@@ -34,7 +35,6 @@
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
-#include <asm/mmu_context.h>
 
 #if DEBUG > 1
 #define dprintk		printk
@@ -595,51 +595,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 }
 
 /*
- * use_mm
- *	Makes the calling kernel thread take on the specified
- *	mm context.
- *	Called by the retry thread execute retries within the
- *	iocb issuer's mm context, so that copy_from/to_user
- *	operations work seamlessly for aio.
- *	(Note: this routine is intended to be called only
- *	from a kernel thread context)
- */
-static void use_mm(struct mm_struct *mm)
-{
-	struct mm_struct *active_mm;
-	struct task_struct *tsk = current;
-
-	task_lock(tsk);
-	active_mm = tsk->active_mm;
-	atomic_inc(&mm->mm_count);
-	tsk->mm = mm;
-	tsk->active_mm = mm;
-	switch_mm(active_mm, mm, tsk);
-	task_unlock(tsk);
-
-	mmdrop(active_mm);
-}
-
-/*
- * unuse_mm
- *	Reverses the effect of use_mm, i.e. releases the
- *	specified mm context which was earlier taken on
- *	by the calling kernel thread
- *	(Note: this routine is intended to be called only
- *	from a kernel thread context)
- */
-static void unuse_mm(struct mm_struct *mm)
-{
-	struct task_struct *tsk = current;
-
-	task_lock(tsk);
-	tsk->mm = NULL;
-	/* active_mm is still 'mm' */
-	enter_lazy_tlb(mm, tsk);
-	task_unlock(tsk);
-}
-
-/*
  * Queue up a kiocb to be retried. Assumes that the kiocb
  * has already been marked as kicked, and places it on
  * the retry run list for the corresponding ioctx, if it
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
new file mode 100644
index 0000000..70fffeb
--- /dev/null
+++ b/include/linux/mmu_context.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_MMU_CONTEXT_H
+#define _LINUX_MMU_CONTEXT_H
+
+struct mm_struct;
+
+void use_mm(struct mm_struct *mm);
+void unuse_mm(struct mm_struct *mm);
+
+#endif
diff --git a/mm/Makefile b/mm/Makefile
index a63bf59..728a9fd 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o mm_init.o $(mmu-y)
+			   page_isolation.o mm_init.o mmu_context.o $(mmu-y)
 obj-y += init-mm.o
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
new file mode 100644
index 0000000..fd473b5
--- /dev/null
+++ b/mm/mmu_context.c
@@ -0,0 +1,55 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ *
+ * See ../COPYING for licensing terms.
+ */
+
+#include <linux/mm.h>
+#include <linux/mmu_context.h>
+#include <linux/sched.h>
+
+#include <asm/mmu_context.h>
+
+/*
+ * use_mm
+ *	Makes the calling kernel thread take on the specified
+ *	mm context.
+ *	Called by the retry thread execute retries within the
+ *	iocb issuer's mm context, so that copy_from/to_user
+ *	operations work seamlessly for aio.
+ *	(Note: this routine is intended to be called only
+ *	from a kernel thread context)
+ */
+void use_mm(struct mm_struct *mm)
+{
+	struct mm_struct *active_mm;
+	struct task_struct *tsk = current;
+
+	task_lock(tsk);
+	active_mm = tsk->active_mm;
+	atomic_inc(&mm->mm_count);
+	tsk->mm = mm;
+	tsk->active_mm = mm;
+	switch_mm(active_mm, mm, tsk);
+	task_unlock(tsk);
+
+	mmdrop(active_mm);
+}
+
+/*
+ * unuse_mm
+ *	Reverses the effect of use_mm, i.e. releases the
+ *	specified mm context which was earlier taken on
+ *	by the calling kernel thread
+ *	(Note: this routine is intended to be called only
+ *	from a kernel thread context)
+ */
+void unuse_mm(struct mm_struct *mm)
+{
+	struct task_struct *tsk = current;
+
+	task_lock(tsk);
+	tsk->mm = NULL;
+	/* active_mm is still 'mm' */
+	enter_lazy_tlb(mm, tsk);
+	task_unlock(tsk);
+}
-- 
cgit v0.10.2


From f68e14805085972b4e0b0ab684af37f713b9c262 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 21 Sep 2009 17:03:52 -0700
Subject: mm: reduce atomic use on use_mm fast path

When the mm being switched to matches the active mm, we don't need to
increment and then drop the mm count.  In a simple benchmark this happens
in about 50% of time.  Making that conditional reduces contention on that
cacheline on SMP systems.

Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index fd473b5..ded9081 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -26,13 +26,16 @@ void use_mm(struct mm_struct *mm)
 
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
-	atomic_inc(&mm->mm_count);
+	if (active_mm != mm) {
+		atomic_inc(&mm->mm_count);
+		tsk->active_mm = mm;
+	}
 	tsk->mm = mm;
-	tsk->active_mm = mm;
 	switch_mm(active_mm, mm, tsk);
 	task_unlock(tsk);
 
-	mmdrop(active_mm);
+	if (active_mm != mm)
+		mmdrop(active_mm);
 }
 
 /*
-- 
cgit v0.10.2


From 734f3fa18d460995c8621cf2331b7fba88c977ce Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 21 Sep 2009 17:03:53 -0700
Subject: pcmcia: yenta: add missing __devexit marking

The remove member of the pci_driver yenta_cardbus_driver uses
__devexit_p(), so the remove function itself should be marked with
__devexit.  Even more so considering the probe function is marked with
__devinit.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Daniel Ritz <daniel.ritz-ml@swissonline.ch>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index 737fe5d..b459e87 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -717,7 +717,7 @@ static void yenta_free_resources(struct yenta_socket *socket)
 /*
  * Close it down - release our resources and go home..
  */
-static void yenta_close(struct pci_dev *dev)
+static void __devexit yenta_close(struct pci_dev *dev)
 {
 	struct yenta_socket *sock = pci_get_drvdata(dev);
 
-- 
cgit v0.10.2


From 470967dc6c38696f853b7f338eb9d743c28a9e11 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 21 Sep 2009 17:03:54 -0700
Subject: pcmcia: fix read buffer overflow

If count > 0 and dev->rlen == dev->rpos and dev->proto == 0 then we read
and write dev->rbuf[-1];

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 881934c..c250a31 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1017,7 +1017,7 @@ static ssize_t cmm_read(struct file *filp, __user char *buf, size_t count,
 		}
 	}
 
-	if (dev->proto == 0 && count > dev->rlen - dev->rpos) {
+	if (dev->proto == 0 && count > dev->rlen - dev->rpos && i) {
 		DEBUGP(4, dev, "T=0 and count > buffer\n");
 		dev->rbuf[i] = dev->rbuf[i - 1];
 		dev->rbuf[i - 1] = dev->procbyte;
-- 
cgit v0.10.2


From e6be4a8c26d0a9bacd811084c468e25863e3d069 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Sep 2009 17:03:55 -0700
Subject: pcmcia: switch /proc/bus/pccard/drivers to seq_file

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c
index 7b424e0..32c4404 100644
--- a/drivers/pcmcia/pcmcia_ioctl.c
+++ b/drivers/pcmcia/pcmcia_ioctl.c
@@ -27,6 +27,7 @@
 #include <linux/proc_fs.h>
 #include <linux/poll.h>
 #include <linux/pci.h>
+#include <linux/seq_file.h>
 #include <linux/smp_lock.h>
 #include <linux/workqueue.h>
 
@@ -105,37 +106,40 @@ static struct pcmcia_driver *get_pcmcia_driver(dev_info_t *dev_info)
 #ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *proc_pccard = NULL;
 
-static int proc_read_drivers_callback(struct device_driver *driver, void *d)
+static int proc_read_drivers_callback(struct device_driver *driver, void *_m)
 {
-	char **p = d;
+	struct seq_file *m = _m;
 	struct pcmcia_driver *p_drv = container_of(driver,
 						   struct pcmcia_driver, drv);
 
-	*p += sprintf(*p, "%-24.24s 1 %d\n", p_drv->drv.name,
+	seq_printf(m, "%-24.24s 1 %d\n", p_drv->drv.name,
 #ifdef CONFIG_MODULE_UNLOAD
 		      (p_drv->owner) ? module_refcount(p_drv->owner) : 1
 #else
 		      1
 #endif
 	);
-	d = (void *) p;
-
 	return 0;
 }
 
-static int proc_read_drivers(char *buf, char **start, off_t pos,
-			     int count, int *eof, void *data)
+static int pccard_drivers_proc_show(struct seq_file *m, void *v)
 {
-	char *p = buf;
-	int rc;
-
-	rc = bus_for_each_drv(&pcmcia_bus_type, NULL,
-			      (void *) &p, proc_read_drivers_callback);
-	if (rc < 0)
-		return rc;
+	return bus_for_each_drv(&pcmcia_bus_type, NULL,
+				m, proc_read_drivers_callback);
+}
 
-	return (p - buf);
+static int pccard_drivers_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pccard_drivers_proc_show, NULL);
 }
+
+static const struct file_operations pccard_drivers_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pccard_drivers_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 #endif
 
 
@@ -1011,7 +1015,7 @@ void __init pcmcia_setup_ioctl(void) {
 #ifdef CONFIG_PROC_FS
 	proc_pccard = proc_mkdir("bus/pccard", NULL);
 	if (proc_pccard)
-		create_proc_read_entry("drivers",0,proc_pccard,proc_read_drivers,NULL);
+		proc_create("drivers", 0, proc_pccard, &pccard_drivers_proc_fops);
 #endif
 }
 
-- 
cgit v0.10.2


From 02e87d1a934c70e3599eb7a29db783806d329e17 Mon Sep 17 00:00:00 2001
From: Kristoffer Ericson <kristoffer.ericson@gmail.com>
Date: Mon, 21 Sep 2009 17:03:56 -0700
Subject: pcmcia: cleanup/fixup patch for sa1100_jornada_pcmcia driver

Clean up the /drivers/pcmcia/sa1100_jornada.c file with respect to
formatting.  It also changes a build warning into a code comment (since
its a pain to watch every build and havent seen any problems with driver
in 3.5years).

Signed-off-by: Kristoffer Ericson <kristoffer.ericson@gmail.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/pcmcia/sa1100_jornada720.c b/drivers/pcmcia/sa1100_jornada720.c
index 57ca085..7eedb42 100644
--- a/drivers/pcmcia/sa1100_jornada720.c
+++ b/drivers/pcmcia/sa1100_jornada720.c
@@ -16,89 +16,103 @@
 
 #include "sa1111_generic.h"
 
-#define SOCKET0_POWER   GPIO_GPIO0
-#define SOCKET0_3V      GPIO_GPIO2
-#define SOCKET1_POWER   (GPIO_GPIO1 | GPIO_GPIO3)
-#warning *** Does SOCKET1_3V actually do anything?
+/* Does SOCKET1_3V actually do anything? */
+#define SOCKET0_POWER	GPIO_GPIO0
+#define SOCKET0_3V	GPIO_GPIO2
+#define SOCKET1_POWER	(GPIO_GPIO1 | GPIO_GPIO3)
 #define SOCKET1_3V	GPIO_GPIO3
 
 static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
 {
-  /*
-   * What is all this crap for?
-   */
-  GRER |= 0x00000002;
-  /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */
-  sa1111_set_io_dir(SA1111_DEV(skt->dev), GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
-  sa1111_set_io(SA1111_DEV(skt->dev), GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-  sa1111_set_sleep_io(SA1111_DEV(skt->dev), GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-
-  return sa1111_pcmcia_hw_init(skt);
+	unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3;
+
+	/*
+	* What is all this crap for?
+	*/
+	GRER |= 0x00000002;
+	/* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */
+	sa1111_set_io_dir(SA1111_DEV(skt->dev), pin, 0, 0);
+	sa1111_set_io(SA1111_DEV(skt->dev), pin, 0);
+	sa1111_set_sleep_io(SA1111_DEV(skt->dev), pin, 0);
+
+	return sa1111_pcmcia_hw_init(skt);
 }
 
 static int
 jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state)
 {
-  unsigned int pa_dwr_mask, pa_dwr_set;
-  int ret;
-
-printk("%s(): config socket %d vcc %d vpp %d\n", __func__,
-	skt->nr, state->Vcc, state->Vpp);
-
-  switch (skt->nr) {
-  case 0:
-    pa_dwr_mask = SOCKET0_POWER | SOCKET0_3V;
-
-    switch (state->Vcc) {
-    default:
-    case 0:	pa_dwr_set = 0;					break;
-    case 33:	pa_dwr_set = SOCKET0_POWER | SOCKET0_3V;	break;
-    case 50:	pa_dwr_set = SOCKET0_POWER;			break;
-    }
-    break;
-
-  case 1:
-    pa_dwr_mask = SOCKET1_POWER;
-
-    switch (state->Vcc) {
-    default:
-    case 0:	pa_dwr_set = 0;					break;
-    case 33:	pa_dwr_set = SOCKET1_POWER;			break;
-    case 50:	pa_dwr_set = SOCKET1_POWER;			break;
-    }
-    break;
-
-  default:
-    return -1;
-  }
-
-  if (state->Vpp != state->Vcc && state->Vpp != 0) {
-    printk(KERN_ERR "%s(): slot cannot support VPP %u\n",
-	   __func__, state->Vpp);
-    return -1;
-  }
-
-  ret = sa1111_pcmcia_configure_socket(skt, state);
-  if (ret == 0) {
-    unsigned long flags;
-
-    local_irq_save(flags);
-    sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set);
-    local_irq_restore(flags);
-  }
-
-  return ret;
+	unsigned int pa_dwr_mask, pa_dwr_set;
+	int ret;
+
+	printk(KERN_INFO "%s(): config socket %d vcc %d vpp %d\n", __func__,
+		skt->nr, state->Vcc, state->Vpp);
+
+	switch (skt->nr) {
+	case 0:
+		pa_dwr_mask = SOCKET0_POWER | SOCKET0_3V;
+
+		switch (state->Vcc) {
+		default:
+		case  0:
+			pa_dwr_set = 0;
+			break;
+		case 33:
+			pa_dwr_set = SOCKET0_POWER | SOCKET0_3V;
+			break;
+		case 50:
+			pa_dwr_set = SOCKET0_POWER;
+			break;
+		}
+		break;
+
+	case 1:
+		pa_dwr_mask = SOCKET1_POWER;
+
+		switch (state->Vcc) {
+		default:
+		case 0:
+			pa_dwr_set = 0;
+			break;
+		case 33:
+			pa_dwr_set = SOCKET1_POWER;
+			break;
+		case 50:
+			pa_dwr_set = SOCKET1_POWER;
+			break;
+		}
+		break;
+
+	default:
+		return -1;
+	}
+
+	if (state->Vpp != state->Vcc && state->Vpp != 0) {
+		printk(KERN_ERR "%s(): slot cannot support VPP %u\n",
+			__func__, state->Vpp);
+		return -EPERM;
+	}
+
+	ret = sa1111_pcmcia_configure_socket(skt, state);
+	if (ret == 0) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set);
+		local_irq_restore(flags);
+	}
+
+	return ret;
 }
 
 static struct pcmcia_low_level jornada720_pcmcia_ops = {
-  .owner		= THIS_MODULE,
-  .hw_init		= jornada720_pcmcia_hw_init,
-  .hw_shutdown		= sa1111_pcmcia_hw_shutdown,
-  .socket_state		= sa1111_pcmcia_socket_state,
-  .configure_socket	= jornada720_pcmcia_configure_socket,
-
-  .socket_init		= sa1111_pcmcia_socket_init,
-  .socket_suspend	= sa1111_pcmcia_socket_suspend,
+	.owner			= THIS_MODULE,
+	.hw_init		= jornada720_pcmcia_hw_init,
+	.hw_shutdown		= sa1111_pcmcia_hw_shutdown,
+	.socket_state		= sa1111_pcmcia_socket_state,
+	.configure_socket	= jornada720_pcmcia_configure_socket,
+
+	.socket_init		= sa1111_pcmcia_socket_init,
+	.socket_suspend		= sa1111_pcmcia_socket_suspend,
 };
 
 int __devinit pcmcia_jornada720_init(struct device *dev)
-- 
cgit v0.10.2


From eb8cdec4a984fde123a91250dcc9e0bddf5eafdc Mon Sep 17 00:00:00 2001
From: Bernd Schmidt <bernds_cb1@t-online.de>
Date: Mon, 21 Sep 2009 17:03:57 -0700
Subject: nommu: add support for Memory Protection Units (MPU)

Some architectures (like the Blackfin arch) implement some of the
"simpler" features that one would expect out of a MMU such as memory
protection.

In our case, we actually get read/write/exec protection down to the page
boundary so processes can't stomp on each other let alone the kernel.

There is a performance decrease (which depends greatly on the workload)
however as the hardware/software interaction was not optimized at design
time.

Signed-off-by: Bernd Schmidt <bernds_cb1@t-online.de>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/module.c b/kernel/module.c
index b6ee424..e6bc4b2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -47,6 +47,7 @@
 #include <linux/rculist.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
 #include <linux/license.h>
 #include <asm/sections.h>
 #include <linux/tracepoint.h>
@@ -1535,6 +1536,10 @@ static void free_module(struct module *mod)
 
 	/* Finally, free the core (containing the module structure) */
 	module_free(mod, mod->module_core);
+
+#ifdef CONFIG_MPU
+	update_protections(current->mm);
+#endif
 }
 
 void *__symbol_get(const char *symbol)
diff --git a/mm/nommu.c b/mm/nommu.c
index 2d02ca1..1a4473fa 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -33,6 +33,7 @@
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
 #include "internal.h"
 
 static inline __attribute__((format(printf, 1, 2)))
@@ -623,6 +624,22 @@ static void put_nommu_region(struct vm_region *region)
 }
 
 /*
+ * update protection on a vma
+ */
+static void protect_vma(struct vm_area_struct *vma, unsigned long flags)
+{
+#ifdef CONFIG_MPU
+	struct mm_struct *mm = vma->vm_mm;
+	long start = vma->vm_start & PAGE_MASK;
+	while (start < vma->vm_end) {
+		protect_page(mm, start, flags);
+		start += PAGE_SIZE;
+	}
+	update_protections(mm);
+#endif
+}
+
+/*
  * add a VMA into a process's mm_struct in the appropriate place in the list
  * and tree and add to the address space's page tree also if not an anonymous
  * page
@@ -641,6 +658,8 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 	mm->map_count++;
 	vma->vm_mm = mm;
 
+	protect_vma(vma, vma->vm_flags);
+
 	/* add the VMA to the mapping */
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
@@ -703,6 +722,8 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 
 	kenter("%p", vma);
 
+	protect_vma(vma, 0);
+
 	mm->map_count--;
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = NULL;
-- 
cgit v0.10.2


From 6e0c9e77771e08b171c4abeb073285d8fb03f528 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Sep 2009 17:03:58 -0700
Subject: h8300: convert to asm-generic/hardirq.h

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/h8300/include/asm/hardirq.h b/arch/h8300/include/asm/hardirq.h
index 9d7f7a7..c2e1aa0 100644
--- a/arch/h8300/include/asm/hardirq.h
+++ b/arch/h8300/include/asm/hardirq.h
@@ -1,18 +1,7 @@
 #ifndef __H8300_HARDIRQ_H
 #define __H8300_HARDIRQ_H
 
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
-extern void ack_bad_irq(unsigned int irq);
+#include <asm/irq.h>
 
 #define HARDIRQ_BITS	8
 
@@ -25,4 +14,6 @@ extern void ack_bad_irq(unsigned int irq);
 # error HARDIRQ_BITS is too low!
 #endif
 
+#include <asm-generic/hardirq.h>
+
 #endif
diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
index 74f8dd7..5c913d4 100644
--- a/arch/h8300/kernel/irq.c
+++ b/arch/h8300/kernel/irq.c
@@ -81,11 +81,6 @@ struct irq_chip h8300irq_chip = {
 	.end		= h8300_end_irq,
 };
 
-void ack_bad_irq(unsigned int irq)
-{
-	printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-
 #if defined(CONFIG_RAMKERNEL)
 static unsigned long __init *get_vector_address(void)
 {
-- 
cgit v0.10.2


From 4f543fa41e78bd366123424a3378f2f4918c0f33 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Mon, 21 Sep 2009 17:04:00 -0700
Subject: alpha: convert to use arch_gettimeoffset()

Converts alpha to use GENERIC_TIME via the arch_getoffset()
infrastructure, reducing the amount of arch specific code we need to
maintain.

I suspect the alpha arch could even be further improved to provide and
rpcc() based clocksource, but not having the hardware, I don't feel
comfortable attempting the more complicated conversion (but I'd be glad to
help if anyone else is interested).

[akpm@linux-foundation.org: fix build]
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9fb8aae..4434481 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -45,6 +45,14 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
+config GENERIC_TIME
+	bool
+	default y
+
+config ARCH_USES_GETTIMEOFFSET
+	bool
+	default y
+
 config ZONE_DMA
 	bool
 	default y
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index b04e2cb..5d08266 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -408,28 +408,17 @@ time_init(void)
  * part.  So we can't do the "find absolute time in terms of cycles" thing
  * that the other ports do.
  */
-void
-do_gettimeofday(struct timeval *tv)
+u32 arch_gettimeoffset(void)
 {
-	unsigned long flags;
-	unsigned long sec, usec, seq;
-	unsigned long delta_cycles, delta_usec, partial_tick;
-
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-
-		delta_cycles = rpcc() - state.last_time;
-		sec = xtime.tv_sec;
-		usec = (xtime.tv_nsec / 1000);
-		partial_tick = state.partial_tick;
-
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
 #ifdef CONFIG_SMP
 	/* Until and unless we figure out how to get cpu cycle counters
 	   in sync and keep them there, we can't use the rpcc tricks.  */
-	delta_usec = 0;
+	return 0;
 #else
+	unsigned long delta_cycles, delta_usec, partial_tick;
+
+	delta_cycles = rpcc() - state.last_time;
+	partial_tick = state.partial_tick;
 	/*
 	 * usec = cycles * ticks_per_cycle * 2**48 * 1e6 / (2**48 * ticks)
 	 *	= cycles * (s_t_p_c) * 1e6 / (2**48 * ticks)
@@ -446,64 +435,10 @@ do_gettimeofday(struct timeval *tv)
 	delta_usec = (delta_cycles * state.scaled_ticks_per_cycle 
 		      + partial_tick) * 15625;
 	delta_usec = ((delta_usec / ((1UL << (FIX_SHIFT-6-1)) * HZ)) + 1) / 2;
+	return delta_usec * 1000;
 #endif
-
-	usec += delta_usec;
-	if (usec >= 1000000) {
-		sec += 1;
-		usec -= 1000000;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
 }
 
-EXPORT_SYMBOL(do_gettimeofday);
-
-int
-do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-	unsigned long delta_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-
-	/* The offset that is added into time in do_gettimeofday above
-	   must be subtracted out here to keep a coherent view of the
-	   time.  Without this, a full-tick error is possible.  */
-
-#ifdef CONFIG_SMP
-	delta_nsec = 0;
-#else
-	delta_nsec = rpcc() - state.last_time;
-	delta_nsec = (delta_nsec * state.scaled_ticks_per_cycle 
-		      + state.partial_tick) * 15625;
-	delta_nsec = ((delta_nsec / ((1UL << (FIX_SHIFT-6-1)) * HZ)) + 1) / 2;
-	delta_nsec *= 1000;
-#endif
-
-	nsec -= delta_nsec;
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-	return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
-
 /*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be
  * called 500 ms after the second nowtime has started, because when
-- 
cgit v0.10.2


From 27258e448eb301cf89e351df87aa8cb916653bf2 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 21 Sep 2009 17:04:01 -0700
Subject: arch/alpha/boot/tools/objstrip.c: wrong variable tested after open()

The incorrect variable is tested. fd is used for another open()
and is already tested.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c
index ef18382..9d0727d 100644
--- a/arch/alpha/boot/tools/objstrip.c
+++ b/arch/alpha/boot/tools/objstrip.c
@@ -93,7 +93,7 @@ main (int argc, char *argv[])
     ofd = 1;
     if (i < argc) {
 	ofd = open(argv[i++], O_WRONLY | O_CREAT | O_TRUNC, 0666);
-	if (fd == -1) {
+	if (ofd == -1) {
 	    perror("open");
 	    exit(1);
 	}
-- 
cgit v0.10.2


From 621731980fe1b19f0a107e17e2af5f8d4411db3e Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Mon, 21 Sep 2009 17:04:01 -0700
Subject: alpha: use printk_once

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index bfb880a..d15aedf 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -268,11 +268,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
 	   assume it doesn't support sg mapping, and, since we tried to
 	   use direct_map above, it now must be considered an error. */
 	if (! alpha_mv.mv_pci_tbi) {
-		static int been_here = 0; /* Only print the message once. */
-		if (!been_here) {
-		    printk(KERN_WARNING "pci_map_single: no HW sg\n");
-		    been_here = 1;
-		}
+		printk_once(KERN_WARNING "pci_map_single: no HW sg\n");
 		return 0;
 	}
 
-- 
cgit v0.10.2


From 1f693665457539e919856149151b7a7e96550d70 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Sep 2009 17:04:02 -0700
Subject: alpha: convert to asm-generic/hardirq.h

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h
index 8897146..242c09b 100644
--- a/arch/alpha/include/asm/hardirq.h
+++ b/arch/alpha/include/asm/hardirq.h
@@ -1,17 +1,9 @@
 #ifndef _ALPHA_HARDIRQ_H
 #define _ALPHA_HARDIRQ_H
 
-#include <linux/threads.h>
-#include <linux/cache.h>
-
-
-/* entry.S is sensitive to the offsets of these fields */
-typedef struct {
-	unsigned long __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
 void ack_bad_irq(unsigned int irq);
+#define ack_bad_irq ack_bad_irq
+
+#include <asm-generic/hardirq.h>
 
 #endif /* _ALPHA_HARDIRQ_H */
-- 
cgit v0.10.2


From d5a6d1739526ed8c383db3dabc232bc15603439a Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 21 Sep 2009 17:04:03 -0700
Subject: m32r: remove redundant tests on unsigned

`off' and `max_cpus' are unsigned.  When negative they are wrapped and
caught by the other test.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 98b8feb..98682bb 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -77,7 +77,7 @@ static int ptrace_read_user(struct task_struct *tsk, unsigned long off,
 	struct user * dummy = NULL;
 #endif
 
-	if ((off & 3) || (off < 0) || (off > sizeof(struct user) - 3))
+	if ((off & 3) || off > sizeof(struct user) - 3)
 		return -EIO;
 
 	off >>= 2;
@@ -139,8 +139,7 @@ static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
 	struct user * dummy = NULL;
 #endif
 
-	if ((off & 3) || off < 0 ||
-	    off > sizeof(struct user) - 3)
+	if ((off & 3) || off > sizeof(struct user) - 3)
 		return -EIO;
 
 	off >>= 2;
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 2547d6c..655ea1c 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -213,7 +213,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		if (!physid_isset(phys_id, phys_cpu_present_map))
 			continue;
 
-		if ((max_cpus >= 0) && (max_cpus <= cpucount + 1))
+		if (max_cpus <= cpucount + 1)
 			continue;
 
 		do_boot_cpu(phys_id);
-- 
cgit v0.10.2


From 95ad759c6b0f30ad9aa5efbdbcecb9597238c00f Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Mon, 21 Sep 2009 17:04:04 -0700
Subject: m32r: convert to use arch_gettimeoffset()

Convert m32r to use GENERIC_TIME via the arch_getoffset() infrastructure,
reducing the amount of arch specific code we need to maintain.

I also noted that m32r doesn't seem to be taking the xtime write lock
before calling do_timer()!  That looks like a pretty bad bug to me.  If
folks agree, let me know and I can move the lock grab to the correct spot.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index cabba33..c41234f 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -41,6 +41,12 @@ config HZ
 	int
 	default 100
 
+config GENERIC_TIME
+	def_bool y
+
+config ARCH_USES_GETTIMEOFFSET
+	def_bool y
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index cada3ba..ba61c4c 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -48,7 +48,7 @@ extern void smp_local_timer_interrupt(void);
 
 static unsigned long latch;
 
-static unsigned long do_gettimeoffset(void)
+u32 arch_gettimeoffset(void)
 {
 	unsigned long  elapsed_time = 0;  /* [us] */
 
@@ -93,79 +93,10 @@ static unsigned long do_gettimeoffset(void)
 #error no chip configuration
 #endif
 
-	return elapsed_time;
+	return elapsed_time * 1000;
 }
 
 /*
- * This version of gettimeofday has near microsecond resolution.
- */
-void do_gettimeofday(struct timeval *tv)
-{
-	unsigned long seq;
-	unsigned long usec, sec;
-	unsigned long max_ntp_tick = tick_usec - tickadj;
-
-	do {
-		seq = read_seqbegin(&xtime_lock);
-
-		usec = do_gettimeoffset();
-
-		/*
-		 * If time_adjust is negative then NTP is slowing the clock
-		 * so make sure not to go into next possible interval.
-		 * Better to lose some accuracy than have time go backwards..
-		 */
-		if (unlikely(time_adjust < 0))
-			usec = min(usec, max_ntp_tick);
-
-		sec = xtime.tv_sec;
-		usec += (xtime.tv_nsec / 1000);
-	} while (read_seqretry(&xtime_lock, seq));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
- 	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= do_gettimeoffset() * NSEC_PER_USEC;
-
-	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-
-	return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
-/*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be
  * called 500 ms after the second nowtime has started, because when
  * nowtime is written into the registers of the CMOS clock, it will
@@ -192,6 +123,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
 #ifndef CONFIG_SMP
 	profile_tick(CPU_PROFILING);
 #endif
+	/* XXX FIXME. Uh, the xtime_lock should be held here, no? */
 	do_timer(1);
 
 #ifndef CONFIG_SMP
-- 
cgit v0.10.2


From ef187fd799c50e15dbb56a0286c81bf467bd0201 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Sep 2009 17:04:04 -0700
Subject: m32r: convert to asm-generic/hardirq.h

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/m32r/include/asm/hardirq.h b/arch/m32r/include/asm/hardirq.h
index cb8aa76..4c31c0a 100644
--- a/arch/m32r/include/asm/hardirq.h
+++ b/arch/m32r/include/asm/hardirq.h
@@ -2,14 +2,7 @@
 #ifndef __ASM_HARDIRQ_H
 #define __ASM_HARDIRQ_H
 
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+#include <asm/irq.h>
 
 #if NR_IRQS > 256
 #define HARDIRQ_BITS	9
@@ -26,11 +19,7 @@ typedef struct {
 # error HARDIRQ_BITS is too low!
 #endif
 
-static inline void ack_bad_irq(int irq)
-{
-	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
-	BUG();
-}
+#include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
 #endif /* __KERNEL__ */
-- 
cgit v0.10.2


From 4ad4c76b7afb71774b846b322ad2ae42f814331a Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Mon, 21 Sep 2009 17:04:05 -0700
Subject: m68k: convert to use arch_gettimeoffset()

Convert m68k to use GENERIC_TIME via the arch_getoffset() infrastructure,
reducing the amount of arch specific code we need to maintain.

I've taken my best swing at converting this, but I'm not 100% confident
I got it right. My cross-compiler is now out of date (gcc4.2) so I
wasn't able to  check if it compiled. Any assistance from arch
maintainers or testers to get this merged would be great.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index fb87c08..29dd848 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -58,6 +58,12 @@ config HZ
 	int
 	default 100
 
+config GENERIC_TIME
+	def_bool y
+
+config ARCH_USES_GETTIMEOFFSET
+	def_bool y
+
 mainmenu "Linux/68k Kernel Configuration"
 
 source "init/Kconfig"
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 54d9807..17dc2a3 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -91,77 +91,11 @@ void __init time_init(void)
 	mach_sched_init(timer_interrupt);
 }
 
-/*
- * This version of gettimeofday has near microsecond resolution.
- */
-void do_gettimeofday(struct timeval *tv)
+u32 arch_gettimeoffset(void)
 {
-	unsigned long flags;
-	unsigned long seq;
-	unsigned long usec, sec;
-	unsigned long max_ntp_tick = tick_usec - tickadj;
-
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-
-		usec = mach_gettimeoffset();
-
-		/*
-		 * If time_adjust is negative then NTP is slowing the clock
-		 * so make sure not to go into next possible interval.
-		 * Better to lose some accuracy than have time go backwards..
-		 */
-		if (unlikely(time_adjust < 0))
-			usec = min(usec, max_ntp_tick);
-
-		sec = xtime.tv_sec;
-		usec += xtime.tv_nsec/1000;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/* This is revolting. We need to set the xtime.tv_nsec
-	 * correctly. However, the value in this location is
-	 * is value at the last tick.
-	 * Discover what correction gettimeofday
-	 * would have done, and then undo it!
-	 */
-	nsec -= 1000 * mach_gettimeoffset();
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-	return 0;
+	return mach_gettimeoffset() * 1000;
 }
 
-EXPORT_SYMBOL(do_settimeofday);
-
-
 static int __init rtc_init(void)
 {
 	struct platform_device *pdev;
-- 
cgit v0.10.2


From 45d80eea87c9f8292d2d33173d6866c0ec57238a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Sep 2009 17:04:07 -0700
Subject: m68k: convert to asm-generic/hardirq.h

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/m68k/include/asm/hardirq_mm.h b/arch/m68k/include/asm/hardirq_mm.h
index 394ee94..554f65b 100644
--- a/arch/m68k/include/asm/hardirq_mm.h
+++ b/arch/m68k/include/asm/hardirq_mm.h
@@ -1,16 +1,8 @@
 #ifndef __M68K_HARDIRQ_H
 #define __M68K_HARDIRQ_H
 
-#include <linux/threads.h>
-#include <linux/cache.h>
-
-/* entry.S is sensitive to the offsets of these fields */
-typedef struct {
-	unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
 #define HARDIRQ_BITS	8
 
+#include <asm-generic/hardirq.h>
+
 #endif
-- 
cgit v0.10.2


From 69d25870f20c4b2563304f2b79c5300dd60a067e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 21 Sep 2009 17:04:08 -0700
Subject: cpuidle: fix the menu governor to boost IO performance

Fix the menu idle governor which balances power savings, energy efficiency
and performance impact.

The reason for a reworked governor is that there have been serious
performance issues reported with the existing code on Nehalem server
systems.

To show this I'm sure Andrew wants to see benchmark results:
(benchmark is "fio", "no cstates" is using "idle=poll")

		no cstates	current linux	new algorithm
1 disk		107 Mb/s	85 Mb/s		105 Mb/s
2 disks		215 Mb/s	123 Mb/s	209 Mb/s
12 disks	590 Mb/s	320 Mb/s	585 Mb/s

In various power benchmark measurements, no degredation was found by our
measurement&diagnostics team.  Obviously a small percentage more power was
used in the "fio" benchmark, due to the much higher performance.

While it would be a novel idea to describe the new algorithm in this
commit message, I cheaped out and described it in comments in the code
instead.

[changes since first post: spelling fixes from akpm, review feedback,
folded menu-tng into menu.c]

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f1df59f..9f3d775 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -2,8 +2,12 @@
  * menu.c - the menu idle governor
  *
  * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
+ * Copyright (C) 2009 Intel Corporation
+ * Author:
+ *        Arjan van de Ven <arjan@linux.intel.com>
  *
- * This code is licenced under the GPL.
+ * This code is licenced under the GPL version 2 as described
+ * in the COPYING file that acompanies the Linux Kernel.
  */
 
 #include <linux/kernel.h>
@@ -13,20 +17,153 @@
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
+#include <linux/sched.h>
 
-#define BREAK_FUZZ	4	/* 4 us */
-#define PRED_HISTORY_PCT	50
+#define BUCKETS 12
+#define RESOLUTION 1024
+#define DECAY 4
+#define MAX_INTERESTING 50000
+
+/*
+ * Concepts and ideas behind the menu governor
+ *
+ * For the menu governor, there are 3 decision factors for picking a C
+ * state:
+ * 1) Energy break even point
+ * 2) Performance impact
+ * 3) Latency tolerance (from pmqos infrastructure)
+ * These these three factors are treated independently.
+ *
+ * Energy break even point
+ * -----------------------
+ * C state entry and exit have an energy cost, and a certain amount of time in
+ * the  C state is required to actually break even on this cost. CPUIDLE
+ * provides us this duration in the "target_residency" field. So all that we
+ * need is a good prediction of how long we'll be idle. Like the traditional
+ * menu governor, we start with the actual known "next timer event" time.
+ *
+ * Since there are other source of wakeups (interrupts for example) than
+ * the next timer event, this estimation is rather optimistic. To get a
+ * more realistic estimate, a correction factor is applied to the estimate,
+ * that is based on historic behavior. For example, if in the past the actual
+ * duration always was 50% of the next timer tick, the correction factor will
+ * be 0.5.
+ *
+ * menu uses a running average for this correction factor, however it uses a
+ * set of factors, not just a single factor. This stems from the realization
+ * that the ratio is dependent on the order of magnitude of the expected
+ * duration; if we expect 500 milliseconds of idle time the likelihood of
+ * getting an interrupt very early is much higher than if we expect 50 micro
+ * seconds of idle time. A second independent factor that has big impact on
+ * the actual factor is if there is (disk) IO outstanding or not.
+ * (as a special twist, we consider every sleep longer than 50 milliseconds
+ * as perfect; there are no power gains for sleeping longer than this)
+ *
+ * For these two reasons we keep an array of 12 independent factors, that gets
+ * indexed based on the magnitude of the expected duration as well as the
+ * "is IO outstanding" property.
+ *
+ * Limiting Performance Impact
+ * ---------------------------
+ * C states, especially those with large exit latencies, can have a real
+ * noticable impact on workloads, which is not acceptable for most sysadmins,
+ * and in addition, less performance has a power price of its own.
+ *
+ * As a general rule of thumb, menu assumes that the following heuristic
+ * holds:
+ *     The busier the system, the less impact of C states is acceptable
+ *
+ * This rule-of-thumb is implemented using a performance-multiplier:
+ * If the exit latency times the performance multiplier is longer than
+ * the predicted duration, the C state is not considered a candidate
+ * for selection due to a too high performance impact. So the higher
+ * this multiplier is, the longer we need to be idle to pick a deep C
+ * state, and thus the less likely a busy CPU will hit such a deep
+ * C state.
+ *
+ * Two factors are used in determing this multiplier:
+ * a value of 10 is added for each point of "per cpu load average" we have.
+ * a value of 5 points is added for each process that is waiting for
+ * IO on this CPU.
+ * (these values are experimentally determined)
+ *
+ * The load average factor gives a longer term (few seconds) input to the
+ * decision, while the iowait value gives a cpu local instantanious input.
+ * The iowait factor may look low, but realize that this is also already
+ * represented in the system load average.
+ *
+ */
 
 struct menu_device {
 	int		last_state_idx;
 
 	unsigned int	expected_us;
-	unsigned int	predicted_us;
-	unsigned int    current_predicted_us;
-	unsigned int	last_measured_us;
-	unsigned int	elapsed_us;
+	u64		predicted_us;
+	unsigned int	measured_us;
+	unsigned int	exit_us;
+	unsigned int	bucket;
+	u64		correction_factor[BUCKETS];
 };
 
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static int get_loadavg(void)
+{
+	unsigned long this = this_cpu_load();
+
+
+	return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10;
+}
+
+static inline int which_bucket(unsigned int duration)
+{
+	int bucket = 0;
+
+	/*
+	 * We keep two groups of stats; one with no
+	 * IO pending, one without.
+	 * This allows us to calculate
+	 * E(duration)|iowait
+	 */
+	if (nr_iowait_cpu())
+		bucket = BUCKETS/2;
+
+	if (duration < 10)
+		return bucket;
+	if (duration < 100)
+		return bucket + 1;
+	if (duration < 1000)
+		return bucket + 2;
+	if (duration < 10000)
+		return bucket + 3;
+	if (duration < 100000)
+		return bucket + 4;
+	return bucket + 5;
+}
+
+/*
+ * Return a multiplier for the exit latency that is intended
+ * to take performance requirements into account.
+ * The more performance critical we estimate the system
+ * to be, the higher this multiplier, and thus the higher
+ * the barrier to go to an expensive C state.
+ */
+static inline int performance_multiplier(void)
+{
+	int mult = 1;
+
+	/* for higher loadavg, we are more reluctant */
+
+	mult += 2 * get_loadavg();
+
+	/* for IO wait tasks (per cpu!) we add 5x each */
+	mult += 10 * nr_iowait_cpu();
+
+	return mult;
+}
+
 static DEFINE_PER_CPU(struct menu_device, menu_devices);
 
 /**
@@ -38,37 +175,59 @@ static int menu_select(struct cpuidle_device *dev)
 	struct menu_device *data = &__get_cpu_var(menu_devices);
 	int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY);
 	int i;
+	int multiplier;
+
+	data->last_state_idx = 0;
+	data->exit_us = 0;
 
 	/* Special case when user has set very strict latency requirement */
-	if (unlikely(latency_req == 0)) {
-		data->last_state_idx = 0;
+	if (unlikely(latency_req == 0))
 		return 0;
-	}
 
-	/* determine the expected residency time */
+	/* determine the expected residency time, round up */
 	data->expected_us =
-		(u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
+	    DIV_ROUND_UP((u32)ktime_to_ns(tick_nohz_get_sleep_length()), 1000);
+
+
+	data->bucket = which_bucket(data->expected_us);
+
+	multiplier = performance_multiplier();
+
+	/*
+	 * if the correction factor is 0 (eg first time init or cpu hotplug
+	 * etc), we actually want to start out with a unity factor.
+	 */
+	if (data->correction_factor[data->bucket] == 0)
+		data->correction_factor[data->bucket] = RESOLUTION * DECAY;
+
+	/* Make sure to round up for half microseconds */
+	data->predicted_us = DIV_ROUND_CLOSEST(
+		data->expected_us * data->correction_factor[data->bucket],
+		RESOLUTION * DECAY);
+
+	/*
+	 * We want to default to C1 (hlt), not to busy polling
+	 * unless the timer is happening really really soon.
+	 */
+	if (data->expected_us > 5)
+		data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
 
-	/* Recalculate predicted_us based on prediction_history_pct */
-	data->predicted_us *= PRED_HISTORY_PCT;
-	data->predicted_us += (100 - PRED_HISTORY_PCT) *
-				data->current_predicted_us;
-	data->predicted_us /= 100;
 
 	/* find the deepest idle state that satisfies our constraints */
-	for (i = CPUIDLE_DRIVER_STATE_START + 1; i < dev->state_count; i++) {
+	for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) {
 		struct cpuidle_state *s = &dev->states[i];
 
-		if (s->target_residency > data->expected_us)
-			break;
 		if (s->target_residency > data->predicted_us)
 			break;
 		if (s->exit_latency > latency_req)
 			break;
+		if (s->exit_latency * multiplier > data->predicted_us)
+			break;
+		data->exit_us = s->exit_latency;
+		data->last_state_idx = i;
 	}
 
-	data->last_state_idx = i - 1;
-	return i - 1;
+	return data->last_state_idx;
 }
 
 /**
@@ -85,35 +244,49 @@ static void menu_reflect(struct cpuidle_device *dev)
 	unsigned int last_idle_us = cpuidle_get_last_residency(dev);
 	struct cpuidle_state *target = &dev->states[last_idx];
 	unsigned int measured_us;
+	u64 new_factor;
 
 	/*
 	 * Ugh, this idle state doesn't support residency measurements, so we
 	 * are basically lost in the dark.  As a compromise, assume we slept
-	 * for one full standard timer tick.  However, be aware that this
-	 * could potentially result in a suboptimal state transition.
+	 * for the whole expected time.
 	 */
 	if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID)))
-		last_idle_us = USEC_PER_SEC / HZ;
+		last_idle_us = data->expected_us;
+
+
+	measured_us = last_idle_us;
 
 	/*
-	 * measured_us and elapsed_us are the cumulative idle time, since the
-	 * last time we were woken out of idle by an interrupt.
+	 * We correct for the exit latency; we are assuming here that the
+	 * exit latency happens after the event that we're interested in.
 	 */
-	if (data->elapsed_us <= data->elapsed_us + last_idle_us)
-		measured_us = data->elapsed_us + last_idle_us;
+	if (measured_us > data->exit_us)
+		measured_us -= data->exit_us;
+
+
+	/* update our correction ratio */
+
+	new_factor = data->correction_factor[data->bucket]
+			* (DECAY - 1) / DECAY;
+
+	if (data->expected_us > 0 && data->measured_us < MAX_INTERESTING)
+		new_factor += RESOLUTION * measured_us / data->expected_us;
 	else
-		measured_us = -1;
+		/*
+		 * we were idle so long that we count it as a perfect
+		 * prediction
+		 */
+		new_factor += RESOLUTION;
 
-	/* Predict time until next break event */
-	data->current_predicted_us = max(measured_us, data->last_measured_us);
+	/*
+	 * We don't want 0 as factor; we always want at least
+	 * a tiny bit of estimated time.
+	 */
+	if (new_factor == 0)
+		new_factor = 1;
 
-	if (last_idle_us + BREAK_FUZZ <
-	    data->expected_us - target->exit_latency) {
-		data->last_measured_us = measured_us;
-		data->elapsed_us = 0;
-	} else {
-		data->elapsed_us = measured_us;
-	}
+	data->correction_factor[data->bucket] = new_factor;
 }
 
 /**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17e9a8e..97b10da 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -140,6 +140,10 @@ extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
+extern unsigned long nr_iowait_cpu(void);
+extern unsigned long this_cpu_load(void);
+
+
 extern void calc_global_load(void);
 extern u64 cpu_nr_migrations(int cpu);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 91843ba..0ac9053 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2904,6 +2904,19 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
+unsigned long nr_iowait_cpu(void)
+{
+	struct rq *this = this_rq();
+	return atomic_read(&this->nr_iowait);
+}
+
+unsigned long this_cpu_load(void)
+{
+	struct rq *this = this_rq();
+	return this->cpu_load[0];
+}
+
+
 /* Variables and functions for calc_load */
 static atomic_long_t calc_load_tasks;
 static unsigned long calc_load_update;
-- 
cgit v0.10.2


From 672917dcc781ead7652a8b11b1fba14e38ac15b8 Mon Sep 17 00:00:00 2001
From: Corrado Zoccolo <czoccolo@gmail.com>
Date: Mon, 21 Sep 2009 17:04:09 -0700
Subject: cpuidle: menu governor: reduce latency on exit

Move the state residency accounting and statistics computation off the hot
exit path.

On exit, the need to recompute statistics is recorded, and new statistics
will be computed when menu_select is called again.

The expected effect is to reduce processor wakeup latency from sleep
(C-states).  We are speaking of few hundreds of cycles reduction out of a
several microseconds latency (determined by the hardware transition), so
it is difficult to measure.

Signed-off-by: Corrado Zoccolo <czoccolo@gmail.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Adam Belay <abelay@novell.com
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 9f3d775..6810443 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -96,6 +96,7 @@
 
 struct menu_device {
 	int		last_state_idx;
+	int             needs_update;
 
 	unsigned int	expected_us;
 	u64		predicted_us;
@@ -166,6 +167,8 @@ static inline int performance_multiplier(void)
 
 static DEFINE_PER_CPU(struct menu_device, menu_devices);
 
+static void menu_update(struct cpuidle_device *dev);
+
 /**
  * menu_select - selects the next idle state to enter
  * @dev: the CPU
@@ -180,6 +183,11 @@ static int menu_select(struct cpuidle_device *dev)
 	data->last_state_idx = 0;
 	data->exit_us = 0;
 
+	if (data->needs_update) {
+		menu_update(dev);
+		data->needs_update = 0;
+	}
+
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0))
 		return 0;
@@ -231,7 +239,7 @@ static int menu_select(struct cpuidle_device *dev)
 }
 
 /**
- * menu_reflect - attempts to guess what happened after entry
+ * menu_reflect - records that data structures need update
  * @dev: the CPU
  *
  * NOTE: it's important to be fast here because this operation will add to
@@ -240,6 +248,16 @@ static int menu_select(struct cpuidle_device *dev)
 static void menu_reflect(struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
+	data->needs_update = 1;
+}
+
+/**
+ * menu_update - attempts to guess what happened after entry
+ * @dev: the CPU
+ */
+static void menu_update(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
 	int last_idx = data->last_state_idx;
 	unsigned int last_idle_us = cpuidle_get_last_residency(dev);
 	struct cpuidle_state *target = &dev->states[last_idx];
-- 
cgit v0.10.2


From db4e5cbe2f201c6abd51f7dfe41dbd2589affeba Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Sep 2009 17:04:10 -0700
Subject: um: convert to asm-generic/hardirq.h

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
index 313ebb8..fb3c05a 100644
--- a/arch/um/include/asm/hardirq.h
+++ b/arch/um/include/asm/hardirq.h
@@ -1,25 +1 @@
-/* (c) 2004 cw@f00f.org, GPLv2 blah blah */
-
-#ifndef __ASM_UM_HARDIRQ_H
-#define __ASM_UM_HARDIRQ_H
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-/* NOTE: When SMP works again we might want to make this
- * ____cacheline_aligned or maybe use per_cpu state? --cw */
-typedef struct {
-	unsigned int __softirq_pending;
-} irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>
-
-/* As this would be very strange for UML to get we BUG() after the
- * printk. */
-static inline void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_ERR "unexpected IRQ %02x\n", irq);
-	BUG();
-}
-
-#endif /* __ASM_UM_HARDIRQ_H */
+#include <asm-generic/hardirq.h>
-- 
cgit v0.10.2


From 4d04c70754eec6d0fd342a5bc3f684db69cc2226 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 21 Sep 2009 17:04:11 -0700
Subject: uml: fix order of pud and pmd_free()

If pmd_alloc() fails we should only free the prior allocated pud, if
pte_alloc_map() fails, we should free pmd as well.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 0cd9a7a..8bfd1e9 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -38,10 +38,10 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	*pte = pte_mkread(*pte);
 	return 0;
 
- out_pmd:
-	pud_free(mm, pud);
  out_pte:
 	pmd_free(mm, pmd);
+ out_pmd:
+	pud_free(mm, pud);
  out:
 	return -ENOMEM;
 }
-- 
cgit v0.10.2


From b61d4a71e483fe1aa1c4b170c28d85be77edce4f Mon Sep 17 00:00:00 2001
From: Hannes Eder <heder@google.com>
Date: Mon, 21 Sep 2009 17:04:12 -0700
Subject: MAINTAINERS: add IPVS include files

Signed-off-by: Hannes Eder <heder@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index b212f55..ee58af1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2803,6 +2803,8 @@ L:	netdev@vger.kernel.org
 L:	lvs-devel@vger.kernel.org
 S:	Maintained
 F:	Documentation/networking/ipvs-sysctl.txt
+F:	include/net/ip_vs.h
+F:	include/linux/ip_vs.h
 F:	net/netfilter/ipvs/
 
 IPWIRELESS DRIVER
-- 
cgit v0.10.2


From f5492666a3b62344de9026a960c11888160362c9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:13 -0700
Subject: scripts/get_maintainer.pl: add --git-blame

Julia Lawall suggested that get_maintainers.pl should have the
ability to include signatories of commits that are modified by
a particular patch.

Vegard Nossum did something similar once.
http://lkml.org/lkml/2008/5/29/449

The modified script looks the commits for all lines in the
patch, and includes the "-by:" signatories for those commits.
It uses the same git-min-percent, git-max-maintainers, and
git-min-signatures options.  git-since is ignored.

It can be used independently from the --git default, so
        ./scripts/get_maintainers.pl --nogit --git-blame <patch>
or
        ./scripts/get_maintainers.pl --nogit --git-blame -f <file>
is acceptable.

If used with -f <file>, all lines/commits for the file are
checked.

--git-blame can be slow if used with -f <file>
--git-blame does not work with -f <directory>

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 278a45b..35781e0 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -13,7 +13,7 @@
 use strict;
 
 my $P = $0;
-my $V = '0.17';
+my $V = '0.18beta2';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
@@ -29,6 +29,7 @@ my $email_git_min_signatures = 1;
 my $email_git_max_maintainers = 5;
 my $email_git_min_percent = 5;
 my $email_git_since = "1-year-ago";
+my $email_git_blame = 0;
 my $output_multiline = 1;
 my $output_separator = ", ";
 my $scm = 0;
@@ -68,6 +69,7 @@ if (!GetOptions(
 		'git-max-maintainers=i' => \$email_git_max_maintainers,
 		'git-min-percent=i' => \$email_git_min_percent,
 		'git-since=s' => \$email_git_since,
+		'git-blame!' => \$email_git_blame,
 		'm!' => \$email_maintainer,
 		'n!' => \$email_usename,
 		'l!' => \$email_list,
@@ -107,8 +109,9 @@ if ($selections == 0) {
     die "$P:  Missing required option: email, scm, status, subsystem or web\n";
 }
 
-if ($email && ($email_maintainer + $email_list + $email_subscriber_list
-	       + $email_git + $email_git_penguin_chiefs) == 0) {
+if ($email &&
+    ($email_maintainer + $email_list + $email_subscriber_list +
+     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
     usage();
     die "$P: Please select at least 1 email option\n";
 }
@@ -150,6 +153,7 @@ close(MAINT);
 ## use the filenames on the command line or find the filenames in the patchfiles
 
 my @files = ();
+my @range = ();
 
 foreach my $file (@ARGV) {
     ##if $file is a directory and it lacks a trailing slash, add one
@@ -162,13 +166,19 @@ foreach my $file (@ARGV) {
 	push(@files, $file);
     } else {
 	my $file_cnt = @files;
+	my $lastfile;
 	open(PATCH, "<$file") or die "$P: Can't open ${file}\n";
 	while (<PATCH>) {
 	    if (m/^\+\+\+\s+(\S+)/) {
 		my $filename = $1;
 		$filename =~ s@^[^/]*/@@;
 		$filename =~ s@\n@@;
+		$lastfile = $filename;
 		push(@files, $filename);
+	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
+		if ($email_git_blame) {
+		    push(@range, "$lastfile:$1:$2");
+		}
 	    }
 	}
 	close(PATCH);
@@ -226,6 +236,9 @@ foreach my $file (@files) {
 	recent_git_signoffs($file);
     }
 
+    if ($email && $email_git_blame) {
+	git_assign_blame($file);
+    }
 }
 
 if ($email) {
@@ -311,6 +324,7 @@ MAINTAINER field selection options:
     --git-max-maintainers => maximum maintainers to add (default: 5)
     --git-min-percent => minimum percentage of commits required (default: 5)
     --git-since => git history to use (default: 1-year-ago)
+    --git-blame => use git blame to find modified commits for patch or file
     --m => include maintainer(s) if any
     --n => include name 'Full Name <addr\@domain.tld>'
     --l => include list(s) if any
@@ -333,13 +347,15 @@ Other options:
 
 Notes:
   Using "-f directory" may give unexpected results:
-
-  Used with "--git", git signators for _all_ files in and below
-     directory are examined as git recurses directories.
-     Any specified X: (exclude) pattern matches are _not_ ignored.
-  Used with "--nogit", directory is used as a pattern match,
-     no individual file within the directory or subdirectory
-     is matched.
+      Used with "--git", git signators for _all_ files in and below
+          directory are examined as git recurses directories.
+          Any specified X: (exclude) pattern matches are _not_ ignored.
+      Used with "--nogit", directory is used as a pattern match,
+         no individual file within the directory or subdirectory
+         is matched.
+      Used with "--git-blame", does not iterate all files in directory
+  Using "--git-blame" is slow and may add old committers and authors
+      that are no longer active maintainers to the output.
 EOT
 }
 
@@ -449,14 +465,19 @@ sub push_email_address {
     my ($email_address) = @_;
 
     my $email_name = "";
-    if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) {
-	$email_name = $1;
-	$email_address = $2;
-    }
 
     if ($email_maintainer) {
-	if ($email_usename && $email_name) {
-	    push(@email_to, format_email($email_name, $email_address));
+	if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) {
+	    $email_name = $1;
+	    $email_address = $2;
+	    if ($email_usename) {
+		push(@email_to, format_email($email_name, $email_address));
+	    } else {
+		push(@email_to, $email_address);
+	    }
+	} elsif ($email_address =~ m/<(.+)>/) {
+	    $email_address = $1;
+	    push(@email_to, $email_address);
 	} else {
 	    push(@email_to, $email_address);
 	}
@@ -545,20 +566,79 @@ sub recent_git_signoffs {
 		last;
 	    }
 	}
-	if ($line =~ m/(.+)<(.+)>/) {
-	    my $git_name = $1;
-	    my $git_addr = $2;
-	    if ($email_usename) {
-		push(@email_to, format_email($git_name, $git_addr));
-	    } else {
-		push(@email_to, $git_addr);
-	    }
-	} elsif ($line =~ m/<(.+)>/) {
-	    my $git_addr = $1;
-	    push(@email_to, $git_addr);
-	} else {
-	    push(@email_to, $line);
+	push_email_address($line);
+    }
+}
+
+sub save_commits {
+    my ($cmd, @commits) = @_;
+    my $output;
+    my @lines = ();
+
+    $output = `${cmd}`;
+
+    @lines = split("\n", $output);
+    foreach my $line (@lines) {
+	if ($line =~ m/^(\w+) /) {
+	    push (@commits, $1);
+	}
+    }
+    return @commits;
+}
+
+sub git_assign_blame {
+    my ($file) = @_;
+
+    my @lines = ();
+    my @commits = ();
+    my $cmd;
+    my $output;
+    my %hash;
+    my $total_sign_offs;
+    my $count;
+
+    if (@range) {
+	foreach my $file_range_diff (@range) {
+	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
+	    my $diff_file = $1;
+	    my $diff_start = $2;
+	    my $diff_length = $3;
+	    next if (!("$file" eq "$diff_file"));
+	    $cmd = "git blame -l -L $diff_start,+$diff_length $file\n";
+	    @commits = save_commits($cmd, @commits);
 	}
+    } else {
+	if (-f $file) {
+	    $cmd = "git blame -l $file\n";
+	    @commits = save_commits($cmd, @commits);
+	}
+    }
+
+    $total_sign_offs = 0;
+    @commits = uniq(@commits);
+    foreach my $commit (@commits) {
+	$cmd = "git log -1 ${commit}";
+	$cmd .= " | grep -Ei \"^[-_ 	a-z]+by:.*\\\@.*\$\"";
+	if (!$email_git_penguin_chiefs) {
+	    $cmd .= " | grep -Ev \"${penguin_chiefs}\"";
+	}
+	$cmd .= " | cut -f2- -d\":\"";
+
+	$output = `${cmd}`;
+	$output =~ s/^\s*//gm;
+	@lines = split("\n", $output);
+	$hash{$_}++ for @lines;
+	$total_sign_offs += @lines;
+    }
+
+    $count = 0;
+    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+	my $sign_offs = $hash{$line};
+	$count++;
+	last if ($sign_offs < $email_git_min_signatures ||
+		 $count > $email_git_max_maintainers ||
+		 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
+	push_email_address($line);
     }
 }
 
-- 
cgit v0.10.2


From 1d606b4e0bf8fe45e3f88543dfce83207ae0027d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:14 -0700
Subject: scripts/get_maintainer.pl: add sections in pattern match depth order

Before this change, matched sections were added in the order
of appearance in the normally alphabetic section order of
the MAINTAINERS file.

For instance, finding the maintainer for drivers/scsi/wd7000.c
would first find "SCSI SUBSYSTEM", then "WD7000 SCSI SUBSYSTEM",
then "THE REST".

before patch:

$ ./scripts/get_maintainer.pl --nogit -f drivers/scsi/wd7000.c
James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Miroslav Zagorac <zaga@fly.cc.fer.hr>
linux-scsi@vger.kernel.org
linux-kernel@vger.kernel.org

get_maintainer.pl now selects matched sections by longest pattern match.
Longest is the number of "/"s and any specific file pattern.

This changes the example output order of MAINTAINERS to whatever is
selected in "WD7000 SUBSYSTEM", then "SCSI SYSTEM", then "THE REST".

after patch:

$ ./scripts/get_maintainer.pl --nogit -f drivers/scsi/wd7000.c
Miroslav Zagorac <zaga@fly.cc.fer.hr>
James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
linux-scsi@vger.kernel.org
linux-kernel@vger.kernel.org

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 35781e0..fb446e0 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -211,6 +211,7 @@ foreach my $file (@files) {
 	    if ($type eq 'X') {
 		if (file_match_pattern($file, $value)) {
 		    $exclude = 1;
+		    last;
 		}
 	    }
 	}
@@ -218,18 +219,24 @@ foreach my $file (@files) {
 
     if (!$exclude) {
 	my $tvi = 0;
+	my %hash;
 	foreach my $line (@typevalue) {
 	    if ($line =~ m/^(\C):\s*(.*)/) {
 		my $type = $1;
 		my $value = $2;
 		if ($type eq 'F') {
 		    if (file_match_pattern($file, $value)) {
-			add_categories($tvi);
+			my $pattern_depth = ($value =~ tr@/@@);
+			$pattern_depth++ if (!(substr($value,-1,1) eq "/"));
+			$hash{$tvi} = $pattern_depth;
 		    }
 		}
 	    }
 	    $tvi++;
 	}
+	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+	    add_categories($line);
+	}
     }
 
     if ($email && $email_git) {
-- 
cgit v0.10.2


From 3fb55652b9f754990e286723f209ce3c07c96d69 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:17 -0700
Subject: scripts/get_maintainer.pl: add --pattern-depth

--pattern-depth is used to control how many levels of directory traversal
should be performed to find maintainers.  default is 0 (all directory levels).

For instance:

MAINTAINERS currently has multiple M: and F: entries that match
net/netfilter/ipvs/ip_vs_app.c

IPVS
M:	Wensong Zhang <wensong@linux-vs.org>
M:	Simon Horman <horms@verge.net.au>
M:	Julian Anastasov <ja@ssi.bg>
[...]
F:	net/netfilter/ipvs/

NETFILTER/IPTABLES/IPCHAINS
[...]
M:	Patrick McHardy <kaber@trash.net>
[...]
F:	net/netfilter/

NETWORKING [GENERAL]
M:	"David S. Miller" <davem@davemloft.net>
[...]
F:	net/

THE REST
M:	Linus Torvalds <torvalds@linux-foundation.org>
[...]
F:	*/

Using this command will return all of those maintainers:
(except Linus unless --git-chief-maintainers is specified)

$ ./scripts/get_maintainer.pl --nogit -nol \
	-f net/netfilter/ipvs/ip_vs_app.c
Julian Anastasov <ja@ssi.bg>
Simon Horman <horms@verge.net.au>
Wensong Zhang <wensong@linux-vs.org>
Patrick McHardy <kaber@trash.net>
David S. Miller <davem@davemloft.net>

Adding --pattern-depth=1 will match at the deepest level
$ ./scripts/get_maintainer.pl --nogit -nol --pattern-depth=1 \
	-f net/netfilter/ipvs/ip_vs_app.c
Julian Anastasov <ja@ssi.bg>
Simon Horman <horms@verge.net.au>
Wensong Zhang <wensong@linux-vs.org>

Adding --pattern-depth=2 will match at the deepest level and 1 higher
$ ./scripts/get_maintainer.pl --nogit -nol --pattern-depth=2 \
	-f net/netfilter/ipvs/ip_vs_app.c
Julian Anastasov <ja@ssi.bg>
Simon Horman <horms@verge.net.au>
Wensong Zhang <wensong@linux-vs.org>
Patrick McHardy <kaber@trash.net>

and so on.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index fb446e0..5132949 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -13,7 +13,7 @@
 use strict;
 
 my $P = $0;
-my $V = '0.18beta2';
+my $V = '0.19';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
@@ -37,6 +37,7 @@ my $web = 0;
 my $subsystem = 0;
 my $status = 0;
 my $from_filename = 0;
+my $pattern_depth = 0;
 my $version = 0;
 my $help = 0;
 
@@ -80,6 +81,7 @@ if (!GetOptions(
 		'status!' => \$status,
 		'scm!' => \$scm,
 		'web!' => \$web,
+		'pattern-depth=i' => \$pattern_depth,
 		'f|file' => \$from_filename,
 		'v|version' => \$version,
 		'h|help' => \$help,
@@ -226,9 +228,13 @@ foreach my $file (@files) {
 		my $value = $2;
 		if ($type eq 'F') {
 		    if (file_match_pattern($file, $value)) {
-			my $pattern_depth = ($value =~ tr@/@@);
-			$pattern_depth++ if (!(substr($value,-1,1) eq "/"));
-			$hash{$tvi} = $pattern_depth;
+			my $value_pd = ($value =~ tr@/@@);
+			my $file_pd = ($file  =~ tr@/@@);
+			$value_pd++ if (substr($value,-1,1) ne "/");
+			if ($pattern_depth == 0 ||
+			    (($file_pd - $value_pd) < $pattern_depth)) {
+			    $hash{$tvi} = $value_pd;
+			}
 		    }
 		}
 	    }
@@ -345,13 +351,14 @@ Output type options:
   --separator [, ] => separator for multiple entries on 1 line
   --multiline => print 1 entry per line
 
-Default options:
-  [--email --git --m --n --l --multiline]
-
 Other options:
+  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
   --version => show version
   --help => show this help information
 
+Default options:
+  [--email --git --m --n --l --multiline --pattern-depth=0]
+
 Notes:
   Using "-f directory" may give unexpected results:
       Used with "--git", git signators for _all_ files in and below
-- 
cgit v0.10.2


From 0e70e83dfd40cac47e1fc3e2f1c7b893ea0cd2f8 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:20 -0700
Subject: scripts/get_maintainer.pl: better email routines, use perl not shell
 where possible

Added format_email and parse_email routines to reduce inline use.

Added email_address_inuse to eliminate multiple maintainer entries
for the same email address, the first name encountered is used.

Used internal perl equivalents of shell cmd use of grep|cut|sort|uniq

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 5132949..1200d72 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -13,7 +13,7 @@
 use strict;
 
 my $P = $0;
-my $V = '0.19';
+my $V = '0.20';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
@@ -258,11 +258,8 @@ if ($email) {
     foreach my $chief (@penguin_chief) {
 	if ($chief =~ m/^(.*):(.*)/) {
 	    my $email_address;
-	    if ($email_usename) {
-		$email_address = format_email($1, $2);
-	    } else {
-		$email_address = $2;
-	    }
+
+	    $email_address = format_email($1, $2);
 	    if ($email_git_penguin_chiefs) {
 		push(@email_to, $email_address);
 	    } else {
@@ -400,21 +397,57 @@ sub top_of_kernel_tree {
 	return 0;
 }
 
-sub format_email {
-    my ($name, $email) = @_;
+sub parse_email {
+    my ($formatted_email) = @_;
+
+    my $name = "";
+    my $address = "";
+
+    if ($formatted_email =~ /^([^<]+)<(.*\@.*)>$/) {
+	$name = $1;
+	$address = $2;
+    } elsif ($formatted_email =~ /^<(.*\@.*)>$/) {
+	$address = $1;
+    } elsif ($formatted_email =~ /^(.*\@.*)$/) {
+	$address = $1;
+    }
 
     $name =~ s/^\s+|\s+$//g;
     $name =~ s/^\"|\"$//g;
-    $email =~ s/^\s+|\s+$//g;
+    $address =~ s/^\s+|\s+$//g;
 
-    my $formatted_email = "";
+    if ($name =~ /[^a-z0-9 \.\-]/i) {    ##has "must quote" chars
+	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
+	$name = "\"$name\"";
+    }
+
+    return ($name, $address);
+}
+
+sub format_email {
+    my ($name, $address) = @_;
+
+    my $formatted_email;
+
+    $name =~ s/^\s+|\s+$//g;
+    $name =~ s/^\"|\"$//g;
+    $address =~ s/^\s+|\s+$//g;
 
     if ($name =~ /[^a-z0-9 \.\-]/i) {    ##has "must quote" chars
 	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
-	$formatted_email = "\"${name}\"\ \<${email}\>";
+	$name = "\"$name\"";
+    }
+
+    if ($email_usename) {
+	if ("$name" eq "") {
+	    $formatted_email = "$address";
+	} else {
+	    $formatted_email = "$name <${address}>";
+	}
     } else {
-	$formatted_email = "${name} \<${email}\>";
+	$formatted_email = $address;
     }
+
     return $formatted_email;
 }
 
@@ -444,19 +477,18 @@ sub add_categories {
 		    }
 		}
 	    } elsif ($ptype eq "M") {
-		my $p_used = 0;
-		if ($index >= 0) {
-		    my $tv = $typevalue[$index - 1];
-		    if ($tv =~ m/^(\C):\s*(.*)/) {
-			if ($1 eq "P") {
-			    if ($email_usename) {
-				push_email_address(format_email($2, $pvalue));
-				$p_used = 1;
+		my ($name, $address) = parse_email($pvalue);
+		if ($name eq "") {
+		    if ($index >= 0) {
+			my $tv = $typevalue[$index - 1];
+			if ($tv =~ m/^(\C):\s*(.*)/) {
+			    if ($1 eq "P") {
+				$name = $2;
 			    }
 			}
 		    }
 		}
-		if (!$p_used) {
+		if ($email_maintainer) {
 		    push_email_addresses($pvalue);
 		}
 	    } elsif ($ptype eq "T") {
@@ -475,26 +507,24 @@ sub add_categories {
     }
 }
 
+sub email_address_inuse {
+    my ($test_address) = @_;
+
+    foreach my $line (@email_to) {
+	my ($name, $address) = parse_email($line);
+
+	return 1 if ($address eq $test_address);
+    }
+    return 0;
+}
+
 sub push_email_address {
-    my ($email_address) = @_;
+    my ($line) = @_;
 
-    my $email_name = "";
+    my ($name, $address) = parse_email($line);
 
-    if ($email_maintainer) {
-	if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) {
-	    $email_name = $1;
-	    $email_address = $2;
-	    if ($email_usename) {
-		push(@email_to, format_email($email_name, $email_address));
-	    } else {
-		push(@email_to, $email_address);
-	    }
-	} elsif ($email_address =~ m/<(.+)>/) {
-	    $email_address = $1;
-	    push(@email_to, $email_address);
-	} else {
-	    push(@email_to, $email_address);
-	}
+    if (!email_address_inuse($address)) {
+	push(@email_to, format_email($name, $address));
     }
 }
 
@@ -535,6 +565,7 @@ sub recent_git_signoffs {
     my $output = "";
     my $count = 0;
     my @lines = ();
+    my %hash;
     my $total_sign_offs;
 
     if (which("git") eq "") {
@@ -548,25 +579,31 @@ sub recent_git_signoffs {
     }
 
     $cmd = "git log --since=${email_git_since} -- ${file}";
-    $cmd .= " | grep -Ei \"^[-_ 	a-z]+by:.*\\\@.*\$\"";
-    if (!$email_git_penguin_chiefs) {
-	$cmd .= " | grep -Ev \"${penguin_chiefs}\"";
-    }
-    $cmd .= " | cut -f2- -d\":\"";
-    $cmd .= " | sort | uniq -c | sort -rn";
 
     $output = `${cmd}`;
     $output =~ s/^\s*//gm;
 
     @lines = split("\n", $output);
 
-    $total_sign_offs = 0;
+    @lines = grep(/^[-_ 	a-z]+by:.*\@.*$/i, @lines);
+    if (!$email_git_penguin_chiefs) {
+	@lines = grep(!/${penguin_chiefs}/i, @lines);
+    }
+    # cut -f2- -d":"
+    s/.*:\s*(.+)\s*/$1/ for (@lines);
+
+    @lines = mailmap(@lines);
+
+    $total_sign_offs = @lines;
+    @lines = sort(@lines);
+    # uniq -c
     foreach my $line (@lines) {
-	if ($line =~ m/([0-9]+)\s+(.*)/) {
-	    $total_sign_offs += $1;
-	} else {
-	    die("$P: Unexpected git output: ${line}\n");
-	}
+	$hash{$line}++;
+    }
+    # sort -rn
+    @lines = ();
+    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+	push(@lines,"$hash{$line}	$line");
     }
 
     foreach my $line (@lines) {
@@ -579,8 +616,8 @@ sub recent_git_signoffs {
 		$sign_offs * 100 / $total_sign_offs < $email_git_min_percent) {
 		last;
 	    }
+	    push_email_address($line);
 	}
-	push_email_address($line);
     }
 }
 
@@ -632,15 +669,18 @@ sub git_assign_blame {
     @commits = uniq(@commits);
     foreach my $commit (@commits) {
 	$cmd = "git log -1 ${commit}";
-	$cmd .= " | grep -Ei \"^[-_ 	a-z]+by:.*\\\@.*\$\"";
-	if (!$email_git_penguin_chiefs) {
-	    $cmd .= " | grep -Ev \"${penguin_chiefs}\"";
-	}
-	$cmd .= " | cut -f2- -d\":\"";
 
 	$output = `${cmd}`;
 	$output =~ s/^\s*//gm;
 	@lines = split("\n", $output);
+
+	@lines = grep(/^[-_ 	a-z]+by:.*\@.*$/i, @lines);
+	if (!$email_git_penguin_chiefs) {
+	    @lines = grep(!/${penguin_chiefs}/i, @lines);
+	}
+	# cut -f2- -d":"
+	s/.*:\s*(.+)\s*/$1/ for (@lines);
+
 	$hash{$_}++ for @lines;
 	$total_sign_offs += @lines;
     }
-- 
cgit v0.10.2


From 8cbb3a77e1a91073fb279a495a11d5093461dfe5 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:21 -0700
Subject: scripts/get_maintainer.pl: add .mailmap use, shell and email cleanups

Add reading and using .mailmap file if it exists
Convert address entries in .mailmap to first encountered address
Don't terminate shell commands with \n
Strip characters found after sign-offs by: name <address> [stripped]

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 1200d72..8b80b5a 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -152,6 +152,36 @@ while (<MAINT>) {
 }
 close(MAINT);
 
+my %mailmap;
+
+open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n";
+while (<MAILMAP>) {
+    my $line = $_;
+
+    next if ($line =~ m/^\s*#/);
+    next if ($line =~ m/^\s*$/);
+
+    my ($name, $address) = parse_email($line);
+    $line = format_email($name, $address);
+
+    next if ($line =~ m/^\s*$/);
+
+    if (exists($mailmap{$name})) {
+	my $obj = $mailmap{$name};
+	push(@$obj, $address);
+    } else {
+	my @arr = ($address);
+	$mailmap{$name} = \@arr;
+    }
+}
+close(MAILMAP);
+
+foreach my $name (sort {$mailmap{$a} <=> $mailmap{$b}} keys %mailmap) {
+    my $obj = $mailmap{$name};
+    foreach my $address (@$obj) {
+    }
+}
+
 ## use the filenames on the command line or find the filenames in the patchfiles
 
 my @files = ();
@@ -403,12 +433,12 @@ sub parse_email {
     my $name = "";
     my $address = "";
 
-    if ($formatted_email =~ /^([^<]+)<(.*\@.*)>$/) {
+    if ($formatted_email =~ /^([^<]+)<(.*\@.*)>.*$/) {
 	$name = $1;
 	$address = $2;
-    } elsif ($formatted_email =~ /^<(.*\@.*)>$/) {
+    } elsif ($formatted_email =~ /^\s*<(.*\@.*)>.*$/) {
 	$address = $1;
-    } elsif ($formatted_email =~ /^(.*\@.*)$/) {
+    } elsif ($formatted_email =~ /^\s*(.*\@.*)$/) {
 	$address = $1;
     }
 
@@ -557,6 +587,29 @@ sub which {
     return "";
 }
 
+sub mailmap {
+    my @lines = @_;
+    my %hash;
+
+    foreach my $line (@lines) {
+	my ($name, $address) = parse_email($line);
+	if (!exists($hash{$name})) {
+	    $hash{$name} = $address;
+	}
+	if (exists($mailmap{$name})) {
+	    my $obj = $mailmap{$name};
+	    foreach my $map_address (@$obj) {
+		if (($map_address eq $address) &&
+		    ($map_address ne $hash{$name})) {
+		    $line = format_email($name, $hash{$name});
+		}
+	    }
+	}
+    }
+
+    return @lines;
+}
+
 sub recent_git_signoffs {
     my ($file) = @_;
 
@@ -592,9 +645,10 @@ sub recent_git_signoffs {
     # cut -f2- -d":"
     s/.*:\s*(.+)\s*/$1/ for (@lines);
 
+    $total_sign_offs = @lines;
+
     @lines = mailmap(@lines);
 
-    $total_sign_offs = @lines;
     @lines = sort(@lines);
     # uniq -c
     foreach my $line (@lines) {
@@ -655,12 +709,12 @@ sub git_assign_blame {
 	    my $diff_start = $2;
 	    my $diff_length = $3;
 	    next if (!("$file" eq "$diff_file"));
-	    $cmd = "git blame -l -L $diff_start,+$diff_length $file\n";
+	    $cmd = "git blame -l -L $diff_start,+$diff_length $file";
 	    @commits = save_commits($cmd, @commits);
 	}
     } else {
 	if (-f $file) {
-	    $cmd = "git blame -l $file\n";
+	    $cmd = "git blame -l $file";
 	    @commits = save_commits($cmd, @commits);
 	}
     }
@@ -678,11 +732,15 @@ sub git_assign_blame {
 	if (!$email_git_penguin_chiefs) {
 	    @lines = grep(!/${penguin_chiefs}/i, @lines);
 	}
+
 	# cut -f2- -d":"
 	s/.*:\s*(.+)\s*/$1/ for (@lines);
 
-	$hash{$_}++ for @lines;
 	$total_sign_offs += @lines;
+
+	@lines = mailmap(@lines);
+
+	$hash{$_}++ for @lines;
     }
 
     $count = 0;
-- 
cgit v0.10.2


From 42498316132e89ca2835b977a7cfb32a83e97b35 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:21 -0700
Subject: scripts/get_maintainer.pl: using --separator implies --nomultiline

If a person sets a separator, it's only used if --nomultiline is set.
Don't make the command line also include --nomultiline in that case.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 8b80b5a..446803ef 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -105,6 +105,10 @@ if ($#ARGV < 0) {
     die "$P: argument missing: patchfile or -f file please\n";
 }
 
+if ($output_separator ne ", ") {
+    $output_multiline = 0;
+}
+
 my $selections = $email + $scm + $status + $subsystem + $web;
 if ($selections == 0) {
     usage();
@@ -376,6 +380,7 @@ MAINTAINER field selection options:
 
 Output type options:
   --separator [, ] => separator for multiple entries on 1 line
+    using --separator also sets --nomultiline if --separator is not [, ]
   --multiline => print 1 entry per line
 
 Other options:
-- 
cgit v0.10.2


From 11ecf53c97863a0609db3816d82f1d0ddf3d2bc2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:22 -0700
Subject: scripts/get_maintainer.pl: add --remove-duplicates

Allow control over the elimination of duplicate email names and addresses

--remove-duplicates will use the first email name or address presented
--noremove-duplicates will emit all names and addresses

--remove-duplicates is enabled by default

For instance:

$ ./scripts/get_maintainer.pl -f drivers/char/tty_ioctl.c
Greg Kroah-Hartman <gregkh@suse.de>
Alan Cox <alan@linux.intel.com>
Mike Frysinger <vapier@gentoo.org>
Alexey Dobriyan <adobriyan@gmail.com>
linux-kernel@vger.kernel.org

$ ./scripts/get_maintainer.pl -f --noremove-duplicates drivers/char/tty_ioctl.c
Greg Kroah-Hartman <gregkh@suse.de>
Alan Cox <alan@redhat.com>
Alan Cox <alan@linux.intel.com>
Alan Cox <alan@lxorguk.ukuu.org.uk>
Mike Frysinger <vapier@gentoo.org>
Alexey Dobriyan <adobriyan@gmail.com>
linux-kernel@vger.kernel.org

Using --remove-duplicates could eliminate multiple maintainers that
share the same name but not the same email address.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 446803ef..473b674 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -30,6 +30,7 @@ my $email_git_max_maintainers = 5;
 my $email_git_min_percent = 5;
 my $email_git_since = "1-year-ago";
 my $email_git_blame = 0;
+my $email_remove_duplicates = 1;
 my $output_multiline = 1;
 my $output_separator = ", ";
 my $scm = 0;
@@ -71,6 +72,7 @@ if (!GetOptions(
 		'git-min-percent=i' => \$email_git_min_percent,
 		'git-since=s' => \$email_git_since,
 		'git-blame!' => \$email_git_blame,
+		'remove-duplicates!' => \$email_remove_duplicates,
 		'm!' => \$email_maintainer,
 		'n!' => \$email_usename,
 		'l!' => \$email_list,
@@ -158,32 +160,28 @@ close(MAINT);
 
 my %mailmap;
 
-open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n";
-while (<MAILMAP>) {
-    my $line = $_;
+if ($email_remove_duplicates) {
+    open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n";
+    while (<MAILMAP>) {
+	my $line = $_;
 
-    next if ($line =~ m/^\s*#/);
-    next if ($line =~ m/^\s*$/);
+	next if ($line =~ m/^\s*#/);
+	next if ($line =~ m/^\s*$/);
 
-    my ($name, $address) = parse_email($line);
-    $line = format_email($name, $address);
+	my ($name, $address) = parse_email($line);
+	$line = format_email($name, $address);
 
-    next if ($line =~ m/^\s*$/);
+	next if ($line =~ m/^\s*$/);
 
-    if (exists($mailmap{$name})) {
-	my $obj = $mailmap{$name};
-	push(@$obj, $address);
-    } else {
-	my @arr = ($address);
-	$mailmap{$name} = \@arr;
-    }
-}
-close(MAILMAP);
-
-foreach my $name (sort {$mailmap{$a} <=> $mailmap{$b}} keys %mailmap) {
-    my $obj = $mailmap{$name};
-    foreach my $address (@$obj) {
+	if (exists($mailmap{$name})) {
+	    my $obj = $mailmap{$name};
+	    push(@$obj, $address);
+	} else {
+	    my @arr = ($address);
+	    $mailmap{$name} = \@arr;
+	}
     }
+    close(MAILMAP);
 }
 
 ## use the filenames on the command line or find the filenames in the patchfiles
@@ -373,6 +371,7 @@ MAINTAINER field selection options:
     --n => include name 'Full Name <addr\@domain.tld>'
     --l => include list(s) if any
     --s => include subscriber only list(s) if any
+    --remove-duplicates => minimize duplicate email names/addresses
   --scm => print SCM tree(s) if any
   --status => print status if any
   --subsystem => print subsystem name if any
@@ -389,7 +388,7 @@ Other options:
   --help => show this help information
 
 Default options:
-  [--email --git --m --n --l --multiline --pattern-depth=0]
+  [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates]
 
 Notes:
   Using "-f directory" may give unexpected results:
@@ -438,12 +437,12 @@ sub parse_email {
     my $name = "";
     my $address = "";
 
-    if ($formatted_email =~ /^([^<]+)<(.*\@.*)>.*$/) {
+    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
 	$name = $1;
 	$address = $2;
-    } elsif ($formatted_email =~ /^\s*<(.*\@.*)>.*$/) {
+    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
 	$address = $1;
-    } elsif ($formatted_email =~ /^\s*(.*\@.*)$/) {
+    } elsif ($formatted_email =~ /^(.+\@\S*)$/) {
 	$address = $1;
     }
 
@@ -542,14 +541,16 @@ sub add_categories {
     }
 }
 
-sub email_address_inuse {
-    my ($test_address) = @_;
+my %email_hash_name;
+my %email_hash_address;
 
-    foreach my $line (@email_to) {
-	my ($name, $address) = parse_email($line);
+sub email_inuse {
+    my ($name, $address) = @_;
+
+    return 1 if (($name eq "") && ($address eq ""));
+    return 1 if (($name ne "") && exists($email_hash_name{$name}));
+    return 1 if (($address ne "") && exists($email_hash_address{$address}));
 
-	return 1 if ($address eq $test_address);
-    }
     return 0;
 }
 
@@ -558,8 +559,12 @@ sub push_email_address {
 
     my ($name, $address) = parse_email($line);
 
-    if (!email_address_inuse($address)) {
+    if (!$email_remove_duplicates) {
+	push(@email_to, format_email($name, $address));
+    } elsif (!email_inuse($name, $address)) {
 	push(@email_to, format_email($name, $address));
+	$email_hash_name{$name}++;
+	$email_hash_address{$address}++;
     }
 }
 
@@ -600,6 +605,9 @@ sub mailmap {
 	my ($name, $address) = parse_email($line);
 	if (!exists($hash{$name})) {
 	    $hash{$name} = $address;
+	} elsif ($address ne $hash{$name}) {
+	    $address = $hash{$name};
+	    $line = format_email($name, $address);
 	}
 	if (exists($mailmap{$name})) {
 	    my $obj = $mailmap{$name};
@@ -652,31 +660,23 @@ sub recent_git_signoffs {
 
     $total_sign_offs = @lines;
 
-    @lines = mailmap(@lines);
+    if ($email_remove_duplicates) {
+	@lines = mailmap(@lines);
+    }
 
     @lines = sort(@lines);
+
     # uniq -c
-    foreach my $line (@lines) {
-	$hash{$line}++;
-    }
+    $hash{$_}++ for @lines;
+
     # sort -rn
-    @lines = ();
     foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
-	push(@lines,"$hash{$line}	$line");
-    }
-
-    foreach my $line (@lines) {
-	if ($line =~ m/([0-9]+)\s+(.*)/) {
-	    my $sign_offs = $1;
-	    $line = $2;
-	    $count++;
-	    if ($sign_offs < $email_git_min_signatures ||
-	        $count > $email_git_max_maintainers ||
-		$sign_offs * 100 / $total_sign_offs < $email_git_min_percent) {
-		last;
-	    }
-	    push_email_address($line);
-	}
+	my $sign_offs = $hash{$line};
+	$count++;
+	last if ($sign_offs < $email_git_min_signatures ||
+		 $count > $email_git_max_maintainers ||
+		 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
+	push_email_address($line);
     }
 }
 
@@ -743,7 +743,9 @@ sub git_assign_blame {
 
 	$total_sign_offs += @lines;
 
-	@lines = mailmap(@lines);
+	if ($email_remove_duplicates) {
+	    @lines = mailmap(@lines);
+	}
 
 	$hash{$_}++ for @lines;
     }
-- 
cgit v0.10.2


From b781655a6f6d15bdcc96f2cc1d56b0658f9cf0b4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:24 -0700
Subject: scripts/get_maintainer.pl: add maintainers in order listed in matched
 section

Previous behavior was "bottom-up" in each section from the pattern "F:"
entry that matched.  Now information is entered into the various lists in
the "as entered" order for each matched section.

This also allows the F: entry to be put anywhere in a section, not just as
the last entries in the section.

And a couple of improvements:

Don't alphabetically sort before outputting the matched scm, status,
subsystem and web sections.

Ignore content after a single email address so these entries are acceptable
M:	name <address> whatever other comment

And a fix:

Make an M: entry without a name again use the name from an immediately
preceding P: line if it exists.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 473b674..cdb44b6 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -313,22 +313,22 @@ if ($email || $email_list) {
 }
 
 if ($scm) {
-    @scm = sort_and_uniq(@scm);
+    @scm = uniq(@scm);
     output(@scm);
 }
 
 if ($status) {
-    @status = sort_and_uniq(@status);
+    @status = uniq(@status);
     output(@status);
 }
 
 if ($subsystem) {
-    @subsystem = sort_and_uniq(@subsystem);
+    @subsystem = uniq(@subsystem);
     output(@subsystem);
 }
 
 if ($web) {
-    @web = sort_and_uniq(@web);
+    @web = uniq(@web);
     output(@web);
 }
 
@@ -442,7 +442,7 @@ sub parse_email {
 	$address = $2;
     } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
 	$address = $1;
-    } elsif ($formatted_email =~ /^(.+\@\S*)$/) {
+    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
 	$address = $1;
     }
 
@@ -485,12 +485,46 @@ sub format_email {
     return $formatted_email;
 }
 
-sub add_categories {
+sub find_starting_index {
+
+    my ($index) = @_;
+
+    while ($index > 0) {
+	my $tv = $typevalue[$index];
+	if (!($tv =~ m/^(\C):\s*(.*)/)) {
+	    last;
+	}
+	$index--;
+    }
+
+    return $index;
+}
+
+sub find_ending_index {
     my ($index) = @_;
 
-    $index = $index - 1;
-    while ($index >= 0) {
+    while ($index < @typevalue) {
 	my $tv = $typevalue[$index];
+	if (!($tv =~ m/^(\C):\s*(.*)/)) {
+	    last;
+	}
+	$index++;
+    }
+
+    return $index;
+}
+
+sub add_categories {
+    my ($index) = @_;
+
+    my $i;
+    my $start = find_starting_index($index);
+    my $end = find_ending_index($index);
+
+    push(@subsystem, $typevalue[$start]);
+
+    for ($i = $start + 1; $i < $end; $i++) {
+	my $tv = $typevalue[$i];
 	if ($tv =~ m/^(\C):\s*(.*)/) {
 	    my $ptype = $1;
 	    my $pvalue = $2;
@@ -513,11 +547,12 @@ sub add_categories {
 	    } elsif ($ptype eq "M") {
 		my ($name, $address) = parse_email($pvalue);
 		if ($name eq "") {
-		    if ($index >= 0) {
-			my $tv = $typevalue[$index - 1];
+		    if ($i > 0) {
+			my $tv = $typevalue[$i - 1];
 			if ($tv =~ m/^(\C):\s*(.*)/) {
 			    if ($1 eq "P") {
 				$name = $2;
+				$pvalue = format_email($name, $address);
 			    }
 			}
 		    }
@@ -532,11 +567,6 @@ sub add_categories {
 	    } elsif ($ptype eq "S") {
 		push(@status, $pvalue);
 	    }
-
-	    $index--;
-	} else {
-	    push(@subsystem,$tv);
-	    $index = -1;
 	}
     }
 }
@@ -559,6 +589,10 @@ sub push_email_address {
 
     my ($name, $address) = parse_email($line);
 
+    if ($address eq "") {
+	return 0;
+    }
+
     if (!$email_remove_duplicates) {
 	push(@email_to, format_email($name, $address));
     } elsif (!email_inuse($name, $address)) {
@@ -566,6 +600,8 @@ sub push_email_address {
 	$email_hash_name{$name}++;
 	$email_hash_address{$address}++;
     }
+
+    return 1;
 }
 
 sub push_email_addresses {
@@ -581,7 +617,9 @@ sub push_email_addresses {
 	    push_email_address($entry);
 	}
     } else {
-	warn("Invalid MAINTAINERS address: '" . $address . "'\n");
+	if (!push_email_address($address)) {
+	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
+	}
     }
 }
 
-- 
cgit v0.10.2


From 43368e74d126f8e874fce2c248f094edfcf0baa6 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Mon, 21 Sep 2009 17:04:24 -0700
Subject: MAINTAINERS: acpi: add 'include/acpi'

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index ee58af1..8d30ce8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -233,6 +233,7 @@ S:	Supported
 F:	drivers/acpi/
 F:	drivers/pnp/pnpacpi/
 F:	include/linux/acpi.h
+F:	include/acpi/
 
 ACPI BATTERY DRIVERS
 M:	Alexey Starikovskiy <astarikovskiy@suse.de>
-- 
cgit v0.10.2


From 4e04d5a3d57accb0b05d381aa84d93d3749b15e2 Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Mon, 21 Sep 2009 17:04:25 -0700
Subject: MAINTAINERS: omap: fix regex

Otherwise 'arch/arm/*omap*/foo.c' wouldn't match

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 8d30ce8..4dd43f8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3737,7 +3737,7 @@ W:	http://www.muru.com/linux/omap/
 W:	http://linux.omap.com/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap-2.6.git
 S:	Maintained
-F:	arch/arm/*omap*
+F:	arch/arm/*omap*/
 
 OMAP CLOCK FRAMEWORK SUPPORT
 M:	Paul Walmsley <paul@pwsan.com>
-- 
cgit v0.10.2


From 076cfaaece1c4e57ea7d924f38b911b9a5502365 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:26 -0700
Subject: MAINTAINERS: integrate P:/M: lines

A couple of new uses of separate "P: name" "M: address" lines are
converted to single line "M: name <address>"

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Anil Ravindranath <anil_ravindranath@pmc-sierra.com>
Cc: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 4dd43f8..7e54faf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4028,8 +4028,7 @@ F:	drivers/block/pktcdvd.c
 F:	include/linux/pktcdvd.h
 
 PMC SIERRA MaxRAID DRIVER
-P:	Anil Ravindranath
-M:	anil_ravindranath@pmc-sierra.com
+M:	Anil Ravindranath <anil_ravindranath@pmc-sierra.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.pmc-sierra.com/
 S:	Supported
@@ -5660,8 +5659,7 @@ S:	Maintained
 F:	drivers/input/misc/wistron_btns.c
 
 WL1251 WIRELESS DRIVER
-P:	Kalle Valo
-M:	kalle.valo@nokia.com
+M:	Kalle Valo <kalle.valo@nokia.com>
 L:	linux-wireless@vger.kernel.org
 W:	http://wireless.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
-- 
cgit v0.10.2


From efc03ecb9d674588a13aee27289c2af2afe5e6b4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Sep 2009 17:04:27 -0700
Subject: MAINTAINERS: move ARM lists to infradead

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Krzysztof Halasa <khc@pm.waw.pl>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 7e54faf..379db50 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -498,7 +498,7 @@ F:	arch/arm/include/asm/floppy.h
 
 ARM PORT
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.arm.linux.org.uk/
 S:	Maintained
 F:	arch/arm/
@@ -509,36 +509,36 @@ F:	drivers/mmc/host/mmci.*
 
 ARM/ADI ROADRUNNER MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-ixp23xx/
 F:	arch/arm/mach-ixp23xx/include/mach/
 
 ARM/ADS SPHERE MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/AFEB9260 MACHINE SUPPORT
 M:	Sergey Lapin <slapin@ossfans.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/AJECO 1ARM MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/ATMEL AT91RM9200 ARM ARCHITECTURE
 M:	Andrew Victor <linux@maxim.org.za>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://maxim.org.za/at91_26.html
 S:	Maintained
 
 ARM/BCMRING ARM ARCHITECTURE
 M:	Leo Chen <leochen@broadcom.com>
 M:	Scott Branden <sbranden@broadcom.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-bcmring
 
@@ -555,25 +555,25 @@ F:	drivers/mtd/nand/nand_bcm_umi.h
 ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
 M:	Hartley Sweeten <hsweeten@visionengravers.com>
 M:	Ryan Mallon <ryan@bluewatersys.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-ep93xx/
 F:	arch/arm/mach-ep93xx/include/mach/
 
 ARM/CIRRUS LOGIC EDB9315A MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/CLKDEV SUPPORT
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 F:	arch/arm/common/clkdev.c
 F:	arch/arm/include/asm/clkdev.h
 
 ARM/COMPULAB CM-X270/EM-X270 and CM-X300 MACHINE SUPPORT
 M:	Mike Rapoport <mike@compulab.co.il>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/CORGI MACHINE SUPPORT
@@ -582,14 +582,14 @@ S:	Maintained
 
 ARM/CORTINA SYSTEMS GEMINI ARM ARCHITECTURE
 M:	Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://gitorious.org/linux-gemini/mainline.git
 S:	Maintained
 F:	arch/arm/mach-gemini/
 
 ARM/EBSA110 MACHINE SUPPORT
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.arm.linux.org.uk/
 S:	Maintained
 F:	arch/arm/mach-ebsa110/
@@ -607,13 +607,13 @@ F:	arch/arm/mach-pxa/ezx.c
 
 ARM/FARADAY FA526 PORT
 M:	Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mm/*-fa*
 
 ARM/FOOTBRIDGE ARCHITECTURE
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.arm.linux.org.uk/
 S:	Maintained
 F:	arch/arm/include/asm/hardware/dec21285.h
@@ -621,17 +621,17 @@ F:	arch/arm/mach-footbridge/
 
 ARM/FREESCALE IMX / MXC ARM ARCHITECTURE
 M:	Sascha Hauer <kernel@pengutronix.de>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/GLOMATION GESBC9312SX MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/GUMSTIX MACHINE SUPPORT
 M:	Steve Sakoman <sakoman@gmail.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/H4700 (HP IPAQ HX4700) MACHINE SUPPORT
@@ -651,55 +651,55 @@ F:	arch/arm/mach-sa1100/include/mach/jornada720.h
 ARM/INTEL IOP32X ARM ARCHITECTURE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 M:	Dan Williams <dan.j.williams@intel.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 
 ARM/INTEL IOP33X ARM ARCHITECTURE
 M:	Dan Williams <dan.j.williams@intel.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 
 ARM/INTEL IOP13XX ARM ARCHITECTURE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 M:	Dan Williams <dan.j.williams@intel.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 
 ARM/INTEL IQ81342EX MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 M:	Dan Williams <dan.j.williams@intel.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 
 ARM/INTEL IXP2000 ARM ARCHITECTURE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/INTEL IXDP2850 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/INTEL IXP23XX ARM ARCHITECTURE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/INTEL XSC3 (MANZANO) ARM CORE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 M:	Dan Williams <dan.j.williams@intel.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 
 ARM/IP FABRICS DOUBLE ESPRESSO MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/LOGICPD PXA270 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/MAGICIAN MACHINE SUPPORT
@@ -709,7 +709,7 @@ S:	Maintained
 ARM/Marvell Loki/Kirkwood/MV78xx0/Orion SOC support
 M:	Lennert Buytenhek <buytenh@marvell.com>
 M:	Nicolas Pitre <nico@marvell.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://git.marvell.com/orion
 S:	Maintained
 F:	arch/arm/mach-loki/
@@ -720,7 +720,7 @@ F:	arch/arm/plat-orion/
 
 ARM/MIOA701 MACHINE SUPPORT
 M:	Robert Jarzmik <robert.jarzmik@free.fr>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 F:	arch/arm/mach-pxa/mioa701.c
 S:	Maintained
 
@@ -761,18 +761,18 @@ S:	Maintained
 
 ARM/PT DIGITAL BOARD PORT
 M:	Stefan Eletzhofer <stefan.eletzhofer@eletztrick.de>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.arm.linux.org.uk/
 S:	Maintained
 
 ARM/RADISYS ENP2611 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/RISCPC ARCHITECTURE
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.arm.linux.org.uk/
 S:	Maintained
 F:	arch/arm/common/time-acorn.c
@@ -791,7 +791,7 @@ S:	Maintained
 
 ARM/SAMSUNG ARM ARCHITECTURES
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/plat-s3c/
@@ -799,65 +799,65 @@ F:	arch/arm/plat-s3c24xx/
 
 ARM/S3C2410 ARM ARCHITECTURE
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/mach-s3c2410/
 
 ARM/S3C2440 ARM ARCHITECTURE
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/mach-s3c2440/
 
 ARM/S3C2442 ARM ARCHITECTURE
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/mach-s3c2442/
 
 ARM/S3C2443 ARM ARCHITECTURE
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/mach-s3c2443/
 
 ARM/S3C6400 ARM ARCHITECTURE
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/mach-s3c6400/
 
 ARM/S3C6410 ARM ARCHITECTURE
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/mach-s3c6410/
 
 ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/THECUS N2100 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/NUVOTON W90X900 ARM ARCHITECTURE
 M:	Wan ZongShun <mcuos.com@gmail.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.mcuos.com
 S:	Maintained
 
 ARM/VFP SUPPORT
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.arm.linux.org.uk/
 S:	Maintained
 F:	arch/arm/vfp/
@@ -964,7 +964,7 @@ F:	include/linux/atm*
 
 ATMEL AT91 MCI DRIVER
 M:	Nicolas Ferre <nicolas.ferre@atmel.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.atmel.com/products/AT91/
 W:	http://www.at91.com/
 S:	Maintained
@@ -1542,7 +1542,7 @@ F:	drivers/infiniband/hw/cxgb3/
 
 CYBERPRO FB DRIVER
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.arm.linux.org.uk/
 S:	Maintained
 F:	drivers/video/cyber2000fb.*
@@ -2086,7 +2086,7 @@ F:	drivers/i2c/busses/i2c-cpm.c
 FREESCALE IMX / MXC FRAMEBUFFER DRIVER
 M:	Sascha Hauer <kernel@pengutronix.de>
 L:	linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/plat-mxc/include/mach/imxfb.h
 F:	drivers/video/imxfb.c
@@ -3452,7 +3452,7 @@ F:	include/linux/meye.h
 
 MOTOROLA IMX MMC/SD HOST CONTROLLER INTERFACE DRIVER
 M:	Pavel Pisa <ppisa@pikron.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/mmc/host/imxmmc.*
 
@@ -4170,7 +4170,7 @@ F:	drivers/media/video/pvrusb2/
 PXA2xx/PXA3xx SUPPORT
 M:	Eric Miao <eric.y.miao@gmail.com>
 M:	Russell King <linux@arm.linux.org.uk>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-pxa/
 F:	drivers/pcmcia/pxa2xx*
@@ -4183,13 +4183,13 @@ F:	sound/soc/pxa
 PXA168 SUPPORT
 M:	Eric Miao <eric.y.miao@gmail.com>
 M:	Jason Chagas <jason.chagas@marvell.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
 S:	Maintained
 
 PXA910 SUPPORT
 M:	Eric Miao <eric.y.miao@gmail.com>
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
 S:	Maintained
 
@@ -4430,7 +4430,7 @@ F:	net/iucv/
 
 S3C24XX SD/MMC Driver
 M:	Ben Dooks <ben-linux@fluff.org>
-L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/mmc/host/s3cmci.*
 
@@ -4634,7 +4634,7 @@ F:	drivers/misc/sgi-xp/
 SHARP LH SUPPORT (LH7952X & LH7A40X)
 M:	Marc Singer <elf@buici.com>
 W:	http://projects.buici.com/arm
-L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/arm/Sharp-LH/ADC-LH7-Touchscreen
 F:	arch/arm/mach-lh7a40x/
-- 
cgit v0.10.2


From 2f30b1f9e1b612cdd1a17daeecf514229e8d6a5f Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Mon, 21 Sep 2009 17:04:29 -0700
Subject: vsprintf: use WARN_ON_ONCE

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d320c18..73a14b8 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1092,13 +1092,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 
 	/* Reject out-of-range values early.  Large positive sizes are
 	   used for unknown buffer sizes. */
-	if (unlikely((int) size < 0)) {
-		/* There can be only one.. */
-		static char warn = 1;
-		WARN_ON(warn);
-		warn = 0;
+	if (WARN_ON_ONCE((int) size < 0))
 		return 0;
-	}
 
 	str = buf;
 	end = buf + size;
@@ -1544,13 +1539,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 
 	struct printf_spec spec = {0};
 
-	if (unlikely((int) size < 0)) {
-		/* There can be only one.. */
-		static char warn = 1;
-		WARN_ON(warn);
-		warn = 0;
+	if (WARN_ON_ONCE((int) size < 0))
 		return 0;
-	}
 
 	str = buf;
 	end = buf + size;
-- 
cgit v0.10.2


From e6de3988aa52debb25a427d085061f3bf1181d54 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 21 Sep 2009 17:04:30 -0700
Subject: flex_array: add flex_array_clear function

Add a new function to the flex_array API:

	int flex_array_clear(struct flex_array *fa,
				unsigned int element_nr)

This function will zero the element at element_nr in the flex_array.

Although this is equivalent to using flex_array_put() and passing a
pointer to zero'd memory, flex_array_clear() does not require such a
pointer to memory that would most likely need to be allocated on the
caller's stack which could be significantly large depending on
element_size.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 45ff184..3887b21 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -44,6 +44,7 @@ void flex_array_free(struct flex_array *fa);
 void flex_array_free_parts(struct flex_array *fa);
 int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
 		gfp_t flags);
+int flex_array_clear(struct flex_array *fa, unsigned int element_nr);
 void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
 
 #endif /* _FLEX_ARRAY_H */
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 7baed2f..b68f99b 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -207,6 +207,32 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
 }
 
 /**
+ * flex_array_clear - clear element in array at @element_nr
+ * @element_nr:	index of the position to clear.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+	void *dst;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else {
+		part = fa->parts[part_nr];
+		if (!part)
+			return -EINVAL;
+	}
+	dst = &part->elements[index_inside_part(fa, element_nr)];
+	memset(dst, 0, fa->element_size);
+	return 0;
+}
+
+/**
  * flex_array_prealloc - guarantee that array space exists
  * @start:	index of first array element for which space is allocated
  * @end:	index of last (inclusive) element for which space is allocated
-- 
cgit v0.10.2


From 19da3dd157f8db6fe727ff268dab4791d55a6371 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 21 Sep 2009 17:04:31 -0700
Subject: flex_array: poison free elements

Newly initialized flex_array's and/or flex_array_part's are now poisoned
with a new poison value, FLEX_ARRAY_FREE.  It's value is similar to
POISON_FREE used in the various slab allocators, but is different to
distinguish between flex array's poisoned kmem and slab allocator poisoned
kmem.

This will allow us to identify flex_array_part's that only contain free
elements (and free them with an addition to the flex_array API).  This
could also be extended in the future to identify `get' uses on elements
that have not been `put'.

If __GFP_ZERO is passed for a part's gfp mask, the poisoning is avoided.
These elements are considered to be in-use since they have been
initialized.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 6729f7d..7fc194a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -65,6 +65,9 @@
 #define MUTEX_DEBUG_INIT	0x11
 #define MUTEX_DEBUG_FREE	0x22
 
+/********** lib/flex_array.c **********/
+#define FLEX_ARRAY_FREE	0x6c	/* for use-after-free poisoning */
+
 /********** security/ **********/
 #define KEY_DESTROY		0xbd
 
diff --git a/lib/flex_array.c b/lib/flex_array.c
index b68f99b..e22d0e9 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -113,6 +113,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 		return NULL;
 	ret->element_size = element_size;
 	ret->total_nr_elements = total;
+	if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
+		memset(ret->parts[0], FLEX_ARRAY_FREE, bytes_left_in_base());
 	return ret;
 }
 
@@ -159,15 +161,12 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
 {
 	struct flex_array_part *part = fa->parts[part_nr];
 	if (!part) {
-		/*
-		 * This leaves the part pages uninitialized
-		 * and with potentially random data, just
-		 * as if the user had kmalloc()'d the whole.
-		 * __GFP_ZERO can be used to zero it.
-		 */
-		part = kmalloc(FLEX_ARRAY_PART_SIZE, flags);
+		part = kmalloc(sizeof(struct flex_array_part), flags);
 		if (!part)
 			return NULL;
+		if (!(flags & __GFP_ZERO))
+			memset(part, FLEX_ARRAY_FREE,
+				sizeof(struct flex_array_part));
 		fa->parts[part_nr] = part;
 	}
 	return part;
@@ -228,7 +227,7 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
 			return -EINVAL;
 	}
 	dst = &part->elements[index_inside_part(fa, element_nr)];
-	memset(dst, 0, fa->element_size);
+	memset(dst, FLEX_ARRAY_FREE, fa->element_size);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 4af5a2f770cc8575840ccb1514ec76ecb592985c Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 21 Sep 2009 17:04:31 -0700
Subject: flex_array: add flex_array_shrink function

Add a new function to the flex_array API:

	int flex_array_shrink(struct flex_array *fa)

This function will free all unused second-level pages.  Since elements are
now poisoned if they are not allocated with __GFP_ZERO, it's possible to
identify parts that consist solely of unused elements.

flex_array_shrink() returns the number of pages freed.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 3887b21..f12401e 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -46,5 +46,6 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
 		gfp_t flags);
 int flex_array_clear(struct flex_array *fa, unsigned int element_nr);
 void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
+int flex_array_shrink(struct flex_array *fa);
 
 #endif /* _FLEX_ARRAY_H */
diff --git a/lib/flex_array.c b/lib/flex_array.c
index e22d0e9..1b03bb5 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -291,3 +291,43 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
 	}
 	return &part->elements[index_inside_part(fa, element_nr)];
 }
+
+static int part_is_free(struct flex_array_part *part)
+{
+	int i;
+
+	for (i = 0; i < sizeof(struct flex_array_part); i++)
+		if (part->elements[i] != FLEX_ARRAY_FREE)
+			return 0;
+	return 1;
+}
+
+/**
+ * flex_array_shrink - free unused second-level pages
+ *
+ * Frees all second-level pages that consist solely of unused
+ * elements.  Returns the number of pages freed.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_shrink(struct flex_array *fa)
+{
+	struct flex_array_part *part;
+	int max_part = nr_base_part_ptrs();
+	int part_nr;
+	int ret = 0;
+
+	if (elements_fit_in_base(fa))
+		return ret;
+	for (part_nr = 0; part_nr < max_part; part_nr++) {
+		part = fa->parts[part_nr];
+		if (!part)
+			continue;
+		if (part_is_free(part)) {
+			fa->parts[part_nr] = NULL;
+			kfree(part);
+			ret++;
+		}
+	}
+	return ret;
+}
-- 
cgit v0.10.2


From 45b588d6e5cc172704bac0c998ce54873b149b22 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 21 Sep 2009 17:04:33 -0700
Subject: flex_array: introduce DEFINE_FLEX_ARRAY

FLEX_ARRAY_INIT(element_size, total_nr_elements) cannot determine if
either parameter is valid, so flex arrays which are statically allocated
with this interface can easily become corrupted or reference beyond its
allocated memory.

This removes FLEX_ARRAY_INIT() as a struct flex_array initializer since no
initializer may perform the required checking.  Instead, the array is now
defined with a new interface:

	DEFINE_FLEX_ARRAY(name, element_size, total_nr_elements)

This may be prefixed with `static' for file scope.

This interface includes compile-time checking of the parameters to ensure
they are valid.  Since the validity of both element_size and
total_nr_elements depend on FLEX_ARRAY_BASE_SIZE and FLEX_ARRAY_PART_SIZE,
the kernel build will fail if either of these predefined values changes
such that the array parameters are no longer valid.

Since BUILD_BUG_ON() requires compile time constants, several of the
static inline functions that were once local to lib/flex_array.c had to be
moved to include/linux/flex_array.h.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index f12401e..1d747f7 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -31,10 +31,32 @@ struct flex_array {
 	};
 };
 
-#define FLEX_ARRAY_INIT(size, total) { { {\
-	.element_size = (size),		\
-	.total_nr_elements = (total),	\
-} } }
+/* Number of bytes left in base struct flex_array, excluding metadata */
+#define FLEX_ARRAY_BASE_BYTES_LEFT					\
+	(FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts))
+
+/* Number of pointers in base to struct flex_array_part pages */
+#define FLEX_ARRAY_NR_BASE_PTRS						\
+	(FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *))
+
+/* Number of elements of size that fit in struct flex_array_part */
+#define FLEX_ARRAY_ELEMENTS_PER_PART(size)				\
+	(FLEX_ARRAY_PART_SIZE / size)
+
+/*
+ * Defines a statically allocated flex array and ensures its parameters are
+ * valid.
+ */
+#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total)		\
+	struct flex_array __arrayname = { { {				\
+		.element_size = (__element_size),			\
+		.total_nr_elements = (__total),				\
+	} } };								\
+	static inline void __arrayname##_invalid_parameter(void)	\
+	{								\
+		BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS *	\
+			FLEX_ARRAY_ELEMENTS_PER_PART(__element_size));	\
+	}
 
 struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 		gfp_t flags);
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 1b03bb5..b62ce6c 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -28,23 +28,6 @@ struct flex_array_part {
 	char elements[FLEX_ARRAY_PART_SIZE];
 };
 
-static inline int __elements_per_part(int element_size)
-{
-	return FLEX_ARRAY_PART_SIZE / element_size;
-}
-
-static inline int bytes_left_in_base(void)
-{
-	int element_offset = offsetof(struct flex_array, parts);
-	int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset;
-	return bytes_left;
-}
-
-static inline int nr_base_part_ptrs(void)
-{
-	return bytes_left_in_base() / sizeof(struct flex_array_part *);
-}
-
 /*
  * If a user requests an allocation which is small
  * enough, we may simply use the space in the
@@ -54,7 +37,7 @@ static inline int nr_base_part_ptrs(void)
 static inline int elements_fit_in_base(struct flex_array *fa)
 {
 	int data_size = fa->element_size * fa->total_nr_elements;
-	if (data_size <= bytes_left_in_base())
+	if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)
 		return 1;
 	return 0;
 }
@@ -103,7 +86,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 					gfp_t flags)
 {
 	struct flex_array *ret;
-	int max_size = nr_base_part_ptrs() * __elements_per_part(element_size);
+	int max_size = FLEX_ARRAY_NR_BASE_PTRS *
+				FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
 
 	/* max_size will end up 0 if element_size > PAGE_SIZE */
 	if (total > max_size)
@@ -114,14 +98,15 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 	ret->element_size = element_size;
 	ret->total_nr_elements = total;
 	if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
-		memset(ret->parts[0], FLEX_ARRAY_FREE, bytes_left_in_base());
+		memset(ret->parts[0], FLEX_ARRAY_FREE,
+						FLEX_ARRAY_BASE_BYTES_LEFT);
 	return ret;
 }
 
 static int fa_element_to_part_nr(struct flex_array *fa,
 					unsigned int element_nr)
 {
-	return element_nr / __elements_per_part(fa->element_size);
+	return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
 }
 
 /**
@@ -133,11 +118,10 @@ static int fa_element_to_part_nr(struct flex_array *fa,
 void flex_array_free_parts(struct flex_array *fa)
 {
 	int part_nr;
-	int max_part = nr_base_part_ptrs();
 
 	if (elements_fit_in_base(fa))
 		return;
-	for (part_nr = 0; part_nr < max_part; part_nr++)
+	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
 		kfree(fa->parts[part_nr]);
 }
 
@@ -152,7 +136,8 @@ static unsigned int index_inside_part(struct flex_array *fa,
 {
 	unsigned int part_offset;
 
-	part_offset = element_nr % __elements_per_part(fa->element_size);
+	part_offset = element_nr %
+				FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
 	return part_offset * fa->element_size;
 }
 
@@ -313,13 +298,12 @@ static int part_is_free(struct flex_array_part *part)
 int flex_array_shrink(struct flex_array *fa)
 {
 	struct flex_array_part *part;
-	int max_part = nr_base_part_ptrs();
 	int part_nr;
 	int ret = 0;
 
 	if (elements_fit_in_base(fa))
 		return ret;
-	for (part_nr = 0; part_nr < max_part; part_nr++) {
+	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
 		part = fa->parts[part_nr];
 		if (!part)
 			continue;
-- 
cgit v0.10.2


From fc0d8d944df0c58cd810f33db82f87dcf5dcc190 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 21 Sep 2009 17:04:33 -0700
Subject: flex_array: add missing kerneldoc annotations

Add kerneldoc annotations for function formals of type struct flex_array
and gfp_t which are currently lacking.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/lib/flex_array.c b/lib/flex_array.c
index b62ce6c..66eef2e 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -46,6 +46,7 @@ static inline int elements_fit_in_base(struct flex_array *fa)
  * flex_array_alloc - allocate a new flexible array
  * @element_size:	the size of individual elements in the array
  * @total:		total number of elements that this should hold
+ * @flags:		page allocation flags to use for base array
  *
  * Note: all locking must be provided by the caller.
  *
@@ -111,6 +112,7 @@ static int fa_element_to_part_nr(struct flex_array *fa,
 
 /**
  * flex_array_free_parts - just free the second-level pages
+ * @fa:		the flex array from which to free parts
  *
  * This is to be used in cases where the base 'struct flex_array'
  * has been statically allocated and should not be free.
@@ -159,9 +161,12 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
 
 /**
  * flex_array_put - copy data into the array at @element_nr
- * @src:	address of data to copy into the array
+ * @fa:		the flex array to copy data into
  * @element_nr:	index of the position in which to insert
  * 		the new element.
+ * @src:	address of data to copy into the array
+ * @flags:	page allocation flags to use for array expansion
+ *
  *
  * Note that this *copies* the contents of @src into
  * the array.  If you are trying to store an array of
@@ -192,6 +197,7 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
 
 /**
  * flex_array_clear - clear element in array at @element_nr
+ * @fa:		the flex array of the element.
  * @element_nr:	index of the position to clear.
  *
  * Locking must be provided by the caller.
@@ -218,8 +224,10 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
 
 /**
  * flex_array_prealloc - guarantee that array space exists
+ * @fa:		the flex array for which to preallocate parts
  * @start:	index of first array element for which space is allocated
  * @end:	index of last (inclusive) element for which space is allocated
+ * @flags:	page allocation flags
  *
  * This will guarantee that no future calls to flex_array_put()
  * will allocate memory.  It can be used if you are expecting to
@@ -252,6 +260,7 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
 
 /**
  * flex_array_get - pull data back out of the array
+ * @fa:		the flex array from which to extract data
  * @element_nr:	index of the element to fetch from the array
  *
  * Returns a pointer to the data at index @element_nr.  Note
@@ -289,6 +298,7 @@ static int part_is_free(struct flex_array_part *part)
 
 /**
  * flex_array_shrink - free unused second-level pages
+ * @fa:		the flex array to shrink
  *
  * Frees all second-level pages that consist solely of unused
  * elements.  Returns the number of pages freed.
-- 
cgit v0.10.2


From 463f28648586721c2191130c9b3c27589dcc11a0 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Mon, 21 Sep 2009 17:04:34 -0700
Subject: checkpatch: possible types -- else cannot start a type

An else cannot start a type, it would have to be within a block after the
else.  This can trigger false modifier matching.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 2d5ece7..fd64816 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1372,6 +1372,8 @@ sub process {
 			# Ignore functions being called
 			} elsif ($s =~ /^.\s*$Ident\s*\(/s) {
 
+			} elsif ($s =~ /^.\s*else\b/s) {
+
 			# declarations always start with types
 			} elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) {
 				my $type = $1;
-- 
cgit v0.10.2


From 113f04a836481e9ecc26e8dee8b0e4d52878a288 Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@fifo99.com>
Date: Mon, 21 Sep 2009 17:04:35 -0700
Subject: checkpatch: handle C99 comments correctly (performance issue)

This fixes the sanitation process in checkpatch.pl so that it blocks out
the text after a C99 style comment the same way it does with block style
comments.  This prevents the text from getting processed as regular code.

Signed-off-by: Daniel Walker <dwalker@fifo99.com>
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fd64816..aa009a3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -356,6 +356,13 @@ sub sanitise_line {
 			$off++;
 			next;
 		}
+		if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') {
+			$sanitise_quote = '//';
+
+			substr($res, $off, 2, $sanitise_quote);
+			$off++;
+			next;
+		}
 
 		# A \ in a string means ignore the next character.
 		if (($sanitise_quote eq "'" || $sanitise_quote eq '"') &&
@@ -379,6 +386,8 @@ sub sanitise_line {
 		#print "c<$c> SQ<$sanitise_quote>\n";
 		if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
 			substr($res, $off, 1, $;);
+		} elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") {
+			substr($res, $off, 1, $;);
 		} elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
 			substr($res, $off, 1, 'X');
 		} else {
@@ -386,6 +395,10 @@ sub sanitise_line {
 		}
 	}
 
+	if ($sanitise_quote eq '//') {
+		$sanitise_quote = '';
+	}
+
 	# The pathname on a #include may be surrounded by '<' and '>'.
 	if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) {
 		my $clean = 'X' x length($1);
-- 
cgit v0.10.2


From 30dad6ebecffebddf6b9947d11e31377fa900ff3 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Mon, 21 Sep 2009 17:04:36 -0700
Subject: checkpatch: indent checks -- stop when we run out of continuation
 lines

Ensure we terminate when there are no futher continuation lines when
trying to determine relative indent of conditionals and their blocks.

Reported-by: John Daiker <daikerjohn@gmail.com>
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index aa009a3..b6f267b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1547,8 +1547,9 @@ sub process {
 				    $s =~ /^\s*#\s*?/ ||
 				    $s =~ /^\s*$Ident\s*:/) {
 					$continuation = ($s =~ /^.*?\\\n/) ? 1 : 0;
-					$s =~ s/^.*?\n//;
-					$cond_lines++;
+					if ($s =~ s/^.*?\n//) {
+						$cond_lines++;
+					}
 				}
 			}
 
-- 
cgit v0.10.2


From 77f5b10a82bbd832c99ec4b120d5645342a5b140 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Mon, 21 Sep 2009 17:04:37 -0700
Subject: checkpatch: make -f alias --file, add --help, more verbose help
 message

Impact:
  - More verbose help/usage message.
  - Make the option -f an alias for --file.
  - On -h, --help, and --version display help message and exit(0).
  - With no FILE(s) given, exit(1) with "no input files".
  - On invalid options display help/usage and exit(1).

Based on a patch by Pavel Machek.

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index b6f267b..e3c6b49 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -28,6 +28,41 @@ my $mailback = 0;
 my $summary_file = 0;
 my $root;
 my %debug;
+my $help = 0;
+
+sub help {
+	my ($exitcode) = @_;
+
+	print << "EOM";
+Usage: $P [OPTION]... [FILE]...
+Version: $V
+
+Options:
+  -q, --quiet                quiet
+  --no-tree                  run without a kernel tree
+  --no-signoff               do not check for 'Signed-off-by' line
+  --patch                    treat FILE as patchfile (default)
+  --emacs                    emacs compile window format
+  --terse                    one line per report
+  -f, --file                 treat FILE as regular source file
+  --subjective, --strict     enable more subjective tests
+  --root=PATH                PATH to the kernel tree root
+  --no-summary               suppress the per-file summary
+  --mailback                 only produce a report in case of warnings/errors
+  --summary-file             include the filename in summary
+  --debug KEY=[0|1]          turn on/off debugging of KEY, where KEY is one of
+                             'values', 'possible', 'type', and 'attr' (default
+                             is all off)
+  --test-only=WORD           report only warnings/errors containing WORD
+                             literally
+  -h, --help, --version      display this help and exit
+
+When FILE is - read standard input.
+EOM
+
+	exit($exitcode);
+}
+
 GetOptions(
 	'q|quiet+'	=> \$quiet,
 	'tree!'		=> \$tree,
@@ -35,7 +70,7 @@ GetOptions(
 	'patch!'	=> \$chk_patch,
 	'emacs!'	=> \$emacs,
 	'terse!'	=> \$terse,
-	'file!'		=> \$file,
+	'f|file!'	=> \$file,
 	'subjective!'	=> \$check,
 	'strict!'	=> \$check,
 	'root=s'	=> \$root,
@@ -45,22 +80,16 @@ GetOptions(
 
 	'debug=s'	=> \%debug,
 	'test-only=s'	=> \$tst_only,
-) or exit;
+	'h|help'	=> \$help,
+	'version'	=> \$help
+) or help(1);
+
+help(0) if ($help);
 
 my $exit = 0;
 
 if ($#ARGV < 0) {
-	print "usage: $P [options] patchfile\n";
-	print "version: $V\n";
-	print "options: -q               => quiet\n";
-	print "         --no-tree        => run without a kernel tree\n";
-	print "         --terse          => one line per report\n";
-	print "         --emacs          => emacs compile window format\n";
-	print "         --file           => check a source file\n";
-	print "         --strict         => enable more subjective tests\n";
-	print "         --root           => path to the kernel tree root\n";
-	print "         --no-summary     => suppress the per-file summary\n";
-	print "         --summary-file   => include the filename in summary\n";
+	print "$P: no input files\n";
 	exit(1);
 }
 
-- 
cgit v0.10.2


From ea71a0a019f913bdf506103bd90140d93a2b83f0 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Mon, 21 Sep 2009 17:04:38 -0700
Subject: checkpatch: format strings should not have brackets in macros

We should not recommend braces for the following:

    #define pr_fmt(fmt)    "%s: " fmt, __func__

allow things with double quotes round them to avoid this check.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e3c6b49..57df906 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2288,7 +2288,8 @@ sub process {
 				DECLARE_PER_CPU|
 				DEFINE_PER_CPU|
 				__typeof__\(|
-				\.$Ident\s*=\s*
+				\.$Ident\s*=\s*|
+				^\"|\"$
 			}x;
 			#print "REST<$rest> dstat<$dstat>\n";
 			if ($rest ne '') {
-- 
cgit v0.10.2


From fb9e9096ba94385b738a8ad6c5864b5778285957 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Mon, 21 Sep 2009 17:04:38 -0700
Subject: checkpatch: limit sN/uN matches to actual bit sizes

Limit our type matcher to the s/u/le/be etc sizes that actually exist to
prevent miss categorising s2 as a type.  Fix up the spelling of the error
also.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 57df906..7fee823 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -182,7 +182,7 @@ our $UTF8	= qr {
 }x;
 
 our $typeTypedefs = qr{(?x:
-	(?:__)?(?:u|s|be|le)(?:\d|\d\d)|
+	(?:__)?(?:u|s|be|le)(?:8|16|32|64)|
 	atomic_t
 )};
 
@@ -1936,7 +1936,7 @@ sub process {
 						# A unary '*' may be const
 
 					} elsif ($ctx =~ /.xW/) {
-						ERROR("Aspace prohibited after that '$op' $at\n" . $hereptr);
+						ERROR("space prohibited after that '$op' $at\n" . $hereptr);
 					}
 
 				# unary ++ and unary -- are allowed no space on one side.
-- 
cgit v0.10.2


From 0487683096decad0720dfaf80b9d28173d5f6662 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Mon, 21 Sep 2009 17:04:39 -0700
Subject: checkpatch: version 0.29

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 7fee823..8b1dfd5 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -10,7 +10,7 @@ use strict;
 my $P = $0;
 $P =~ s@.*/@@g;
 
-my $V = '0.28';
+my $V = '0.29';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
-- 
cgit v0.10.2


From 42e41c54d61e32e8a349943607daa53205324d7f Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 21 Sep 2009 17:04:40 -0700
Subject: checkpatch: add some common Blackfin checks

Add checks for Blackfin-specific issues that seem to crop up from time to
time.  In particular, we have helper macros to break a 32bit address into
the hi/lo parts, and we want to make sure people use the csync/ssync
variant that includes fun anomaly workarounds.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 8b1dfd5..87bbb8b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1378,6 +1378,18 @@ sub process {
 			WARN("adding a line without newline at end of file\n" . $herecurr);
 		}
 
+# Blackfin: use hi/lo macros
+		if ($realfile =~ m@arch/blackfin/.*\.S$@) {
+			if ($line =~ /\.[lL][[:space:]]*=.*&[[:space:]]*0x[fF][fF][fF][fF]/) {
+				my $herevet = "$here\n" . cat_vet($line) . "\n";
+				ERROR("use the LO() macro, not (... & 0xFFFF)\n" . $herevet);
+			}
+			if ($line =~ /\.[hH][[:space:]]*=.*>>[[:space:]]*16/) {
+				my $herevet = "$here\n" . cat_vet($line) . "\n";
+				ERROR("use the HI() macro, not (... >> 16)\n" . $herevet);
+			}
+		}
+
 # check we are in a valid source file C or perl if not then ignore this hunk
 		next if ($realfile !~ /\.(h|c|pl)$/);
 
@@ -1397,6 +1409,16 @@ sub process {
 			WARN("CVS style keyword markers, these will _not_ be updated\n". $herecurr);
 		}
 
+# Blackfin: don't use __builtin_bfin_[cs]sync
+		if ($line =~ /__builtin_bfin_csync/) {
+			my $herevet = "$here\n" . cat_vet($line) . "\n";
+			ERROR("use the CSYNC() macro in asm/blackfin.h\n" . $herevet);
+		}
+		if ($line =~ /__builtin_bfin_ssync/) {
+			my $herevet = "$here\n" . cat_vet($line) . "\n";
+			ERROR("use the SSYNC() macro in asm/blackfin.h\n" . $herevet);
+		}
+
 # Check for potential 'bare' types
 		my ($stat, $cond, $line_nr_next, $remain_next, $off_next);
 		if ($realcnt && $line =~ /.\s*\S/) {
-- 
cgit v0.10.2


From 0bf41d9f414a5cf558aff234a0ff486257537574 Mon Sep 17 00:00:00 2001
From: Michael Riepe <michael.riepe@googlemail.com>
Date: Mon, 21 Sep 2009 17:04:41 -0700
Subject: drivers/hwmon/coretemp.c: enable the Intel Atom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable the coretemp driver on an Intel Atom.

I'm not sure if the readings are correct, however - on my 330, the driver
reports values between 27 and 41 °C (with core1 being about 8°C hotter
than core0, given the same load).  Maybe the maximum temperature of 100 °C
is wrong for Atom CPUs.

Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Rudolf Marek <r.marek@assembler.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 93c1722..972cf4b 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -185,7 +185,7 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
 		}
 	}
 
-	if (ismobile) {
+	if (ismobile || c->x86_model == 0x1c) {
 
 		err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
 		if (err) {
@@ -417,7 +417,7 @@ static int __init coretemp_init(void)
 		if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
 		    !((c->x86_model == 0xe) || (c->x86_model == 0xf) ||
 			(c->x86_model == 0x16) || (c->x86_model == 0x17) ||
-			(c->x86_model == 0x1A))) {
+			(c->x86_model == 0x1A) || (c->x86_model == 0x1c))) {
 
 			/* supported CPU not found, but report the unknown
 			   family 6 CPU */
-- 
cgit v0.10.2


From 0ec48915e8bbb37dea3df85c41e4c3498b95664b Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 21 Sep 2009 17:04:42 -0700
Subject: lis3: fix typo

Bit 0x80 in CTRL_REG3 is an ACTIVE_LOW rather than an ACTIVE_HIGH
function, I got that wrong during my last change.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h
index ad651f4..113778b 100644
--- a/include/linux/lis3lv02d.h
+++ b/include/linux/lis3lv02d.h
@@ -32,7 +32,7 @@ struct lis3lv02d_platform_data {
 #define LIS3_IRQ2_DATA_READY	(4 << 3)
 #define LIS3_IRQ2_CLICK		(7 << 3)
 #define LIS3_IRQ_OPEN_DRAIN	(1 << 6)
-#define LIS3_IRQ_ACTIVE_HIGH	(1 << 7)
+#define LIS3_IRQ_ACTIVE_LOW	(1 << 7)
 	unsigned char irq_cfg;
 };
 
-- 
cgit v0.10.2


From 8873c33483e62988ed886230aab71ef4c678f710 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 21 Sep 2009 17:04:43 -0700
Subject: lis3: add free-fall/wakeup function via platform_data

This offers a way for platforms to define flags and thresholds for the
free-fall/wakeup functions of the lis302d chips.

More registers needed to be seperated as they are specific to the

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c
index 271338b..cf5afb9 100644
--- a/drivers/hwmon/lis3lv02d.c
+++ b/drivers/hwmon/lis3lv02d.c
@@ -454,6 +454,15 @@ int lis3lv02d_init_device(struct lis3lv02d *dev)
 					(p->click_thresh_y << 4));
 		}
 
+		if (p->wakeup_flags && (dev->whoami == LIS_SINGLE_ID)) {
+			dev->write(dev, FF_WU_CFG_1, p->wakeup_flags);
+			dev->write(dev, FF_WU_THS_1, p->wakeup_thresh & 0x7f);
+			/* default to 2.5ms for now */
+			dev->write(dev, FF_WU_DURATION_1, 1);
+			/* enable high pass filter for both free-fall units */
+			dev->write(dev, CTRL_REG2, HP_FF_WU1 | HP_FF_WU2);
+		}
+
 		if (p->irq_cfg)
 			dev->write(dev, CTRL_REG3, p->irq_cfg);
 	}
diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h
index e320e2f..3e1ff46 100644
--- a/drivers/hwmon/lis3lv02d.h
+++ b/drivers/hwmon/lis3lv02d.h
@@ -58,15 +58,17 @@ enum lis3_reg {
 	OUTZ_L		= 0x2C,
 	OUTZ_H		= 0x2D,
 	OUTZ		= 0x2D,
-	FF_WU_CFG	= 0x30,
-	FF_WU_SRC	= 0x31,
-	FF_WU_ACK	= 0x32,
-	FF_WU_THS_L	= 0x34,
-	FF_WU_THS_H	= 0x35,
-	FF_WU_DURATION	= 0x36,
 };
 
 enum lis302d_reg {
+	FF_WU_CFG_1	= 0x30,
+	FF_WU_SRC_1	= 0x31,
+	FF_WU_THS_1	= 0x32,
+	FF_WU_DURATION_1 = 0x33,
+	FF_WU_CFG_2	= 0x34,
+	FF_WU_SRC_2	= 0x35,
+	FF_WU_THS_2	= 0x36,
+	FF_WU_DURATION_2 = 0x37,
 	CLICK_CFG	= 0x38,
 	CLICK_SRC	= 0x39,
 	CLICK_THSY_X	= 0x3B,
@@ -77,6 +79,12 @@ enum lis302d_reg {
 };
 
 enum lis3lv02d_reg {
+	FF_WU_CFG	= 0x30,
+	FF_WU_SRC	= 0x31,
+	FF_WU_ACK	= 0x32,
+	FF_WU_THS_L	= 0x34,
+	FF_WU_THS_H	= 0x35,
+	FF_WU_DURATION	= 0x36,
 	DD_CFG		= 0x38,
 	DD_SRC		= 0x39,
 	DD_ACK		= 0x3A,
@@ -107,6 +115,10 @@ enum lis3lv02d_ctrl2 {
 	CTRL2_FS	= 0x80, /* Full Scale selection */
 };
 
+enum lis302d_ctrl2 {
+	HP_FF_WU2	= 0x08,
+	HP_FF_WU1	= 0x04,
+};
 
 enum lis3lv02d_ctrl3 {
 	CTRL3_CFS0	= 0x01,
diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h
index 113778b..3cc2f2c 100644
--- a/include/linux/lis3lv02d.h
+++ b/include/linux/lis3lv02d.h
@@ -34,6 +34,15 @@ struct lis3lv02d_platform_data {
 #define LIS3_IRQ_OPEN_DRAIN	(1 << 6)
 #define LIS3_IRQ_ACTIVE_LOW	(1 << 7)
 	unsigned char irq_cfg;
+
+#define LIS3_WAKEUP_X_LO	(1 << 0)
+#define LIS3_WAKEUP_X_HI	(1 << 1)
+#define LIS3_WAKEUP_Y_LO	(1 << 2)
+#define LIS3_WAKEUP_Y_HI	(1 << 3)
+#define LIS3_WAKEUP_Z_LO	(1 << 4)
+#define LIS3_WAKEUP_Z_HI	(1 << 5)
+	unsigned char wakeup_flags;
+	unsigned char wakeup_thresh;
 };
 
 #endif /* __LIS3LV02D_H_ */
-- 
cgit v0.10.2


From 2cd9645e2f0d60ed12268fe1738e79c119e2fe5a Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 21 Sep 2009 17:04:44 -0700
Subject: lis3: add power management functions

This enabled power management functions for the SPI transport layer of the
lis3 devices.  The device's suspend mode is only entered in case no wakeup
threshold has been given.  In this case, the device is supposed to wake up
the system and must thus not be put to deep sleep.

[randy.dunlap@oracle.com: fix lis3-spi for CONFIG_PM=n]
Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/lis3lv02d_spi.c b/drivers/hwmon/lis3lv02d_spi.c
index 3827ff0..2bc8493 100644
--- a/drivers/hwmon/lis3lv02d_spi.c
+++ b/drivers/hwmon/lis3lv02d_spi.c
@@ -87,6 +87,32 @@ static int __devexit lis302dl_spi_remove(struct spi_device *spi)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int lis3lv02d_spi_suspend(struct spi_device *spi, pm_message_t mesg)
+{
+	struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+
+	if (!lis3->pdata->wakeup_flags)
+		lis3lv02d_poweroff(&lis3_dev);
+
+	return 0;
+}
+
+static int lis3lv02d_spi_resume(struct spi_device *spi)
+{
+	struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+
+	if (!lis3->pdata->wakeup_flags)
+		lis3lv02d_poweron(lis3);
+
+	return 0;
+}
+
+#else
+#define lis3lv02d_spi_suspend	NULL
+#define lis3lv02d_spi_resume	NULL
+#endif
+
 static struct spi_driver lis302dl_spi_driver = {
 	.driver	 = {
 		.name   = DRV_NAME,
@@ -94,6 +120,8 @@ static struct spi_driver lis302dl_spi_driver = {
 	},
 	.probe	= lis302dl_spi_probe,
 	.remove	= __devexit_p(lis302dl_spi_remove),
+	.suspend = lis3lv02d_spi_suspend,
+	.resume  = lis3lv02d_spi_resume,
 };
 
 static int __init lis302dl_init(void)
-- 
cgit v0.10.2


From dc791f8aeeeea4314beede83d6ee74e6af5f627b Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 21 Sep 2009 17:04:45 -0700
Subject: lis3_spi: code cleanups

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/lis3lv02d_spi.c b/drivers/hwmon/lis3lv02d_spi.c
index 2bc8493..82ebca5 100644
--- a/drivers/hwmon/lis3lv02d_spi.c
+++ b/drivers/hwmon/lis3lv02d_spi.c
@@ -66,17 +66,16 @@ static int __devinit lis302dl_spi_probe(struct spi_device *spi)
 	if (ret < 0)
 		return ret;
 
-	lis3_dev.bus_priv = spi;
-	lis3_dev.init = lis3_spi_init;
-	lis3_dev.read = lis3_spi_read;
-	lis3_dev.write = lis3_spi_write;
-	lis3_dev.irq = spi->irq;
-	lis3_dev.ac = lis3lv02d_axis_normal;
-	lis3_dev.pdata = spi->dev.platform_data;
+	lis3_dev.bus_priv	= spi;
+	lis3_dev.init		= lis3_spi_init;
+	lis3_dev.read		= lis3_spi_read;
+	lis3_dev.write		= lis3_spi_write;
+	lis3_dev.irq		= spi->irq;
+	lis3_dev.ac		= lis3lv02d_axis_normal;
+	lis3_dev.pdata		= spi->dev.platform_data;
 	spi_set_drvdata(spi, &lis3_dev);
 
-	ret = lis3lv02d_init_device(&lis3_dev);
-	return ret;
+	return lis3lv02d_init_device(&lis3_dev);
 }
 
 static int __devexit lis302dl_spi_remove(struct spi_device *spi)
-- 
cgit v0.10.2


From f266889517252bc697e7103bcd6ed46bdf2c1579 Mon Sep 17 00:00:00 2001
From: Michael Abbott <michael@araneidae.co.uk>
Date: Mon, 21 Sep 2009 17:04:46 -0700
Subject: drivers/hwmon/adm1021.c: support high precision ADM1023 remote sensor

The ADM1023 temperature sensor supports higher resolution for its external
sensor (sensitivity of 1/8 deg C).  This patch makes this higher
resolution available through the appropriate temperature sysfs nodes.

Curiously, this functionality was available in the 2.4 kernel driver (but
formatted in a less helpful manner).

Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Michael Abbott <michael.abbott@diamond.ac.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index b11e06f..de84398 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -85,14 +85,11 @@ struct adm1021_data {
 	char valid;		/* !=0 if following fields are valid */
 	unsigned long last_updated;	/* In jiffies */
 
-	s8 temp_max[2];		/* Register values */
-	s8 temp_min[2];
-	s8 temp[2];
+	int temp_max[2];		/* Register values */
+	int temp_min[2];
+	int temp[2];
 	u8 alarms;
 	/* Special values for ADM1023 only */
-	u8 remote_temp_prec;
-	u8 remote_temp_os_prec;
-	u8 remote_temp_hyst_prec;
 	u8 remote_temp_offset;
 	u8 remote_temp_offset_prec;
 };
@@ -141,7 +138,7 @@ static ssize_t show_temp(struct device *dev,
 	int index = to_sensor_dev_attr(devattr)->index;
 	struct adm1021_data *data = adm1021_update_device(dev);
 
-	return sprintf(buf, "%d\n", 1000 * data->temp[index]);
+	return sprintf(buf, "%d\n", data->temp[index]);
 }
 
 static ssize_t show_temp_max(struct device *dev,
@@ -150,7 +147,7 @@ static ssize_t show_temp_max(struct device *dev,
 	int index = to_sensor_dev_attr(devattr)->index;
 	struct adm1021_data *data = adm1021_update_device(dev);
 
-	return sprintf(buf, "%d\n", 1000 * data->temp_max[index]);
+	return sprintf(buf, "%d\n", data->temp_max[index]);
 }
 
 static ssize_t show_temp_min(struct device *dev,
@@ -159,7 +156,7 @@ static ssize_t show_temp_min(struct device *dev,
 	int index = to_sensor_dev_attr(devattr)->index;
 	struct adm1021_data *data = adm1021_update_device(dev);
 
-	return sprintf(buf, "%d\n", 1000 * data->temp_min[index]);
+	return sprintf(buf, "%d\n", data->temp_min[index]);
 }
 
 static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
@@ -412,25 +409,27 @@ static struct adm1021_data *adm1021_update_device(struct device *dev)
 		dev_dbg(&client->dev, "Starting adm1021 update\n");
 
 		for (i = 0; i < 2; i++) {
-			data->temp[i] = i2c_smbus_read_byte_data(client,
-						ADM1021_REG_TEMP(i));
-			data->temp_max[i] = i2c_smbus_read_byte_data(client,
-						ADM1021_REG_TOS_R(i));
-			data->temp_min[i] = i2c_smbus_read_byte_data(client,
-						ADM1021_REG_THYST_R(i));
+			data->temp[i] = 1000 *
+				(s8) i2c_smbus_read_byte_data(
+					client, ADM1021_REG_TEMP(i));
+			data->temp_max[i] = 1000 *
+				(s8) i2c_smbus_read_byte_data(
+					client, ADM1021_REG_TOS_R(i));
+			data->temp_min[i] = 1000 *
+				(s8) i2c_smbus_read_byte_data(
+					client, ADM1021_REG_THYST_R(i));
 		}
 		data->alarms = i2c_smbus_read_byte_data(client,
 						ADM1021_REG_STATUS) & 0x7c;
 		if (data->type == adm1023) {
-			data->remote_temp_prec =
-				i2c_smbus_read_byte_data(client,
-						ADM1023_REG_REM_TEMP_PREC);
-			data->remote_temp_os_prec =
-				i2c_smbus_read_byte_data(client,
-						ADM1023_REG_REM_TOS_PREC);
-			data->remote_temp_hyst_prec =
-				i2c_smbus_read_byte_data(client,
-						ADM1023_REG_REM_THYST_PREC);
+			/* The ADM1023 provides 3 extra bits of precision for
+			 * the remote sensor in extra registers. */
+			data->temp[1] += 125 * (i2c_smbus_read_byte_data(
+				client, ADM1023_REG_REM_TEMP_PREC) >> 5);
+			data->temp_max[1] += 125 * (i2c_smbus_read_byte_data(
+				client, ADM1023_REG_REM_TOS_PREC) >> 5);
+			data->temp_min[1] += 125 * (i2c_smbus_read_byte_data(
+				client, ADM1023_REG_REM_THYST_PREC) >> 5);
 			data->remote_temp_offset =
 				i2c_smbus_read_byte_data(client,
 						ADM1023_REG_REM_OFFSET);
-- 
cgit v0.10.2


From 905ffdc35e50844ab45bbc59d5302238844f8526 Mon Sep 17 00:00:00 2001
From: Michael Abbott <michael@araneidae.co.uk>
Date: Mon, 21 Sep 2009 17:04:47 -0700
Subject: drivers/hwmon/adm1021.c: add low_power support for adm1021 driver

Occasionally it is helpful to be able to turn a temperature sensor off
(for example if it's making unwanted electrical noise).  This patch
adds a sysfs node to put any adm1021 compatible device into low power mode.

Signed-off-by: Michael Abbott <michael.abbott@diamond.ac.uk>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index de84398..afc5943 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -83,6 +83,7 @@ struct adm1021_data {
 
 	struct mutex update_lock;
 	char valid;		/* !=0 if following fields are valid */
+	char low_power;		/* !=0 if device in low power mode */
 	unsigned long last_updated;	/* In jiffies */
 
 	int temp_max[2];		/* Register values */
@@ -213,6 +214,35 @@ static ssize_t set_temp_min(struct device *dev,
 	return count;
 }
 
+static ssize_t show_low_power(struct device *dev,
+			      struct device_attribute *devattr, char *buf)
+{
+	struct adm1021_data *data = adm1021_update_device(dev);
+	return sprintf(buf, "%d\n", data->low_power);
+}
+
+static ssize_t set_low_power(struct device *dev,
+			     struct device_attribute *devattr,
+			     const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct adm1021_data *data = i2c_get_clientdata(client);
+	int low_power = simple_strtol(buf, NULL, 10) != 0;
+
+	mutex_lock(&data->update_lock);
+	if (low_power != data->low_power) {
+		int config = i2c_smbus_read_byte_data(
+			client, ADM1021_REG_CONFIG_R);
+		data->low_power = low_power;
+		i2c_smbus_write_byte_data(client, ADM1021_REG_CONFIG_W,
+			(config & 0xBF) | (low_power << 6));
+	}
+	mutex_unlock(&data->update_lock);
+
+	return count;
+}
+
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max,
 			  set_temp_max, 0);
@@ -230,6 +260,7 @@ static SENSOR_DEVICE_ATTR(temp2_min_alarm, S_IRUGO, show_alarm, NULL, 3);
 static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 2);
 
 static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL);
+static DEVICE_ATTR(low_power, S_IWUSR | S_IRUGO, show_low_power, set_low_power);
 
 static struct attribute *adm1021_attributes[] = {
 	&sensor_dev_attr_temp1_max.dev_attr.attr,
@@ -244,6 +275,7 @@ static struct attribute *adm1021_attributes[] = {
 	&sensor_dev_attr_temp2_min_alarm.dev_attr.attr,
 	&sensor_dev_attr_temp2_fault.dev_attr.attr,
 	&dev_attr_alarms.attr,
+	&dev_attr_low_power.attr,
 	NULL
 };
 
-- 
cgit v0.10.2


From 560a64a2b501add585b494b2b9cd9f68c0636b50 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 21 Sep 2009 17:04:48 -0700
Subject: hwmon: fix freeing of gpio_data and irq

If already requested, gpio_data and irq should be freed in the case of an
error.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Jonathan Cameron <jic23@cam.ac.uk>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c
index 6290a25..303c026 100644
--- a/drivers/hwmon/sht15.c
+++ b/drivers/hwmon/sht15.c
@@ -562,7 +562,7 @@ static int __devinit sht15_probe(struct platform_device *pdev)
 	ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group);
 	if (ret) {
 		dev_err(&pdev->dev, "sysfs create failed");
-		goto err_free_data;
+		goto err_release_gpio_data;
 	}
 
 	ret = request_irq(gpio_to_irq(data->pdata->gpio_data),
@@ -581,10 +581,12 @@ static int __devinit sht15_probe(struct platform_device *pdev)
 	data->hwmon_dev = hwmon_device_register(data->dev);
 	if (IS_ERR(data->hwmon_dev)) {
 		ret = PTR_ERR(data->hwmon_dev);
-		goto err_release_gpio_data;
+		goto err_release_irq;
 	}
 	return 0;
 
+err_release_irq:
+	free_irq(gpio_to_irq(data->pdata->gpio_data), data);
 err_release_gpio_data:
 	gpio_free(data->pdata->gpio_data);
 err_release_gpio_sck:
-- 
cgit v0.10.2


From a976f150a6953da5ccbd40fa6dba3bd7d56f9f67 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Mon, 21 Sep 2009 17:04:50 -0700
Subject: hwmon: applesmc: restore accelerometer and keyboard backlight on
 resume

On resume from suspend, the driver currently resets the logical state as
if it was brought up from halt.  This patch uses the
dev_pm_ops.resume/restore methods to synchronize the hardware with the
memorized logical state, in effect bringing back the accelerometer and
backlight to the state prior to suspend.  Works for both suspend to ram
and hibernation.  The patch has zero effect on the running state.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Cc: Nicolas Boichat <nicolas@boichat.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 753b348..7ea6a8f 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -178,6 +178,8 @@ static const int debug;
 static struct platform_device *pdev;
 static s16 rest_x;
 static s16 rest_y;
+static u8 backlight_state[2];
+
 static struct device *hwmon_dev;
 static struct input_polled_dev *applesmc_idev;
 
@@ -497,17 +499,36 @@ static int applesmc_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int applesmc_resume(struct platform_device *dev)
+/* Synchronize device with memorized backlight state */
+static int applesmc_pm_resume(struct device *dev)
 {
-	return applesmc_device_init();
+	mutex_lock(&applesmc_lock);
+	if (applesmc_light)
+		applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
+	mutex_unlock(&applesmc_lock);
+	return 0;
 }
 
+/* Reinitialize device on resume from hibernation */
+static int applesmc_pm_restore(struct device *dev)
+{
+	int ret = applesmc_device_init();
+	if (ret)
+		return ret;
+	return applesmc_pm_resume(dev);
+}
+
+static struct dev_pm_ops applesmc_pm_ops = {
+	.resume = applesmc_pm_resume,
+	.restore = applesmc_pm_restore,
+};
+
 static struct platform_driver applesmc_driver = {
 	.probe = applesmc_probe,
-	.resume = applesmc_resume,
 	.driver	= {
 		.name = "applesmc",
 		.owner = THIS_MODULE,
+		.pm = &applesmc_pm_ops,
 	},
 };
 
@@ -804,17 +825,10 @@ static ssize_t applesmc_calibrate_store(struct device *dev,
 	return count;
 }
 
-/* Store the next backlight value to be written by the work */
-static unsigned int backlight_value;
-
 static void applesmc_backlight_set(struct work_struct *work)
 {
-	u8 buffer[2];
-
 	mutex_lock(&applesmc_lock);
-	buffer[0] = backlight_value;
-	buffer[1] = 0x00;
-	applesmc_write_key(BACKLIGHT_KEY, buffer, 2);
+	applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
 	mutex_unlock(&applesmc_lock);
 }
 static DECLARE_WORK(backlight_work, &applesmc_backlight_set);
@@ -824,7 +838,7 @@ static void applesmc_brightness_set(struct led_classdev *led_cdev,
 {
 	int ret;
 
-	backlight_value = value;
+	backlight_state[0] = value;
 	ret = queue_work(applesmc_led_wq, &backlight_work);
 
 	if (debug && (!ret))
-- 
cgit v0.10.2


From abd6633c67925f90775bb74755f9c547e30f1f20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20H=C3=A4rdeman?= <david@hardeman.nu>
Date: Mon, 21 Sep 2009 17:04:52 -0700
Subject: pnp: add a shutdown method to pnp drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The shutdown method is used by the winbond cir driver to setup the
hardware for wake-from-S5.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: David Härdeman <david@hardeman.nu>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index 527ee76..cd11b11 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -135,6 +135,15 @@ static int pnp_device_remove(struct device *dev)
 	return 0;
 }
 
+static void pnp_device_shutdown(struct device *dev)
+{
+	struct pnp_dev *pnp_dev = to_pnp_dev(dev);
+	struct pnp_driver *drv = pnp_dev->driver;
+
+	if (drv && drv->shutdown)
+		drv->shutdown(pnp_dev);
+}
+
 static int pnp_bus_match(struct device *dev, struct device_driver *drv)
 {
 	struct pnp_dev *pnp_dev = to_pnp_dev(dev);
@@ -203,6 +212,7 @@ struct bus_type pnp_bus_type = {
 	.match   = pnp_bus_match,
 	.probe   = pnp_device_probe,
 	.remove  = pnp_device_remove,
+	.shutdown = pnp_device_shutdown,
 	.suspend = pnp_bus_suspend,
 	.resume  = pnp_bus_resume,
 	.dev_attrs = pnp_interface_attrs,
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index b063c73..fddfafa 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -360,6 +360,7 @@ struct pnp_driver {
 	unsigned int flags;
 	int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id);
 	void (*remove) (struct pnp_dev *dev);
+	void (*shutdown) (struct pnp_dev *dev);
 	int (*suspend) (struct pnp_dev *dev, pm_message_t state);
 	int (*resume) (struct pnp_dev *dev);
 	struct device_driver driver;
-- 
cgit v0.10.2


From e258b80e691f1f3ae83a60aa80eaf7322bd55ec4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20H=C3=A4rdeman?= <david@hardeman.nu>
Date: Mon, 21 Sep 2009 17:04:53 -0700
Subject: input: add a driver for the Winbond WPCD376I Consumer IR hardware
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a driver for the the Consumer IR (CIR) functionality of the Winbond
WPCD376I chipset (found on e.g. Intel DG45FC motherboards).

Signed-off-by: David Härdeman <david@hardeman.nu>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 379db50..da7a811 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5640,6 +5640,12 @@ L:	linux-scsi@vger.kernel.org
 S:	Maintained
 F:	drivers/scsi/wd7000.c
 
+WINBOND CIR DRIVER
+P:	David H�rdeman
+M:	david@hardeman.nu
+S:	Maintained
+F:	drivers/input/misc/winbond-cir.c
+
 WIMAX STACK
 M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
 M:	linux-wimax@intel.com
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 1a50be3..76d6751 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -222,6 +222,22 @@ config INPUT_SGI_BTNS
 	  To compile this driver as a module, choose M here: the
 	  module will be called sgi_btns.
 
+config INPUT_WINBOND_CIR
+	tristate "Winbond IR remote control"
+	depends on X86 && PNP
+	select LEDS_CLASS
+	select BITREVERSE
+	help
+	  Say Y here if you want to use the IR remote functionality found
+	  in some Winbond SuperI/O chips. Currently only the WPCD376I
+	  chip is supported (included in some Intel Media series motherboards).
+
+	  IR Receive and wake-on-IR from suspend and power-off is currently
+	  supported.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called winbond_cir.
+
 config HP_SDC_RTC
 	tristate "HP SDC Real Time Clock"
 	depends on (GSC || HP300) && SERIO
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index bf4db62..a8b8485 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_INPUT_SGI_BTNS)		+= sgi_btns.o
 obj-$(CONFIG_INPUT_SPARCSPKR)		+= sparcspkr.o
 obj-$(CONFIG_INPUT_TWL4030_PWRBUTTON)	+= twl4030-pwrbutton.o
 obj-$(CONFIG_INPUT_UINPUT)		+= uinput.o
+obj-$(CONFIG_INPUT_WINBOND_CIR)		+= winbond-cir.o
 obj-$(CONFIG_INPUT_WISTRON_BTNS)	+= wistron_btns.o
 obj-$(CONFIG_INPUT_WM831X_ON)		+= wm831x-on.o
 obj-$(CONFIG_INPUT_YEALINK)		+= yealink.o
diff --git a/drivers/input/misc/winbond-cir.c b/drivers/input/misc/winbond-cir.c
new file mode 100644
index 0000000..33309fe
--- /dev/null
+++ b/drivers/input/misc/winbond-cir.c
@@ -0,0 +1,1614 @@
+/*
+ *  winbond-cir.c - Driver for the Consumer IR functionality of Winbond
+ *                  SuperI/O chips.
+ *
+ *  Currently supports the Winbond WPCD376i chip (PNP id WEC1022), but
+ *  could probably support others (Winbond WEC102X, NatSemi, etc)
+ *  with minor modifications.
+ *
+ *  Original Author: David H�rdeman <david@hardeman.nu>
+ *     Copyright (C) 2009 David H�rdeman <david@hardeman.nu>
+ *
+ *  Dedicated to Matilda, my newborn daughter, without whose loving attention
+ *  this driver would have been finished in half the time and with a fraction
+ *  of the bugs.
+ *
+ *  Written using:
+ *    o Winbond WPCD376I datasheet helpfully provided by Jesse Barnes at Intel
+ *    o NatSemi PC87338/PC97338 datasheet (for the serial port stuff)
+ *    o DSDT dumps
+ *
+ *  Supported features:
+ *    o RC6
+ *    o Wake-On-CIR functionality
+ *
+ *  To do:
+ *    o Test NEC and RC5
+ *
+ *  Left as an exercise for the reader:
+ *    o Learning (I have neither the hardware, nor the need)
+ *    o IR Transmit (ibid)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/pnp.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/input.h>
+#include <linux/leds.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/pci_ids.h>
+#include <linux/io.h>
+#include <linux/bitrev.h>
+#include <linux/bitops.h>
+
+#define DRVNAME "winbond-cir"
+
+/* CEIR Wake-Up Registers, relative to data->wbase                      */
+#define WBCIR_REG_WCEIR_CTL	0x03 /* CEIR Receiver Control		*/
+#define WBCIR_REG_WCEIR_STS	0x04 /* CEIR Receiver Status		*/
+#define WBCIR_REG_WCEIR_EV_EN	0x05 /* CEIR Receiver Event Enable	*/
+#define WBCIR_REG_WCEIR_CNTL	0x06 /* CEIR Receiver Counter Low	*/
+#define WBCIR_REG_WCEIR_CNTH	0x07 /* CEIR Receiver Counter High	*/
+#define WBCIR_REG_WCEIR_INDEX	0x08 /* CEIR Receiver Index		*/
+#define WBCIR_REG_WCEIR_DATA	0x09 /* CEIR Receiver Data		*/
+#define WBCIR_REG_WCEIR_CSL	0x0A /* CEIR Re. Compare Strlen		*/
+#define WBCIR_REG_WCEIR_CFG1	0x0B /* CEIR Re. Configuration 1	*/
+#define WBCIR_REG_WCEIR_CFG2	0x0C /* CEIR Re. Configuration 2	*/
+
+/* CEIR Enhanced Functionality Registers, relative to data->ebase       */
+#define WBCIR_REG_ECEIR_CTS	0x00 /* Enhanced IR Control Status	*/
+#define WBCIR_REG_ECEIR_CCTL	0x01 /* Infrared Counter Control	*/
+#define WBCIR_REG_ECEIR_CNT_LO	0x02 /* Infrared Counter LSB		*/
+#define WBCIR_REG_ECEIR_CNT_HI	0x03 /* Infrared Counter MSB		*/
+#define WBCIR_REG_ECEIR_IREM	0x04 /* Infrared Emitter Status		*/
+
+/* SP3 Banked Registers, relative to data->sbase                        */
+#define WBCIR_REG_SP3_BSR	0x03 /* Bank Select, all banks		*/
+				      /* Bank 0				*/
+#define WBCIR_REG_SP3_RXDATA	0x00 /* FIFO RX data (r)		*/
+#define WBCIR_REG_SP3_TXDATA	0x00 /* FIFO TX data (w)		*/
+#define WBCIR_REG_SP3_IER	0x01 /* Interrupt Enable		*/
+#define WBCIR_REG_SP3_EIR	0x02 /* Event Identification (r)	*/
+#define WBCIR_REG_SP3_FCR	0x02 /* FIFO Control (w)		*/
+#define WBCIR_REG_SP3_MCR	0x04 /* Mode Control			*/
+#define WBCIR_REG_SP3_LSR	0x05 /* Link Status			*/
+#define WBCIR_REG_SP3_MSR	0x06 /* Modem Status			*/
+#define WBCIR_REG_SP3_ASCR	0x07 /* Aux Status and Control		*/
+				      /* Bank 2				*/
+#define WBCIR_REG_SP3_BGDL	0x00 /* Baud Divisor LSB		*/
+#define WBCIR_REG_SP3_BGDH	0x01 /* Baud Divisor MSB		*/
+#define WBCIR_REG_SP3_EXCR1	0x02 /* Extended Control 1		*/
+#define WBCIR_REG_SP3_EXCR2	0x04 /* Extended Control 2		*/
+#define WBCIR_REG_SP3_TXFLV	0x06 /* TX FIFO Level			*/
+#define WBCIR_REG_SP3_RXFLV	0x07 /* RX FIFO Level			*/
+				      /* Bank 3				*/
+#define WBCIR_REG_SP3_MRID	0x00 /* Module Identification		*/
+#define WBCIR_REG_SP3_SH_LCR	0x01 /* LCR Shadow			*/
+#define WBCIR_REG_SP3_SH_FCR	0x02 /* FCR Shadow			*/
+				      /* Bank 4				*/
+#define WBCIR_REG_SP3_IRCR1	0x02 /* Infrared Control 1		*/
+				      /* Bank 5				*/
+#define WBCIR_REG_SP3_IRCR2	0x04 /* Infrared Control 2		*/
+				      /* Bank 6				*/
+#define WBCIR_REG_SP3_IRCR3	0x00 /* Infrared Control 3		*/
+#define WBCIR_REG_SP3_SIR_PW	0x02 /* SIR Pulse Width		*/
+				      /* Bank 7				*/
+#define WBCIR_REG_SP3_IRRXDC	0x00 /* IR RX Demod Control		*/
+#define WBCIR_REG_SP3_IRTXMC	0x01 /* IR TX Mod Control		*/
+#define WBCIR_REG_SP3_RCCFG	0x02 /* CEIR Config			*/
+#define WBCIR_REG_SP3_IRCFG1	0x04 /* Infrared Config 1		*/
+#define WBCIR_REG_SP3_IRCFG4	0x07 /* Infrared Config 4		*/
+
+/*
+ * Magic values follow
+ */
+
+/* No interrupts for WBCIR_REG_SP3_IER and WBCIR_REG_SP3_EIR */
+#define WBCIR_IRQ_NONE		0x00
+/* RX data bit for WBCIR_REG_SP3_IER and WBCIR_REG_SP3_EIR */
+#define WBCIR_IRQ_RX		0x01
+/* Over/Under-flow bit for WBCIR_REG_SP3_IER and WBCIR_REG_SP3_EIR */
+#define WBCIR_IRQ_ERR		0x04
+/* Led enable/disable bit for WBCIR_REG_ECEIR_CTS */
+#define WBCIR_LED_ENABLE	0x80
+/* RX data available bit for WBCIR_REG_SP3_LSR */
+#define WBCIR_RX_AVAIL		0x01
+/* RX disable bit for WBCIR_REG_SP3_ASCR */
+#define WBCIR_RX_DISABLE	0x20
+/* Extended mode enable bit for WBCIR_REG_SP3_EXCR1 */
+#define WBCIR_EXT_ENABLE	0x01
+/* Select compare register in WBCIR_REG_WCEIR_INDEX (bits 5 & 6) */
+#define WBCIR_REGSEL_COMPARE	0x10
+/* Select mask register in WBCIR_REG_WCEIR_INDEX (bits 5 & 6) */
+#define WBCIR_REGSEL_MASK	0x20
+/* Starting address of selected register in WBCIR_REG_WCEIR_INDEX */
+#define WBCIR_REG_ADDR0		0x00
+
+/* Valid banks for the SP3 UART */
+enum wbcir_bank {
+	WBCIR_BANK_0          = 0x00,
+	WBCIR_BANK_1          = 0x80,
+	WBCIR_BANK_2          = 0xE0,
+	WBCIR_BANK_3          = 0xE4,
+	WBCIR_BANK_4          = 0xE8,
+	WBCIR_BANK_5          = 0xEC,
+	WBCIR_BANK_6          = 0xF0,
+	WBCIR_BANK_7          = 0xF4,
+};
+
+/* Supported IR Protocols */
+enum wbcir_protocol {
+	IR_PROTOCOL_RC5          = 0x0,
+	IR_PROTOCOL_NEC          = 0x1,
+	IR_PROTOCOL_RC6          = 0x2,
+};
+
+/* Misc */
+#define WBCIR_NAME	"Winbond CIR"
+#define WBCIR_ID_FAMILY          0xF1 /* Family ID for the WPCD376I	*/
+#define	WBCIR_ID_CHIP            0x04 /* Chip ID for the WPCD376I	*/
+#define IR_KEYPRESS_TIMEOUT       250 /* FIXME: should be per-protocol? */
+#define INVALID_SCANCODE   0x7FFFFFFF /* Invalid with all protos	*/
+#define WAKEUP_IOMEM_LEN         0x10 /* Wake-Up I/O Reg Len		*/
+#define EHFUNC_IOMEM_LEN         0x10 /* Enhanced Func I/O Reg Len	*/
+#define SP_IOMEM_LEN             0x08 /* Serial Port 3 (IR) Reg Len	*/
+#define WBCIR_MAX_IDLE_BYTES       10
+
+static DEFINE_SPINLOCK(wbcir_lock);
+static DEFINE_RWLOCK(keytable_lock);
+
+struct wbcir_key {
+	u32 scancode;
+	unsigned int keycode;
+};
+
+struct wbcir_keyentry {
+	struct wbcir_key key;
+	struct list_head list;
+};
+
+static struct wbcir_key rc6_def_keymap[] = {
+	{ 0x800F0400, KEY_NUMERIC_0		},
+	{ 0x800F0401, KEY_NUMERIC_1		},
+	{ 0x800F0402, KEY_NUMERIC_2		},
+	{ 0x800F0403, KEY_NUMERIC_3		},
+	{ 0x800F0404, KEY_NUMERIC_4		},
+	{ 0x800F0405, KEY_NUMERIC_5		},
+	{ 0x800F0406, KEY_NUMERIC_6		},
+	{ 0x800F0407, KEY_NUMERIC_7		},
+	{ 0x800F0408, KEY_NUMERIC_8		},
+	{ 0x800F0409, KEY_NUMERIC_9		},
+	{ 0x800F041D, KEY_NUMERIC_STAR		},
+	{ 0x800F041C, KEY_NUMERIC_POUND		},
+	{ 0x800F0410, KEY_VOLUMEUP		},
+	{ 0x800F0411, KEY_VOLUMEDOWN		},
+	{ 0x800F0412, KEY_CHANNELUP		},
+	{ 0x800F0413, KEY_CHANNELDOWN		},
+	{ 0x800F040E, KEY_MUTE			},
+	{ 0x800F040D, KEY_VENDOR		}, /* Vista Logo Key */
+	{ 0x800F041E, KEY_UP			},
+	{ 0x800F041F, KEY_DOWN			},
+	{ 0x800F0420, KEY_LEFT			},
+	{ 0x800F0421, KEY_RIGHT			},
+	{ 0x800F0422, KEY_OK			},
+	{ 0x800F0423, KEY_ESC			},
+	{ 0x800F040F, KEY_INFO			},
+	{ 0x800F040A, KEY_CLEAR			},
+	{ 0x800F040B, KEY_ENTER			},
+	{ 0x800F045B, KEY_RED			},
+	{ 0x800F045C, KEY_GREEN			},
+	{ 0x800F045D, KEY_YELLOW		},
+	{ 0x800F045E, KEY_BLUE			},
+	{ 0x800F045A, KEY_TEXT			},
+	{ 0x800F0427, KEY_SWITCHVIDEOMODE	},
+	{ 0x800F040C, KEY_POWER			},
+	{ 0x800F0450, KEY_RADIO			},
+	{ 0x800F0448, KEY_PVR			},
+	{ 0x800F0447, KEY_AUDIO			},
+	{ 0x800F0426, KEY_EPG			},
+	{ 0x800F0449, KEY_CAMERA		},
+	{ 0x800F0425, KEY_TV			},
+	{ 0x800F044A, KEY_VIDEO			},
+	{ 0x800F0424, KEY_DVD			},
+	{ 0x800F0416, KEY_PLAY			},
+	{ 0x800F0418, KEY_PAUSE			},
+	{ 0x800F0419, KEY_STOP			},
+	{ 0x800F0414, KEY_FASTFORWARD		},
+	{ 0x800F041A, KEY_NEXT			},
+	{ 0x800F041B, KEY_PREVIOUS		},
+	{ 0x800F0415, KEY_REWIND		},
+	{ 0x800F0417, KEY_RECORD		},
+};
+
+/* Registers and other state is protected by wbcir_lock */
+struct wbcir_data {
+	unsigned long wbase;        /* Wake-Up Baseaddr		*/
+	unsigned long ebase;        /* Enhanced Func. Baseaddr	*/
+	unsigned long sbase;        /* Serial Port Baseaddr	*/
+	unsigned int  irq;          /* Serial Port IRQ		*/
+
+	struct input_dev *input_dev;
+	struct timer_list timer_keyup;
+	struct led_trigger *rxtrigger;
+	struct led_trigger *txtrigger;
+	struct led_classdev led;
+
+	u32 last_scancode;
+	unsigned int last_keycode;
+	u8 last_toggle;
+	u8 keypressed;
+	unsigned long keyup_jiffies;
+	unsigned int idle_count;
+
+	/* RX irdata and parsing state */
+	unsigned long irdata[30];
+	unsigned int irdata_count;
+	unsigned int irdata_idle;
+	unsigned int irdata_off;
+	unsigned int irdata_error;
+
+	/* Protected by keytable_lock */
+	struct list_head keytable;
+};
+
+static enum wbcir_protocol protocol = IR_PROTOCOL_RC6;
+module_param(protocol, uint, 0444);
+MODULE_PARM_DESC(protocol, "IR protocol to use "
+		 "(0 = RC5, 1 = NEC, 2 = RC6A, default)");
+
+static int invert; /* default = 0 */
+module_param(invert, bool, 0444);
+MODULE_PARM_DESC(invert, "Invert the signal from the IR receiver");
+
+static unsigned int wake_sc = 0x800F040C;
+module_param(wake_sc, uint, 0644);
+MODULE_PARM_DESC(wake_sc, "Scancode of the power-on IR command");
+
+static unsigned int wake_rc6mode = 6;
+module_param(wake_rc6mode, uint, 0644);
+MODULE_PARM_DESC(wake_rc6mode, "RC6 mode for the power-on command "
+		 "(0 = 0, 6 = 6A, default)");
+
+
+
+/*****************************************************************************
+ *
+ * UTILITY FUNCTIONS
+ *
+ *****************************************************************************/
+
+/* Caller needs to hold wbcir_lock */
+static void
+wbcir_set_bits(unsigned long addr, u8 bits, u8 mask)
+{
+	u8 val;
+
+	val = inb(addr);
+	val = ((val & ~mask) | (bits & mask));
+	outb(val, addr);
+}
+
+/* Selects the register bank for the serial port */
+static inline void
+wbcir_select_bank(struct wbcir_data *data, enum wbcir_bank bank)
+{
+	outb(bank, data->sbase + WBCIR_REG_SP3_BSR);
+}
+
+static enum led_brightness
+wbcir_led_brightness_get(struct led_classdev *led_cdev)
+{
+	struct wbcir_data *data = container_of(led_cdev,
+					       struct wbcir_data,
+					       led);
+
+	if (inb(data->ebase + WBCIR_REG_ECEIR_CTS) & WBCIR_LED_ENABLE)
+		return LED_FULL;
+	else
+		return LED_OFF;
+}
+
+static void
+wbcir_led_brightness_set(struct led_classdev *led_cdev,
+			    enum led_brightness brightness)
+{
+	struct wbcir_data *data = container_of(led_cdev,
+					       struct wbcir_data,
+					       led);
+
+	wbcir_set_bits(data->ebase + WBCIR_REG_ECEIR_CTS,
+		       brightness == LED_OFF ? 0x00 : WBCIR_LED_ENABLE,
+		       WBCIR_LED_ENABLE);
+}
+
+/* Manchester encodes bits to RC6 message cells (see wbcir_parse_rc6) */
+static u8
+wbcir_to_rc6cells(u8 val)
+{
+	u8 coded = 0x00;
+	int i;
+
+	val &= 0x0F;
+	for (i = 0; i < 4; i++) {
+		if (val & 0x01)
+			coded |= 0x02 << (i * 2);
+		else
+			coded |= 0x01 << (i * 2);
+		val >>= 1;
+	}
+
+	return coded;
+}
+
+
+
+/*****************************************************************************
+ *
+ * INPUT FUNCTIONS
+ *
+ *****************************************************************************/
+
+static unsigned int
+wbcir_do_getkeycode(struct wbcir_data *data, u32 scancode)
+{
+	struct wbcir_keyentry *keyentry;
+	unsigned int keycode = KEY_RESERVED;
+	unsigned long flags;
+
+	read_lock_irqsave(&keytable_lock, flags);
+
+	list_for_each_entry(keyentry, &data->keytable, list) {
+		if (keyentry->key.scancode == scancode) {
+			keycode = keyentry->key.keycode;
+			break;
+		}
+	}
+
+	read_unlock_irqrestore(&keytable_lock, flags);
+	return keycode;
+}
+
+static int
+wbcir_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+{
+	struct wbcir_data *data = input_get_drvdata(dev);
+
+	*keycode = (int)wbcir_do_getkeycode(data, (u32)scancode);
+	return 0;
+}
+
+static int
+wbcir_setkeycode(struct input_dev *dev, int sscancode, int keycode)
+{
+	struct wbcir_data *data = input_get_drvdata(dev);
+	struct wbcir_keyentry *keyentry;
+	struct wbcir_keyentry *new_keyentry;
+	unsigned long flags;
+	unsigned int old_keycode = KEY_RESERVED;
+	u32 scancode = (u32)sscancode;
+
+	if (keycode < 0 || keycode > KEY_MAX)
+		return -EINVAL;
+
+	new_keyentry = kmalloc(sizeof(*new_keyentry), GFP_KERNEL);
+	if (!new_keyentry)
+		return -ENOMEM;
+
+	write_lock_irqsave(&keytable_lock, flags);
+
+	list_for_each_entry(keyentry, &data->keytable, list) {
+		if (keyentry->key.scancode != scancode)
+			continue;
+
+		old_keycode = keyentry->key.keycode;
+		keyentry->key.keycode = keycode;
+
+		if (keyentry->key.keycode == KEY_RESERVED) {
+			list_del(&keyentry->list);
+			kfree(keyentry);
+		}
+
+		break;
+	}
+
+	set_bit(keycode, dev->keybit);
+
+	if (old_keycode == KEY_RESERVED) {
+		new_keyentry->key.scancode = scancode;
+		new_keyentry->key.keycode = keycode;
+		list_add(&new_keyentry->list, &data->keytable);
+	} else {
+		kfree(new_keyentry);
+		clear_bit(old_keycode, dev->keybit);
+		list_for_each_entry(keyentry, &data->keytable, list) {
+			if (keyentry->key.keycode == old_keycode) {
+				set_bit(old_keycode, dev->keybit);
+				break;
+			}
+		}
+	}
+
+	write_unlock_irqrestore(&keytable_lock, flags);
+	return 0;
+}
+
+/*
+ * Timer function to report keyup event some time after keydown is
+ * reported by the ISR.
+ */
+static void
+wbcir_keyup(unsigned long cookie)
+{
+	struct wbcir_data *data = (struct wbcir_data *)cookie;
+	unsigned long flags;
+
+	/*
+	 * data->keyup_jiffies is used to prevent a race condition if a
+	 * hardware interrupt occurs at this point and the keyup timer
+	 * event is moved further into the future as a result.
+	 *
+	 * The timer will then be reactivated and this function called
+	 * again in the future. We need to exit gracefully in that case
+	 * to allow the input subsystem to do its auto-repeat magic or
+	 * a keyup event might follow immediately after the keydown.
+	 */
+
+	spin_lock_irqsave(&wbcir_lock, flags);
+
+	if (time_is_after_eq_jiffies(data->keyup_jiffies) && data->keypressed) {
+		data->keypressed = 0;
+		led_trigger_event(data->rxtrigger, LED_OFF);
+		input_report_key(data->input_dev, data->last_keycode, 0);
+		input_sync(data->input_dev);
+	}
+
+	spin_unlock_irqrestore(&wbcir_lock, flags);
+}
+
+static void
+wbcir_keydown(struct wbcir_data *data, u32 scancode, u8 toggle)
+{
+	unsigned int keycode;
+
+	/* Repeat? */
+	if (data->last_scancode == scancode &&
+	    data->last_toggle == toggle &&
+	    data->keypressed)
+		goto set_timer;
+	data->last_scancode = scancode;
+
+	/* Do we need to release an old keypress? */
+	if (data->keypressed) {
+		input_report_key(data->input_dev, data->last_keycode, 0);
+		input_sync(data->input_dev);
+		data->keypressed = 0;
+	}
+
+	/* Report scancode */
+	input_event(data->input_dev, EV_MSC, MSC_SCAN, (int)scancode);
+
+	/* Do we know this scancode? */
+	keycode = wbcir_do_getkeycode(data, scancode);
+	if (keycode == KEY_RESERVED)
+		goto set_timer;
+
+	/* Register a keypress */
+	input_report_key(data->input_dev, keycode, 1);
+	data->keypressed = 1;
+	data->last_keycode = keycode;
+	data->last_toggle = toggle;
+
+set_timer:
+	input_sync(data->input_dev);
+	led_trigger_event(data->rxtrigger,
+			  data->keypressed ? LED_FULL : LED_OFF);
+	data->keyup_jiffies = jiffies + msecs_to_jiffies(IR_KEYPRESS_TIMEOUT);
+	mod_timer(&data->timer_keyup, data->keyup_jiffies);
+}
+
+
+
+/*****************************************************************************
+ *
+ * IR PARSING FUNCTIONS
+ *
+ *****************************************************************************/
+
+/* Resets all irdata */
+static void
+wbcir_reset_irdata(struct wbcir_data *data)
+{
+	memset(data->irdata, 0, sizeof(data->irdata));
+	data->irdata_count = 0;
+	data->irdata_off = 0;
+	data->irdata_error = 0;
+}
+
+/* Adds one bit of irdata */
+static void
+add_irdata_bit(struct wbcir_data *data, int set)
+{
+	if (data->irdata_count >= sizeof(data->irdata) * 8) {
+		data->irdata_error = 1;
+		return;
+	}
+
+	if (set)
+		__set_bit(data->irdata_count, data->irdata);
+	data->irdata_count++;
+}
+
+/* Gets count bits of irdata */
+static u16
+get_bits(struct wbcir_data *data, int count)
+{
+	u16 val = 0x0;
+
+	if (data->irdata_count - data->irdata_off < count) {
+		data->irdata_error = 1;
+		return 0x0;
+	}
+
+	while (count > 0) {
+		val <<= 1;
+		if (test_bit(data->irdata_off, data->irdata))
+			val |= 0x1;
+		count--;
+		data->irdata_off++;
+	}
+
+	return val;
+}
+
+/* Reads 16 cells and converts them to a byte */
+static u8
+wbcir_rc6cells_to_byte(struct wbcir_data *data)
+{
+	u16 raw = get_bits(data, 16);
+	u8 val = 0x00;
+	int bit;
+
+	for (bit = 0; bit < 8; bit++) {
+		switch (raw & 0x03) {
+		case 0x01:
+			break;
+		case 0x02:
+			val |= (0x01 << bit);
+			break;
+		default:
+			data->irdata_error = 1;
+			break;
+		}
+		raw >>= 2;
+	}
+
+	return val;
+}
+
+/* Decodes a number of bits from raw RC5 data */
+static u8
+wbcir_get_rc5bits(struct wbcir_data *data, unsigned int count)
+{
+	u16 raw = get_bits(data, count * 2);
+	u8 val = 0x00;
+	int bit;
+
+	for (bit = 0; bit < count; bit++) {
+		switch (raw & 0x03) {
+		case 0x01:
+			val |= (0x01 << bit);
+			break;
+		case 0x02:
+			break;
+		default:
+			data->irdata_error = 1;
+			break;
+		}
+		raw >>= 2;
+	}
+
+	return val;
+}
+
+static void
+wbcir_parse_rc6(struct device *dev, struct wbcir_data *data)
+{
+	/*
+	 * Normal bits are manchester coded as follows:
+	 * cell0 + cell1 = logic "0"
+	 * cell1 + cell0 = logic "1"
+	 *
+	 * The IR pulse has the following components:
+	 *
+	 * Leader		- 6 * cell1 - discarded
+	 * Gap    		- 2 * cell0 - discarded
+	 * Start bit		- Normal Coding - always "1"
+	 * Mode Bit 2 - 0	- Normal Coding
+	 * Toggle bit		- Normal Coding with double bit time,
+	 *			  e.g. cell0 + cell0 + cell1 + cell1
+	 *			  means logic "0".
+	 *
+	 * The rest depends on the mode, the following modes are known:
+	 *
+	 * MODE 0:
+	 *  Address Bit 7 - 0	- Normal Coding
+	 *  Command Bit 7 - 0	- Normal Coding
+	 *
+	 * MODE 6:
+	 *  The above Toggle Bit is used as a submode bit, 0 = A, 1 = B.
+	 *  Submode B is for pointing devices, only remotes using submode A
+	 *  are supported.
+	 *
+	 *  Customer range bit	- 0 => Customer = 7 bits, 0...127
+	 *                        1 => Customer = 15 bits, 32768...65535
+	 *  Customer Bits	- Normal Coding
+	 *
+	 *  Customer codes are allocated by Philips. The rest of the bits
+	 *  are customer dependent. The following is commonly used (and the
+	 *  only supported config):
+	 *
+	 *  Toggle Bit		- Normal Coding
+	 *  Address Bit 6 - 0	- Normal Coding
+	 *  Command Bit 7 - 0	- Normal Coding
+	 *
+	 * All modes are followed by at least 6 * cell0.
+	 *
+	 * MODE 0 msglen:
+	 *  1 * 2 (start bit) + 3 * 2 (mode) + 2 * 2 (toggle) +
+	 *  8 * 2 (address) + 8 * 2 (command) =
+	 *  44 cells
+	 *
+	 * MODE 6A msglen:
+	 *  1 * 2 (start bit) + 3 * 2 (mode) + 2 * 2 (submode) +
+	 *  1 * 2 (customer range bit) + 7/15 * 2 (customer bits) +
+	 *  1 * 2 (toggle bit) + 7 * 2 (address) + 8 * 2 (command) =
+	 *  60 - 76 cells
+	 */
+	u8 mode;
+	u8 toggle;
+	u16 customer = 0x0;
+	u8 address;
+	u8 command;
+	u32 scancode;
+
+	/* Leader mark */
+	while (get_bits(data, 1) && !data->irdata_error)
+		/* Do nothing */;
+
+	/* Leader space */
+	if (get_bits(data, 1)) {
+		dev_dbg(dev, "RC6 - Invalid leader space\n");
+		return;
+	}
+
+	/* Start bit */
+	if (get_bits(data, 2) != 0x02) {
+		dev_dbg(dev, "RC6 - Invalid start bit\n");
+		return;
+	}
+
+	/* Mode */
+	mode = get_bits(data, 6);
+	switch (mode) {
+	case 0x15: /* 010101 = b000 */
+		mode = 0;
+		break;
+	case 0x29: /* 101001 = b110 */
+		mode = 6;
+		break;
+	default:
+		dev_dbg(dev, "RC6 - Invalid mode\n");
+		return;
+	}
+
+	/* Toggle bit / Submode bit */
+	toggle = get_bits(data, 4);
+	switch (toggle) {
+	case 0x03:
+		toggle = 0;
+		break;
+	case 0x0C:
+		toggle = 1;
+		break;
+	default:
+		dev_dbg(dev, "RC6 - Toggle bit error\n");
+		break;
+	}
+
+	/* Customer */
+	if (mode == 6) {
+		if (toggle != 0) {
+			dev_dbg(dev, "RC6B - Not Supported\n");
+			return;
+		}
+
+		customer = wbcir_rc6cells_to_byte(data);
+
+		if (customer & 0x80) {
+			/* 15 bit customer value */
+			customer <<= 8;
+			customer |= wbcir_rc6cells_to_byte(data);
+		}
+	}
+
+	/* Address */
+	address = wbcir_rc6cells_to_byte(data);
+	if (mode == 6) {
+		toggle = address >> 7;
+		address &= 0x7F;
+	}
+
+	/* Command */
+	command = wbcir_rc6cells_to_byte(data);
+
+	/* Create scancode */
+	scancode =  command;
+	scancode |= address << 8;
+	scancode |= customer << 16;
+
+	/* Last sanity check */
+	if (data->irdata_error) {
+		dev_dbg(dev, "RC6 - Cell error(s)\n");
+		return;
+	}
+
+	dev_info(dev, "IR-RC6 ad 0x%02X cm 0x%02X cu 0x%04X "
+		"toggle %u mode %u scan 0x%08X\n",
+		address,
+		command,
+		customer,
+		(unsigned int)toggle,
+		(unsigned int)mode,
+		scancode);
+
+	wbcir_keydown(data, scancode, toggle);
+}
+
+static void
+wbcir_parse_rc5(struct device *dev, struct wbcir_data *data)
+{
+	/*
+	 * Bits are manchester coded as follows:
+	 * cell1 + cell0 = logic "0"
+	 * cell0 + cell1 = logic "1"
+	 * (i.e. the reverse of RC6)
+	 *
+	 * Start bit 1		- "1" - discarded
+	 * Start bit 2		- Must be inverted to get command bit 6
+	 * Toggle bit
+	 * Address Bit 4 - 0
+	 * Command Bit 5 - 0
+	 */
+	u8 toggle;
+	u8 address;
+	u8 command;
+	u32 scancode;
+
+	/* Start bit 1 */
+	if (!get_bits(data, 1)) {
+		dev_dbg(dev, "RC5 - Invalid start bit\n");
+		return;
+	}
+
+	/* Start bit 2 */
+	if (!wbcir_get_rc5bits(data, 1))
+		command = 0x40;
+	else
+		command = 0x00;
+
+	toggle   = wbcir_get_rc5bits(data, 1);
+	address  = wbcir_get_rc5bits(data, 5);
+	command |= wbcir_get_rc5bits(data, 6);
+	scancode = address << 7 | command;
+
+	/* Last sanity check */
+	if (data->irdata_error) {
+		dev_dbg(dev, "RC5 - Invalid message\n");
+		return;
+	}
+
+	dev_dbg(dev, "IR-RC5 ad %u cm %u t %u s %u\n",
+		(unsigned int)address,
+		(unsigned int)command,
+		(unsigned int)toggle,
+		(unsigned int)scancode);
+
+	wbcir_keydown(data, scancode, toggle);
+}
+
+static void
+wbcir_parse_nec(struct device *dev, struct wbcir_data *data)
+{
+	/*
+	 * Each bit represents 560 us.
+	 *
+	 * Leader		- 9 ms burst
+	 * Gap			- 4.5 ms silence
+	 * Address1 bit 0 - 7	- Address 1
+	 * Address2 bit 0 - 7	- Address 2
+	 * Command1 bit 0 - 7	- Command 1
+	 * Command2 bit 0 - 7	- Command 2
+	 *
+	 * Note the bit order!
+	 *
+	 * With the old NEC protocol, Address2 was the inverse of Address1
+	 * and Command2 was the inverse of Command1 and were used as
+	 * an error check.
+	 *
+	 * With NEC extended, Address1 is the LSB of the Address and
+	 * Address2 is the MSB, Command parsing remains unchanged.
+	 *
+	 * A repeat message is coded as:
+	 * Leader		- 9 ms burst
+	 * Gap			- 2.25 ms silence
+	 * Repeat		- 560 us active
+	 */
+	u8 address1;
+	u8 address2;
+	u8 command1;
+	u8 command2;
+	u16 address;
+	u32 scancode;
+
+	/* Leader mark */
+	while (get_bits(data, 1) && !data->irdata_error)
+		/* Do nothing */;
+
+	/* Leader space */
+	if (get_bits(data, 4)) {
+		dev_dbg(dev, "NEC - Invalid leader space\n");
+		return;
+	}
+
+	/* Repeat? */
+	if (get_bits(data, 1)) {
+		if (!data->keypressed) {
+			dev_dbg(dev, "NEC - Stray repeat message\n");
+			return;
+		}
+
+		dev_dbg(dev, "IR-NEC repeat s %u\n",
+			(unsigned int)data->last_scancode);
+
+		wbcir_keydown(data, data->last_scancode, data->last_toggle);
+		return;
+	}
+
+	/* Remaining leader space */
+	if (get_bits(data, 3)) {
+		dev_dbg(dev, "NEC - Invalid leader space\n");
+		return;
+	}
+
+	address1  = bitrev8(get_bits(data, 8));
+	address2  = bitrev8(get_bits(data, 8));
+	command1  = bitrev8(get_bits(data, 8));
+	command2  = bitrev8(get_bits(data, 8));
+
+	/* Sanity check */
+	if (data->irdata_error) {
+		dev_dbg(dev, "NEC - Invalid message\n");
+		return;
+	}
+
+	/* Check command validity */
+	if (command1 != ~command2) {
+		dev_dbg(dev, "NEC - Command bytes mismatch\n");
+		return;
+	}
+
+	/* Check for extended NEC protocol */
+	address = address1;
+	if (address1 != ~address2)
+		address |= address2 << 8;
+
+	scancode = address << 8 | command1;
+
+	dev_dbg(dev, "IR-NEC ad %u cm %u s %u\n",
+		(unsigned int)address,
+		(unsigned int)command1,
+		(unsigned int)scancode);
+
+	wbcir_keydown(data, scancode, !data->last_toggle);
+}
+
+
+
+/*****************************************************************************
+ *
+ * INTERRUPT FUNCTIONS
+ *
+ *****************************************************************************/
+
+static irqreturn_t
+wbcir_irq_handler(int irqno, void *cookie)
+{
+	struct pnp_dev *device = cookie;
+	struct wbcir_data *data = pnp_get_drvdata(device);
+	struct device *dev = &device->dev;
+	u8 status;
+	unsigned long flags;
+	u8 irdata[8];
+	int i;
+	unsigned int hw;
+
+	spin_lock_irqsave(&wbcir_lock, flags);
+
+	wbcir_select_bank(data, WBCIR_BANK_0);
+
+	status = inb(data->sbase + WBCIR_REG_SP3_EIR);
+
+	if (!(status & (WBCIR_IRQ_RX | WBCIR_IRQ_ERR))) {
+		spin_unlock_irqrestore(&wbcir_lock, flags);
+		return IRQ_NONE;
+	}
+
+	if (status & WBCIR_IRQ_ERR)
+		data->irdata_error = 1;
+
+	if (!(status & WBCIR_IRQ_RX))
+		goto out;
+
+	/* Since RXHDLEV is set, at least 8 bytes are in the FIFO */
+	insb(data->sbase + WBCIR_REG_SP3_RXDATA, &irdata[0], 8);
+
+	for (i = 0; i < sizeof(irdata); i++) {
+		hw = hweight8(irdata[i]);
+		if (hw > 4)
+			add_irdata_bit(data, 0);
+		else
+			add_irdata_bit(data, 1);
+
+		if (hw == 8)
+			data->idle_count++;
+		else
+			data->idle_count = 0;
+	}
+
+	if (data->idle_count > WBCIR_MAX_IDLE_BYTES) {
+		/* Set RXINACTIVE... */
+		outb(WBCIR_RX_DISABLE, data->sbase + WBCIR_REG_SP3_ASCR);
+
+		/* ...and drain the FIFO */
+		while (inb(data->sbase + WBCIR_REG_SP3_LSR) & WBCIR_RX_AVAIL)
+			inb(data->sbase + WBCIR_REG_SP3_RXDATA);
+
+		dev_dbg(dev, "IRDATA:\n");
+		for (i = 0; i < data->irdata_count; i += BITS_PER_LONG)
+			dev_dbg(dev, "0x%08lX\n", data->irdata[i/BITS_PER_LONG]);
+
+		switch (protocol) {
+		case IR_PROTOCOL_RC5:
+			wbcir_parse_rc5(dev, data);
+			break;
+		case IR_PROTOCOL_RC6:
+			wbcir_parse_rc6(dev, data);
+			break;
+		case IR_PROTOCOL_NEC:
+			wbcir_parse_nec(dev, data);
+			break;
+		}
+
+		wbcir_reset_irdata(data);
+		data->idle_count = 0;
+	}
+
+out:
+	spin_unlock_irqrestore(&wbcir_lock, flags);
+	return IRQ_HANDLED;
+}
+
+
+
+/*****************************************************************************
+ *
+ * SUSPEND/RESUME FUNCTIONS
+ *
+ *****************************************************************************/
+
+static void
+wbcir_shutdown(struct pnp_dev *device)
+{
+	struct device *dev = &device->dev;
+	struct wbcir_data *data = pnp_get_drvdata(device);
+	int do_wake = 1;
+	u8 match[11];
+	u8 mask[11];
+	u8 rc6_csl = 0;
+	int i;
+
+	memset(match, 0, sizeof(match));
+	memset(mask, 0, sizeof(mask));
+
+	if (wake_sc == INVALID_SCANCODE || !device_may_wakeup(dev)) {
+		do_wake = 0;
+		goto finish;
+	}
+
+	switch (protocol) {
+	case IR_PROTOCOL_RC5:
+		if (wake_sc > 0xFFF) {
+			do_wake = 0;
+			dev_err(dev, "RC5 - Invalid wake scancode\n");
+			break;
+		}
+
+		/* Mask = 13 bits, ex toggle */
+		mask[0] = 0xFF;
+		mask[1] = 0x17;
+
+		match[0]  = (wake_sc & 0x003F);      /* 6 command bits */
+		match[0] |= (wake_sc & 0x0180) >> 1; /* 2 address bits */
+		match[1]  = (wake_sc & 0x0E00) >> 9; /* 3 address bits */
+		if (!(wake_sc & 0x0040))             /* 2nd start bit  */
+			match[1] |= 0x10;
+
+		break;
+
+	case IR_PROTOCOL_NEC:
+		if (wake_sc > 0xFFFFFF) {
+			do_wake = 0;
+			dev_err(dev, "NEC - Invalid wake scancode\n");
+			break;
+		}
+
+		mask[0] = mask[1] = mask[2] = mask[3] = 0xFF;
+
+		match[1] = bitrev8((wake_sc & 0xFF));
+		match[0] = ~match[1];
+
+		match[3] = bitrev8((wake_sc & 0xFF00) >> 8);
+		if (wake_sc > 0xFFFF)
+			match[2] = bitrev8((wake_sc & 0xFF0000) >> 16);
+		else
+			match[2] = ~match[3];
+
+		break;
+
+	case IR_PROTOCOL_RC6:
+
+		if (wake_rc6mode == 0) {
+			if (wake_sc > 0xFFFF) {
+				do_wake = 0;
+				dev_err(dev, "RC6 - Invalid wake scancode\n");
+				break;
+			}
+
+			/* Command */
+			match[0] = wbcir_to_rc6cells(wake_sc >>  0);
+			mask[0]  = 0xFF;
+			match[1] = wbcir_to_rc6cells(wake_sc >>  4);
+			mask[1]  = 0xFF;
+
+			/* Address */
+			match[2] = wbcir_to_rc6cells(wake_sc >>  8);
+			mask[2]  = 0xFF;
+			match[3] = wbcir_to_rc6cells(wake_sc >> 12);
+			mask[3]  = 0xFF;
+
+			/* Header */
+			match[4] = 0x50; /* mode1 = mode0 = 0, ignore toggle */
+			mask[4]  = 0xF0;
+			match[5] = 0x09; /* start bit = 1, mode2 = 0 */
+			mask[5]  = 0x0F;
+
+			rc6_csl = 44;
+
+		} else if (wake_rc6mode == 6) {
+			i = 0;
+
+			/* Command */
+			match[i]  = wbcir_to_rc6cells(wake_sc >>  0);
+			mask[i++] = 0xFF;
+			match[i]  = wbcir_to_rc6cells(wake_sc >>  4);
+			mask[i++] = 0xFF;
+
+			/* Address + Toggle */
+			match[i]  = wbcir_to_rc6cells(wake_sc >>  8);
+			mask[i++] = 0xFF;
+			match[i]  = wbcir_to_rc6cells(wake_sc >> 12);
+			mask[i++] = 0x3F;
+
+			/* Customer bits 7 - 0 */
+			match[i]  = wbcir_to_rc6cells(wake_sc >> 16);
+			mask[i++] = 0xFF;
+			match[i]  = wbcir_to_rc6cells(wake_sc >> 20);
+			mask[i++] = 0xFF;
+
+			if (wake_sc & 0x80000000) {
+				/* Customer range bit and bits 15 - 8 */
+				match[i]  = wbcir_to_rc6cells(wake_sc >> 24);
+				mask[i++] = 0xFF;
+				match[i]  = wbcir_to_rc6cells(wake_sc >> 28);
+				mask[i++] = 0xFF;
+				rc6_csl = 76;
+			} else if (wake_sc <= 0x007FFFFF) {
+				rc6_csl = 60;
+			} else {
+				do_wake = 0;
+				dev_err(dev, "RC6 - Invalid wake scancode\n");
+				break;
+			}
+
+			/* Header */
+			match[i]  = 0x93; /* mode1 = mode0 = 1, submode = 0 */
+			mask[i++] = 0xFF;
+			match[i]  = 0x0A; /* start bit = 1, mode2 = 1 */
+			mask[i++] = 0x0F;
+
+		} else {
+			do_wake = 0;
+			dev_err(dev, "RC6 - Invalid wake mode\n");
+		}
+
+		break;
+
+	default:
+		do_wake = 0;
+		break;
+	}
+
+finish:
+	if (do_wake) {
+		/* Set compare and compare mask */
+		wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_INDEX,
+			       WBCIR_REGSEL_COMPARE | WBCIR_REG_ADDR0,
+			       0x3F);
+		outsb(data->wbase + WBCIR_REG_WCEIR_DATA, match, 11);
+		wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_INDEX,
+			       WBCIR_REGSEL_MASK | WBCIR_REG_ADDR0,
+			       0x3F);
+		outsb(data->wbase + WBCIR_REG_WCEIR_DATA, mask, 11);
+
+		/* RC6 Compare String Len */
+		outb(rc6_csl, data->wbase + WBCIR_REG_WCEIR_CSL);
+
+		/* Clear status bits NEC_REP, BUFF, MSG_END, MATCH */
+		wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_STS, 0x17, 0x17);
+
+		/* Clear BUFF_EN, Clear END_EN, Set MATCH_EN */
+		wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x01, 0x07);
+
+		/* Set CEIR_EN */
+		wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x01, 0x01);
+
+	} else {
+		/* Clear BUFF_EN, Clear END_EN, Clear MATCH_EN */
+		wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
+
+		/* Clear CEIR_EN */
+		wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x00, 0x01);
+	}
+
+	/* Disable interrupts */
+	outb(WBCIR_IRQ_NONE, data->sbase + WBCIR_REG_SP3_IER);
+}
+
+static int
+wbcir_suspend(struct pnp_dev *device, pm_message_t state)
+{
+	wbcir_shutdown(device);
+	return 0;
+}
+
+static int
+wbcir_resume(struct pnp_dev *device)
+{
+	struct wbcir_data *data = pnp_get_drvdata(device);
+
+	/* Clear BUFF_EN, Clear END_EN, Clear MATCH_EN */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
+
+	/* Clear CEIR_EN */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x00, 0x01);
+
+	/* Enable interrupts */
+	wbcir_reset_irdata(data);
+	outb(WBCIR_IRQ_RX | WBCIR_IRQ_ERR, data->sbase + WBCIR_REG_SP3_IER);
+
+	return 0;
+}
+
+
+
+/*****************************************************************************
+ *
+ * SETUP/INIT FUNCTIONS
+ *
+ *****************************************************************************/
+
+static void
+wbcir_cfg_ceir(struct wbcir_data *data)
+{
+	u8 tmp;
+
+	/* Set PROT_SEL, RX_INV, Clear CEIR_EN (needed for the led) */
+	tmp = protocol << 4;
+	if (invert)
+		tmp |= 0x08;
+	outb(tmp, data->wbase + WBCIR_REG_WCEIR_CTL);
+
+	/* Clear status bits NEC_REP, BUFF, MSG_END, MATCH */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_STS, 0x17, 0x17);
+
+	/* Clear BUFF_EN, Clear END_EN, Clear MATCH_EN */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
+
+	/* Set RC5 cell time to correspond to 36 kHz */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CFG1, 0x4A, 0x7F);
+
+	/* Set IRTX_INV */
+	if (invert)
+		outb(0x04, data->ebase + WBCIR_REG_ECEIR_CCTL);
+	else
+		outb(0x00, data->ebase + WBCIR_REG_ECEIR_CCTL);
+
+	/*
+	 * Clear IR LED, set SP3 clock to 24Mhz
+	 * set SP3_IRRX_SW to binary 01, helpfully not documented
+	 */
+	outb(0x10, data->ebase + WBCIR_REG_ECEIR_CTS);
+}
+
+static int __devinit
+wbcir_probe(struct pnp_dev *device, const struct pnp_device_id *dev_id)
+{
+	struct device *dev = &device->dev;
+	struct wbcir_data *data;
+	int err;
+
+	if (!(pnp_port_len(device, 0) == EHFUNC_IOMEM_LEN &&
+	      pnp_port_len(device, 1) == WAKEUP_IOMEM_LEN &&
+	      pnp_port_len(device, 2) == SP_IOMEM_LEN)) {
+		dev_err(dev, "Invalid resources\n");
+		return -ENODEV;
+	}
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	pnp_set_drvdata(device, data);
+
+	data->ebase = pnp_port_start(device, 0);
+	data->wbase = pnp_port_start(device, 1);
+	data->sbase = pnp_port_start(device, 2);
+	data->irq = pnp_irq(device, 0);
+
+	if (data->wbase == 0 || data->ebase == 0 ||
+	    data->sbase == 0 || data->irq == 0) {
+		err = -ENODEV;
+		dev_err(dev, "Invalid resources\n");
+		goto exit_free_data;
+	}
+
+	dev_dbg(&device->dev, "Found device "
+		"(w: 0x%lX, e: 0x%lX, s: 0x%lX, i: %u)\n",
+		data->wbase, data->ebase, data->sbase, data->irq);
+
+	if (!request_region(data->wbase, WAKEUP_IOMEM_LEN, DRVNAME)) {
+		dev_err(dev, "Region 0x%lx-0x%lx already in use!\n",
+			data->wbase, data->wbase + WAKEUP_IOMEM_LEN - 1);
+		err = -EBUSY;
+		goto exit_free_data;
+	}
+
+	if (!request_region(data->ebase, EHFUNC_IOMEM_LEN, DRVNAME)) {
+		dev_err(dev, "Region 0x%lx-0x%lx already in use!\n",
+			data->ebase, data->ebase + EHFUNC_IOMEM_LEN - 1);
+		err = -EBUSY;
+		goto exit_release_wbase;
+	}
+
+	if (!request_region(data->sbase, SP_IOMEM_LEN, DRVNAME)) {
+		dev_err(dev, "Region 0x%lx-0x%lx already in use!\n",
+			data->sbase, data->sbase + SP_IOMEM_LEN - 1);
+		err = -EBUSY;
+		goto exit_release_ebase;
+	}
+
+	err = request_irq(data->irq, wbcir_irq_handler,
+			  IRQF_DISABLED, DRVNAME, device);
+	if (err) {
+		dev_err(dev, "Failed to claim IRQ %u\n", data->irq);
+		err = -EBUSY;
+		goto exit_release_sbase;
+	}
+
+	led_trigger_register_simple("cir-tx", &data->txtrigger);
+	if (!data->txtrigger) {
+		err = -ENOMEM;
+		goto exit_free_irq;
+	}
+
+	led_trigger_register_simple("cir-rx", &data->rxtrigger);
+	if (!data->rxtrigger) {
+		err = -ENOMEM;
+		goto exit_unregister_txtrigger;
+	}
+
+	data->led.name = "cir::activity";
+	data->led.default_trigger = "cir-rx";
+	data->led.brightness_set = wbcir_led_brightness_set;
+	data->led.brightness_get = wbcir_led_brightness_get;
+	err = led_classdev_register(&device->dev, &data->led);
+	if (err)
+		goto exit_unregister_rxtrigger;
+
+	data->input_dev = input_allocate_device();
+	if (!data->input_dev) {
+		err = -ENOMEM;
+		goto exit_unregister_led;
+	}
+
+	data->input_dev->evbit[0] = BIT(EV_KEY);
+	data->input_dev->name = WBCIR_NAME;
+	data->input_dev->phys = "wbcir/cir0";
+	data->input_dev->id.bustype = BUS_HOST;
+	data->input_dev->id.vendor  = PCI_VENDOR_ID_WINBOND;
+	data->input_dev->id.product = WBCIR_ID_FAMILY;
+	data->input_dev->id.version = WBCIR_ID_CHIP;
+	data->input_dev->getkeycode = wbcir_getkeycode;
+	data->input_dev->setkeycode = wbcir_setkeycode;
+	input_set_capability(data->input_dev, EV_MSC, MSC_SCAN);
+	input_set_drvdata(data->input_dev, data);
+
+	err = input_register_device(data->input_dev);
+	if (err)
+		goto exit_free_input;
+
+	data->last_scancode = INVALID_SCANCODE;
+	INIT_LIST_HEAD(&data->keytable);
+	setup_timer(&data->timer_keyup, wbcir_keyup, (unsigned long)data);
+
+	/* Load default keymaps */
+	if (protocol == IR_PROTOCOL_RC6) {
+		int i;
+		for (i = 0; i < ARRAY_SIZE(rc6_def_keymap); i++) {
+			err = wbcir_setkeycode(data->input_dev,
+					       (int)rc6_def_keymap[i].scancode,
+					       (int)rc6_def_keymap[i].keycode);
+			if (err)
+				goto exit_unregister_keys;
+		}
+	}
+
+	device_init_wakeup(&device->dev, 1);
+
+	wbcir_cfg_ceir(data);
+
+	/* Disable interrupts */
+	wbcir_select_bank(data, WBCIR_BANK_0);
+	outb(WBCIR_IRQ_NONE, data->sbase + WBCIR_REG_SP3_IER);
+
+	/* Enable extended mode */
+	wbcir_select_bank(data, WBCIR_BANK_2);
+	outb(WBCIR_EXT_ENABLE, data->sbase + WBCIR_REG_SP3_EXCR1);
+
+	/*
+	 * Configure baud generator, IR data will be sampled at
+	 * a bitrate of: (24Mhz * prescaler) / (divisor * 16).
+	 *
+	 * The ECIR registers include a flag to change the
+	 * 24Mhz clock freq to 48Mhz.
+	 *
+	 * It's not documented in the specs, but fifo levels
+	 * other than 16 seems to be unsupported.
+	 */
+
+	/* prescaler 1.0, tx/rx fifo lvl 16 */
+	outb(0x30, data->sbase + WBCIR_REG_SP3_EXCR2);
+
+	/* Set baud divisor to generate one byte per bit/cell */
+	switch (protocol) {
+	case IR_PROTOCOL_RC5:
+		outb(0xA7, data->sbase + WBCIR_REG_SP3_BGDL);
+		break;
+	case IR_PROTOCOL_RC6:
+		outb(0x53, data->sbase + WBCIR_REG_SP3_BGDL);
+		break;
+	case IR_PROTOCOL_NEC:
+		outb(0x69, data->sbase + WBCIR_REG_SP3_BGDL);
+		break;
+	}
+	outb(0x00, data->sbase + WBCIR_REG_SP3_BGDH);
+
+	/* Set CEIR mode */
+	wbcir_select_bank(data, WBCIR_BANK_0);
+	outb(0xC0, data->sbase + WBCIR_REG_SP3_MCR);
+	inb(data->sbase + WBCIR_REG_SP3_LSR); /* Clear LSR */
+	inb(data->sbase + WBCIR_REG_SP3_MSR); /* Clear MSR */
+
+	/* Disable RX demod, run-length encoding/decoding, set freq span */
+	wbcir_select_bank(data, WBCIR_BANK_7);
+	outb(0x10, data->sbase + WBCIR_REG_SP3_RCCFG);
+
+	/* Disable timer */
+	wbcir_select_bank(data, WBCIR_BANK_4);
+	outb(0x00, data->sbase + WBCIR_REG_SP3_IRCR1);
+
+	/* Enable MSR interrupt, Clear AUX_IRX */
+	wbcir_select_bank(data, WBCIR_BANK_5);
+	outb(0x00, data->sbase + WBCIR_REG_SP3_IRCR2);
+
+	/* Disable CRC */
+	wbcir_select_bank(data, WBCIR_BANK_6);
+	outb(0x20, data->sbase + WBCIR_REG_SP3_IRCR3);
+
+	/* Set RX/TX (de)modulation freq, not really used */
+	wbcir_select_bank(data, WBCIR_BANK_7);
+	outb(0xF2, data->sbase + WBCIR_REG_SP3_IRRXDC);
+	outb(0x69, data->sbase + WBCIR_REG_SP3_IRTXMC);
+
+	/* Set invert and pin direction */
+	if (invert)
+		outb(0x10, data->sbase + WBCIR_REG_SP3_IRCFG4);
+	else
+		outb(0x00, data->sbase + WBCIR_REG_SP3_IRCFG4);
+
+	/* Set FIFO thresholds (RX = 8, TX = 3), reset RX/TX */
+	wbcir_select_bank(data, WBCIR_BANK_0);
+	outb(0x97, data->sbase + WBCIR_REG_SP3_FCR);
+
+	/* Clear AUX status bits */
+	outb(0xE0, data->sbase + WBCIR_REG_SP3_ASCR);
+
+	/* Enable interrupts */
+	outb(WBCIR_IRQ_RX | WBCIR_IRQ_ERR, data->sbase + WBCIR_REG_SP3_IER);
+
+	return 0;
+
+exit_unregister_keys:
+	if (!list_empty(&data->keytable)) {
+		struct wbcir_keyentry *key;
+		struct wbcir_keyentry *keytmp;
+
+		list_for_each_entry_safe(key, keytmp, &data->keytable, list) {
+			list_del(&key->list);
+			kfree(key);
+		}
+	}
+	input_unregister_device(data->input_dev);
+	/* Can't call input_free_device on an unregistered device */
+	data->input_dev = NULL;
+exit_free_input:
+	input_free_device(data->input_dev);
+exit_unregister_led:
+	led_classdev_unregister(&data->led);
+exit_unregister_rxtrigger:
+	led_trigger_unregister_simple(data->rxtrigger);
+exit_unregister_txtrigger:
+	led_trigger_unregister_simple(data->txtrigger);
+exit_free_irq:
+	free_irq(data->irq, device);
+exit_release_sbase:
+	release_region(data->sbase, SP_IOMEM_LEN);
+exit_release_ebase:
+	release_region(data->ebase, EHFUNC_IOMEM_LEN);
+exit_release_wbase:
+	release_region(data->wbase, WAKEUP_IOMEM_LEN);
+exit_free_data:
+	kfree(data);
+	pnp_set_drvdata(device, NULL);
+exit:
+	return err;
+}
+
+static void __devexit
+wbcir_remove(struct pnp_dev *device)
+{
+	struct wbcir_data *data = pnp_get_drvdata(device);
+	struct wbcir_keyentry *key;
+	struct wbcir_keyentry *keytmp;
+
+	/* Disable interrupts */
+	wbcir_select_bank(data, WBCIR_BANK_0);
+	outb(WBCIR_IRQ_NONE, data->sbase + WBCIR_REG_SP3_IER);
+
+	del_timer_sync(&data->timer_keyup);
+
+	free_irq(data->irq, device);
+
+	/* Clear status bits NEC_REP, BUFF, MSG_END, MATCH */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_STS, 0x17, 0x17);
+
+	/* Clear CEIR_EN */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x00, 0x01);
+
+	/* Clear BUFF_EN, END_EN, MATCH_EN */
+	wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
+
+	/* This will generate a keyup event if necessary */
+	input_unregister_device(data->input_dev);
+
+	led_trigger_unregister_simple(data->rxtrigger);
+	led_trigger_unregister_simple(data->txtrigger);
+	led_classdev_unregister(&data->led);
+
+	/* This is ok since &data->led isn't actually used */
+	wbcir_led_brightness_set(&data->led, LED_OFF);
+
+	release_region(data->wbase, WAKEUP_IOMEM_LEN);
+	release_region(data->ebase, EHFUNC_IOMEM_LEN);
+	release_region(data->sbase, SP_IOMEM_LEN);
+
+	list_for_each_entry_safe(key, keytmp, &data->keytable, list) {
+		list_del(&key->list);
+		kfree(key);
+	}
+
+	kfree(data);
+
+	pnp_set_drvdata(device, NULL);
+}
+
+static const struct pnp_device_id wbcir_ids[] = {
+	{ "WEC1022", 0 },
+	{ "", 0 }
+};
+MODULE_DEVICE_TABLE(pnp, wbcir_ids);
+
+static struct pnp_driver wbcir_driver = {
+	.name     = WBCIR_NAME,
+	.id_table = wbcir_ids,
+	.probe    = wbcir_probe,
+	.remove   = __devexit_p(wbcir_remove),
+	.suspend  = wbcir_suspend,
+	.resume   = wbcir_resume,
+	.shutdown = wbcir_shutdown
+};
+
+static int __init
+wbcir_init(void)
+{
+	int ret;
+
+	switch (protocol) {
+	case IR_PROTOCOL_RC5:
+	case IR_PROTOCOL_NEC:
+	case IR_PROTOCOL_RC6:
+		break;
+	default:
+		printk(KERN_ERR DRVNAME ": Invalid protocol argument\n");
+		return -EINVAL;
+	}
+
+	ret = pnp_register_driver(&wbcir_driver);
+	if (ret)
+		printk(KERN_ERR DRVNAME ": Unable to register driver\n");
+
+	return ret;
+}
+
+static void __exit
+wbcir_exit(void)
+{
+	pnp_unregister_driver(&wbcir_driver);
+}
+
+MODULE_AUTHOR("David H�rdeman <david@hardeman.nu>");
+MODULE_DESCRIPTION("Winbond SuperI/O Consumer IR Driver");
+MODULE_LICENSE("GPL");
+
+module_init(wbcir_init);
+module_exit(wbcir_exit);
+
+
-- 
cgit v0.10.2


From 2f17644d1cd0121daa0a997ff4eca5b3b44d1fae Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:33 +0200
Subject: [S390] cio: merge init calls

Define initialization sequence of css and ccw bus init calls by merging
them into a single init call. Also introduce channel_subsystem_init_sync
to wait for the initialization of devices to finish.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 393c73c..95805da 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -601,8 +601,7 @@ static void css_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
 	css_evaluate_subchannel(mchk_schid, 0);
 }
 
-static int __init
-__init_channel_subsystem(struct subchannel_id schid, void *data)
+static int __init setup_subchannel(struct subchannel_id schid, void *data)
 {
 	struct subchannel *sch;
 	int ret;
@@ -854,14 +853,13 @@ static struct notifier_block css_power_notifier = {
  * The struct subchannel's are created during probing (except for the
  * static console subchannel).
  */
-static int __init
-init_channel_subsystem (void)
+static int __init css_bus_init(void)
 {
 	int ret, i;
 
 	ret = chsc_determine_css_characteristics();
 	if (ret == -ENOMEM)
-		goto out; /* No need to continue. */
+		goto out;
 
 	ret = chsc_alloc_sei_area();
 	if (ret)
@@ -934,7 +932,6 @@ init_channel_subsystem (void)
 	/* Enable default isc for I/O subchannels. */
 	isc_register(IO_SCH_ISC);
 
-	for_each_subchannel(__init_channel_subsystem, NULL);
 	return 0;
 out_file:
 	if (css_chsc_characteristics.secm)
@@ -966,6 +963,60 @@ out:
 	return ret;
 }
 
+static void __init css_bus_cleanup(void)
+{
+	struct channel_subsystem *css;
+	int i;
+
+	for (i = 0; i <= __MAX_CSSID; i++) {
+		css = channel_subsystems[i];
+		device_unregister(&css->pseudo_subchannel->dev);
+		css->pseudo_subchannel = NULL;
+		if (css_chsc_characteristics.secm)
+			device_remove_file(&css->device, &dev_attr_cm_enable);
+		device_unregister(&css->device);
+	}
+	bus_unregister(&css_bus_type);
+	crw_unregister_handler(CRW_RSC_CSS);
+	chsc_free_sei_area();
+	kfree(slow_subchannel_set);
+	isc_unregister(IO_SCH_ISC);
+}
+
+static int __init channel_subsystem_init(void)
+{
+	int ret;
+
+	ret = css_bus_init();
+	if (ret)
+		return ret;
+
+	ret = io_subchannel_init();
+	if (ret)
+		css_bus_cleanup();
+
+	return ret;
+}
+subsys_initcall(channel_subsystem_init);
+
+/*
+ * Wait for the initialization of devices to finish, to make sure we are
+ * done with our setup if the search for the root device starts.
+ */
+static int __init channel_subsystem_init_sync(void)
+{
+	/* Allocate and register subchannels. */
+	for_each_subchannel(setup_subchannel, NULL);
+
+	/* Wait for the initialization of ccw devices to finish. */
+	wait_event(ccw_device_init_wq,
+		   atomic_read(&ccw_device_init_count) == 0);
+	flush_workqueue(ccw_device_work);
+
+	return 0;
+}
+subsys_initcall_sync(channel_subsystem_init_sync);
+
 int sch_is_pseudo_sch(struct subchannel *sch)
 {
 	return sch == to_css(sch->dev.parent)->pseudo_subchannel;
@@ -1135,7 +1186,5 @@ void css_driver_unregister(struct css_driver *cdrv)
 }
 EXPORT_SYMBOL_GPL(css_driver_unregister);
 
-subsys_initcall(init_channel_subsystem);
-
 MODULE_LICENSE("GPL");
 EXPORT_SYMBOL(css_bus_type);
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 6527f3f..4093adc 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -170,8 +170,7 @@ atomic_t ccw_device_init_count;
 
 static void recovery_func(unsigned long data);
 
-static int __init
-init_ccw_bus_type (void)
+int __init io_subchannel_init(void)
 {
 	int ret;
 
@@ -181,10 +180,10 @@ init_ccw_bus_type (void)
 
 	ccw_device_work = create_singlethread_workqueue("cio");
 	if (!ccw_device_work)
-		return -ENOMEM; /* FIXME: better errno ? */
+		return -ENOMEM;
 	slow_path_wq = create_singlethread_workqueue("kslowcrw");
 	if (!slow_path_wq) {
-		ret = -ENOMEM; /* FIXME: better errno ? */
+		ret = -ENOMEM;
 		goto out_err;
 	}
 	if ((ret = bus_register (&ccw_bus_type)))
@@ -194,9 +193,6 @@ init_ccw_bus_type (void)
 	if (ret)
 		goto out_err;
 
-	wait_event(ccw_device_init_wq,
-		   atomic_read(&ccw_device_init_count) == 0);
-	flush_workqueue(ccw_device_work);
 	return 0;
 out_err:
 	if (ccw_device_work)
@@ -206,16 +202,6 @@ out_err:
 	return ret;
 }
 
-static void __exit
-cleanup_ccw_bus_type (void)
-{
-	css_driver_unregister(&io_subchannel_driver);
-	bus_unregister(&ccw_bus_type);
-	destroy_workqueue(ccw_device_work);
-}
-
-subsys_initcall(init_ccw_bus_type);
-module_exit(cleanup_ccw_bus_type);
 
 /************************ device handling **************************/
 
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index e397510..ed39a2c 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -74,6 +74,7 @@ dev_fsm_final_state(struct ccw_device *cdev)
 extern struct workqueue_struct *ccw_device_work;
 extern wait_queue_head_t ccw_device_init_wq;
 extern atomic_t ccw_device_init_count;
+int __init io_subchannel_init(void);
 
 void io_subchannel_recog_done(struct ccw_device *cdev);
 void io_subchannel_init_config(struct subchannel *sch);
-- 
cgit v0.10.2


From 255305536c1b56ad09590f1400fb2c788265e34e Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:34 +0200
Subject: [S390] cio: introduce css_eval_scheduled

Use css_eval_scheduled to determine if all scheduled subchannel
evaluation is finished. Wait for this value to be 0 in the
channel subsystem init function.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 95805da..59c4b94 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -409,10 +409,14 @@ static void css_evaluate_subchannel(struct subchannel_id schid, int slow)
 
 static struct idset *slow_subchannel_set;
 static spinlock_t slow_subchannel_lock;
+static wait_queue_head_t css_eval_wq;
+static atomic_t css_eval_scheduled;
 
 static int __init slow_subchannel_init(void)
 {
 	spin_lock_init(&slow_subchannel_lock);
+	atomic_set(&css_eval_scheduled, 0);
+	init_waitqueue_head(&css_eval_wq);
 	slow_subchannel_set = idset_sch_new();
 	if (!slow_subchannel_set) {
 		CIO_MSG_EVENT(0, "could not allocate slow subchannel set\n");
@@ -468,9 +472,17 @@ static int slow_eval_unknown_fn(struct subchannel_id schid, void *data)
 
 static void css_slow_path_func(struct work_struct *unused)
 {
+	unsigned long flags;
+
 	CIO_TRACE_EVENT(4, "slowpath");
 	for_each_subchannel_staged(slow_eval_known_fn, slow_eval_unknown_fn,
 				   NULL);
+	spin_lock_irqsave(&slow_subchannel_lock, flags);
+	if (idset_is_empty(slow_subchannel_set)) {
+		atomic_set(&css_eval_scheduled, 0);
+		wake_up(&css_eval_wq);
+	}
+	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
 }
 
 static DECLARE_WORK(slow_path_work, css_slow_path_func);
@@ -482,6 +494,7 @@ void css_schedule_eval(struct subchannel_id schid)
 
 	spin_lock_irqsave(&slow_subchannel_lock, flags);
 	idset_sch_add(slow_subchannel_set, schid);
+	atomic_set(&css_eval_scheduled, 1);
 	queue_work(slow_path_wq, &slow_path_work);
 	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
 }
@@ -492,6 +505,7 @@ void css_schedule_eval_all(void)
 
 	spin_lock_irqsave(&slow_subchannel_lock, flags);
 	idset_fill(slow_subchannel_set);
+	atomic_set(&css_eval_scheduled, 1);
 	queue_work(slow_path_wq, &slow_path_work);
 	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
 }
@@ -1007,6 +1021,8 @@ static int __init channel_subsystem_init_sync(void)
 {
 	/* Allocate and register subchannels. */
 	for_each_subchannel(setup_subchannel, NULL);
+	/* Wait for the evaluation of subchannels to finish. */
+	wait_event(css_eval_wq, atomic_read(&css_eval_scheduled) == 0);
 
 	/* Wait for the initialization of ccw devices to finish. */
 	wait_event(ccw_device_init_wq,
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index cf8f24a..77e42cb 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -110,3 +110,13 @@ int idset_sch_get_first(struct idset *set, struct subchannel_id *schid)
 	}
 	return rc;
 }
+
+int idset_is_empty(struct idset *set)
+{
+	int bitnum;
+
+	bitnum = find_first_bit(set->bitmap, set->num_ssid * set->num_id);
+	if (bitnum >= set->num_ssid * set->num_id)
+		return 1;
+	return 0;
+}
diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h
index 528065c..ca1398a 100644
--- a/drivers/s390/cio/idset.h
+++ b/drivers/s390/cio/idset.h
@@ -21,5 +21,6 @@ void idset_sch_add(struct idset *set, struct subchannel_id id);
 void idset_sch_del(struct idset *set, struct subchannel_id id);
 int idset_sch_contains(struct idset *set, struct subchannel_id id);
 int idset_sch_get_first(struct idset *set, struct subchannel_id *id);
+int idset_is_empty(struct idset *set);
 
 #endif /* S390_IDSET_H */
-- 
cgit v0.10.2


From 8ea7f5590142c0b9ab319aa3cae85cf430a207d8 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:35 +0200
Subject: [S390] cio: introduce css_settle

Introduce the css_driver callback settle which can be implemented
by a subchannel driver to wait for the subchannel type specific
asynchronous work to finish.
In channel_subsystem_init_sync we call that for each subchannel
driver.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 59c4b94..f018835 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -1013,6 +1013,15 @@ static int __init channel_subsystem_init(void)
 }
 subsys_initcall(channel_subsystem_init);
 
+static int css_settle(struct device_driver *drv, void *unused)
+{
+	struct css_driver *cssdrv = to_cssdriver(drv);
+
+	if (cssdrv->settle)
+		cssdrv->settle();
+	return 0;
+}
+
 /*
  * Wait for the initialization of devices to finish, to make sure we are
  * done with our setup if the search for the root device starts.
@@ -1024,12 +1033,8 @@ static int __init channel_subsystem_init_sync(void)
 	/* Wait for the evaluation of subchannels to finish. */
 	wait_event(css_eval_wq, atomic_read(&css_eval_scheduled) == 0);
 
-	/* Wait for the initialization of ccw devices to finish. */
-	wait_event(ccw_device_init_wq,
-		   atomic_read(&ccw_device_init_count) == 0);
-	flush_workqueue(ccw_device_work);
-
-	return 0;
+	/* Wait for the subchannel type specific initialization to finish */
+	return bus_for_each_drv(&css_bus_type, NULL, NULL, css_settle);
 }
 subsys_initcall_sync(channel_subsystem_init_sync);
 
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index 9763eee..54acdaa 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -75,6 +75,7 @@ struct chp_link;
  * @freeze: callback for freezing during hibernation snapshotting
  * @thaw: undo work done in @freeze
  * @restore: callback for restoring after hibernation
+ * @settle: wait for asynchronous work to finish
  * @name: name of the device driver
  */
 struct css_driver {
@@ -92,6 +93,7 @@ struct css_driver {
 	int (*freeze)(struct subchannel *);
 	int (*thaw) (struct subchannel *);
 	int (*restore)(struct subchannel *);
+	void (*settle)(void);
 	const char *name;
 };
 
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 4093adc..f780bdd 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -131,6 +131,10 @@ static void io_subchannel_shutdown(struct subchannel *);
 static int io_subchannel_sch_event(struct subchannel *, int);
 static int io_subchannel_chp_event(struct subchannel *, struct chp_link *,
 				   int);
+static void recovery_func(unsigned long data);
+struct workqueue_struct *ccw_device_work;
+wait_queue_head_t ccw_device_init_wq;
+atomic_t ccw_device_init_count;
 
 static struct css_device_id io_subchannel_ids[] = {
 	{ .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
@@ -151,6 +155,13 @@ static int io_subchannel_prepare(struct subchannel *sch)
 	return 0;
 }
 
+static void io_subchannel_settle(void)
+{
+	wait_event(ccw_device_init_wq,
+		   atomic_read(&ccw_device_init_count) == 0);
+	flush_workqueue(ccw_device_work);
+}
+
 static struct css_driver io_subchannel_driver = {
 	.owner = THIS_MODULE,
 	.subchannel_type = io_subchannel_ids,
@@ -162,14 +173,9 @@ static struct css_driver io_subchannel_driver = {
 	.remove = io_subchannel_remove,
 	.shutdown = io_subchannel_shutdown,
 	.prepare = io_subchannel_prepare,
+	.settle = io_subchannel_settle,
 };
 
-struct workqueue_struct *ccw_device_work;
-wait_queue_head_t ccw_device_init_wq;
-atomic_t ccw_device_init_count;
-
-static void recovery_func(unsigned long data);
-
 int __init io_subchannel_init(void)
 {
 	int ret;
-- 
cgit v0.10.2


From b827d1c8b65b27a293433e7c4723c7dfd6c4b848 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:36 +0200
Subject: [S390] cio: dont kfree vmalloced memory

Don't use kfree to free memory allocated by vmalloc.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index f018835..b43f769 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -971,7 +971,7 @@ out_bus:
 out:
 	crw_unregister_handler(CRW_RSC_CSS);
 	chsc_free_sei_area();
-	kfree(slow_subchannel_set);
+	idset_free(slow_subchannel_set);
 	pr_alert("The CSS device driver initialization failed with "
 		 "errno=%d\n", ret);
 	return ret;
@@ -993,7 +993,7 @@ static void __init css_bus_cleanup(void)
 	bus_unregister(&css_bus_type);
 	crw_unregister_handler(CRW_RSC_CSS);
 	chsc_free_sei_area();
-	kfree(slow_subchannel_set);
+	idset_free(slow_subchannel_set);
 	isc_unregister(IO_SCH_ISC);
 }
 
-- 
cgit v0.10.2


From b0a285d31bd475fdd4312e457288be558b705e55 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:37 +0200
Subject: [S390] cio: idset use actual number of ssids

The functions idset_sch_new and for_each_subchannel_staged
use different values for the number of subchannel sets. Make
it consistent by changing idset_sch_new to also use the actual
number of subchannel sets.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index b43f769..5e217bb 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -32,7 +32,7 @@
 
 int css_init_done = 0;
 static int need_reprobe = 0;
-static int max_ssid = 0;
+int max_ssid;
 
 struct channel_subsystem *channel_subsystems[__MAX_CSSID + 1];
 
@@ -879,6 +879,18 @@ static int __init css_bus_init(void)
 	if (ret)
 		goto out;
 
+	/* Try to enable MSS. */
+	ret = chsc_enable_facility(CHSC_SDA_OC_MSS);
+	switch (ret) {
+	case 0: /* Success. */
+		max_ssid = __MAX_SSID;
+		break;
+	case -ENOMEM:
+		goto out;
+	default:
+		max_ssid = 0;
+	}
+
 	ret = slow_subchannel_init();
 	if (ret)
 		goto out;
@@ -890,17 +902,6 @@ static int __init css_bus_init(void)
 	if ((ret = bus_register(&css_bus_type)))
 		goto out;
 
-	/* Try to enable MSS. */
-	ret = chsc_enable_facility(CHSC_SDA_OC_MSS);
-	switch (ret) {
-	case 0: /* Success. */
-		max_ssid = __MAX_SSID;
-		break;
-	case -ENOMEM:
-		goto out_bus;
-	default:
-		max_ssid = 0;
-	}
 	/* Setup css structure. */
 	for (i = 0; i <= __MAX_CSSID; i++) {
 		struct channel_subsystem *css;
@@ -966,7 +967,6 @@ out_unregister:
 					   &dev_attr_cm_enable);
 		device_unregister(&css->device);
 	}
-out_bus:
 	bus_unregister(&css_bus_type);
 out:
 	crw_unregister_handler(CRW_RSC_CSS);
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index 54acdaa..68d6b0b 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -111,6 +111,7 @@ extern void css_sch_device_unregister(struct subchannel *);
 extern int css_probe_device(struct subchannel_id);
 extern struct subchannel *get_subchannel_by_schid(struct subchannel_id);
 extern int css_init_done;
+extern int max_ssid;
 int for_each_subchannel_staged(int (*fn_known)(struct subchannel *, void *),
 			       int (*fn_unknown)(struct subchannel_id,
 			       void *), void *data);
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index 77e42cb..5c88faf 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -78,7 +78,7 @@ static inline int idset_get_first(struct idset *set, int *ssid, int *id)
 
 struct idset *idset_sch_new(void)
 {
-	return idset_new(__MAX_SSID + 1, __MAX_SUBCHANNEL + 1);
+	return idset_new(max_ssid + 1, __MAX_SUBCHANNEL + 1);
 }
 
 void idset_sch_add(struct idset *set, struct subchannel_id schid)
-- 
cgit v0.10.2


From 703e5c9993639284bc0a8929b6de362424df7019 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:38 +0200
Subject: [S390] cio: introduce consistent subchannel scanning

Previously, there were multiple subchannel scanning mechanisms
which could potentially conflict with each other. Fix this problem
by moving blacklist and ccw driver triggered scanning to the
existing evaluation method.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 5e217bb..91c2570 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -31,7 +31,6 @@
 #include "chp.h"
 
 int css_init_done = 0;
-static int need_reprobe = 0;
 int max_ssid;
 
 struct channel_subsystem *channel_subsystems[__MAX_CSSID + 1];
@@ -315,12 +314,18 @@ int css_probe_device(struct subchannel_id schid)
 	int ret;
 	struct subchannel *sch;
 
-	sch = css_alloc_subchannel(schid);
-	if (IS_ERR(sch))
-		return PTR_ERR(sch);
+	if (cio_is_console(schid))
+		sch = cio_get_console_subchannel();
+	else {
+		sch = css_alloc_subchannel(schid);
+		if (IS_ERR(sch))
+			return PTR_ERR(sch);
+	}
 	ret = css_register_subchannel(sch);
-	if (ret)
-		put_device(&sch->dev);
+	if (ret) {
+		if (!cio_is_console(schid))
+			put_device(&sch->dev);
+	}
 	return ret;
 }
 
@@ -510,76 +515,48 @@ void css_schedule_eval_all(void)
 	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
 }
 
-void css_wait_for_slow_path(void)
+static int __unset_registered(struct device *dev, void *data)
 {
-	flush_workqueue(slow_path_wq);
-}
-
-/* Reprobe subchannel if unregistered. */
-static int reprobe_subchannel(struct subchannel_id schid, void *data)
-{
-	int ret;
-
-	CIO_MSG_EVENT(6, "cio: reprobe 0.%x.%04x\n",
-		      schid.ssid, schid.sch_no);
-	if (need_reprobe)
-		return -EAGAIN;
-
-	ret = css_probe_device(schid);
-	switch (ret) {
-	case 0:
-		break;
-	case -ENXIO:
-	case -ENOMEM:
-	case -EIO:
-		/* These should abort looping */
-		break;
-	default:
-		ret = 0;
-	}
-
-	return ret;
-}
+	struct idset *set = data;
+	struct subchannel *sch = to_subchannel(dev);
 
-static void reprobe_after_idle(struct work_struct *unused)
-{
-	/* Make sure initial subchannel scan is done. */
-	wait_event(ccw_device_init_wq,
-		   atomic_read(&ccw_device_init_count) == 0);
-	if (need_reprobe)
-		css_schedule_reprobe();
+	idset_sch_del(set, sch->schid);
+	return 0;
 }
 
-static DECLARE_WORK(reprobe_idle_work, reprobe_after_idle);
-
-/* Work function used to reprobe all unregistered subchannels. */
-static void reprobe_all(struct work_struct *unused)
+void css_schedule_eval_all_unreg(void)
 {
-	int ret;
-
-	CIO_MSG_EVENT(4, "reprobe start\n");
+	unsigned long flags;
+	struct idset *unreg_set;
 
-	/* Make sure initial subchannel scan is done. */
-	if (atomic_read(&ccw_device_init_count) != 0) {
-		queue_work(ccw_device_work, &reprobe_idle_work);
+	/* Find unregistered subchannels. */
+	unreg_set = idset_sch_new();
+	if (!unreg_set) {
+		/* Fallback. */
+		css_schedule_eval_all();
 		return;
 	}
-	need_reprobe = 0;
-	ret = for_each_subchannel_staged(NULL, reprobe_subchannel, NULL);
-
-	CIO_MSG_EVENT(4, "reprobe done (rc=%d, need_reprobe=%d)\n", ret,
-		      need_reprobe);
+	idset_fill(unreg_set);
+	bus_for_each_dev(&css_bus_type, NULL, unreg_set, __unset_registered);
+	/* Apply to slow_subchannel_set. */
+	spin_lock_irqsave(&slow_subchannel_lock, flags);
+	idset_add_set(slow_subchannel_set, unreg_set);
+	atomic_set(&css_eval_scheduled, 1);
+	queue_work(slow_path_wq, &slow_path_work);
+	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
+	idset_free(unreg_set);
 }
 
-static DECLARE_WORK(css_reprobe_work, reprobe_all);
+void css_wait_for_slow_path(void)
+{
+	flush_workqueue(slow_path_wq);
+}
 
 /* Schedule reprobing of all unregistered subchannels. */
 void css_schedule_reprobe(void)
 {
-	need_reprobe = 1;
-	queue_work(slow_path_wq, &css_reprobe_work);
+	css_schedule_eval_all_unreg();
 }
-
 EXPORT_SYMBOL_GPL(css_schedule_reprobe);
 
 /*
@@ -615,48 +592,6 @@ static void css_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
 	css_evaluate_subchannel(mchk_schid, 0);
 }
 
-static int __init setup_subchannel(struct subchannel_id schid, void *data)
-{
-	struct subchannel *sch;
-	int ret;
-
-	if (cio_is_console(schid))
-		sch = cio_get_console_subchannel();
-	else {
-		sch = css_alloc_subchannel(schid);
-		if (IS_ERR(sch))
-			ret = PTR_ERR(sch);
-		else
-			ret = 0;
-		switch (ret) {
-		case 0:
-			break;
-		case -ENOMEM:
-			panic("Out of memory in init_channel_subsystem\n");
-		/* -ENXIO: no more subchannels. */
-		case -ENXIO:
-			return ret;
-		/* -EIO: this subchannel set not supported. */
-		case -EIO:
-			return ret;
-		default:
-			return 0;
-		}
-	}
-	/*
-	 * We register ALL valid subchannels in ioinfo, even those
-	 * that have been present before init_channel_subsystem.
-	 * These subchannels can't have been registered yet (kmalloc
-	 * not working) so we do it now. This is true e.g. for the
-	 * console subchannel.
-	 */
-	if (css_register_subchannel(sch)) {
-		if (!cio_is_console(schid))
-			put_device(&sch->dev);
-	}
-	return 0;
-}
-
 static void __init
 css_generate_pgid(struct channel_subsystem *css, u32 tod_high)
 {
@@ -1028,11 +963,10 @@ static int css_settle(struct device_driver *drv, void *unused)
  */
 static int __init channel_subsystem_init_sync(void)
 {
-	/* Allocate and register subchannels. */
-	for_each_subchannel(setup_subchannel, NULL);
+	/* Start initial subchannel evaluation. */
+	css_schedule_eval_all();
 	/* Wait for the evaluation of subchannels to finish. */
 	wait_event(css_eval_wq, atomic_read(&css_eval_scheduled) == 0);
-
 	/* Wait for the subchannel type specific initialization to finish */
 	return bus_for_each_drv(&css_bus_type, NULL, NULL, css_settle);
 }
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index 5c88faf..4d10981 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -120,3 +120,13 @@ int idset_is_empty(struct idset *set)
 		return 1;
 	return 0;
 }
+
+void idset_add_set(struct idset *to, struct idset *from)
+{
+	unsigned long i, len;
+
+	len = min(__BITOPS_WORDS(to->num_ssid * to->num_id),
+		  __BITOPS_WORDS(from->num_ssid * from->num_id));
+	for (i = 0; i < len ; i++)
+		to->bitmap[i] |= from->bitmap[i];
+}
diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h
index ca1398a..7543da4 100644
--- a/drivers/s390/cio/idset.h
+++ b/drivers/s390/cio/idset.h
@@ -22,5 +22,6 @@ void idset_sch_del(struct idset *set, struct subchannel_id id);
 int idset_sch_contains(struct idset *set, struct subchannel_id id);
 int idset_sch_get_first(struct idset *set, struct subchannel_id *id);
 int idset_is_empty(struct idset *set);
+void idset_add_set(struct idset *to, struct idset *from);
 
 #endif /* S390_IDSET_H */
-- 
cgit v0.10.2


From 1d7e1500a6acfc89415aa2524e2c475c980ac42a Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:39 +0200
Subject: [S390] qdio: reduce per device debug messages

Even if turned off the debug message overhead is measurable in the hot path.
Reduce the number of debug message calls in do_QDIO and qdio_kick_handler.
Also use hex numbers to save space in the debug entries.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 9aef402..21766c7 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -401,7 +401,7 @@ static void announce_buffer_error(struct qdio_q *q, int count)
 	if ((!q->is_input_q &&
 	    (q->sbal[q->first_to_check]->element[15].flags & 0xff) == 0x10)) {
 		qdio_perf_stat_inc(&perf_stats.outbound_target_full);
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%3d",
+		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%02x",
 			      q->first_to_check);
 		return;
 	}
@@ -418,7 +418,7 @@ static inline void inbound_primed(struct qdio_q *q, int count)
 {
 	int new;
 
-	DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in prim: %3d", count);
+	DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in prim: %02x", count);
 
 	/* for QEBSM the ACK was already set by EQBS */
 	if (is_qebsm(q)) {
@@ -545,7 +545,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q)
 	 * has (probably) not moved (see qdio_inbound_processing).
 	 */
 	if (get_usecs() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) {
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in done:%3d",
+		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in done:%02x",
 			      q->first_to_check);
 		return 1;
 	} else
@@ -565,11 +565,10 @@ static void qdio_kick_handler(struct qdio_q *q)
 
 	if (q->is_input_q) {
 		qdio_perf_stat_inc(&perf_stats.inbound_handler);
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "kih s:%3d c:%3d", start, count);
-	} else {
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "koh: nr:%1d", q->nr);
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "s:%3d c:%3d", start, count);
-	}
+		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "kih s:%02x c:%02x", start, count);
+	} else
+		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "koh: s:%02x c:%02x",
+			      start, count);
 
 	q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, start, count,
 		   q->irq_ptr->int_parm);
@@ -633,7 +632,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q)
 	switch (state) {
 	case SLSB_P_OUTPUT_EMPTY:
 		/* the adapter got it */
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out empty:%1d %3d", q->nr, count);
+		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out empty:%1d %02x", q->nr, count);
 
 		atomic_sub(count, &q->nr_buf_used);
 		q->first_to_check = add_buf(q->first_to_check, count);
@@ -1481,10 +1480,9 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags,
 	get_buf_state(q, prev_buf(bufnr), &state, 0);
 	if (state != SLSB_CU_OUTPUT_PRIMED)
 		rc = qdio_kick_outbound_q(q);
-	else {
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "fast-req");
+	else
 		qdio_perf_stat_inc(&perf_stats.fast_requeue);
-	}
+
 out:
 	tasklet_schedule(&q->tasklet);
 	return rc;
@@ -1510,12 +1508,8 @@ int do_QDIO(struct ccw_device *cdev, unsigned int callflags,
 	if (!irq_ptr)
 		return -ENODEV;
 
-	if (callflags & QDIO_FLAG_SYNC_INPUT)
-		DBF_DEV_EVENT(DBF_INFO, irq_ptr, "doQDIO input");
-	else
-		DBF_DEV_EVENT(DBF_INFO, irq_ptr, "doQDIO output");
-	DBF_DEV_EVENT(DBF_INFO, irq_ptr, "q:%1d flag:%4x", q_nr, callflags);
-	DBF_DEV_EVENT(DBF_INFO, irq_ptr, "buf:%2d cnt:%3d", bufnr, count);
+	DBF_DEV_EVENT(DBF_INFO, irq_ptr,
+		      "do%02x b:%02x c:%02x", callflags, bufnr, count);
 
 	if (irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)
 		return -EBUSY;
-- 
cgit v0.10.2


From 6541f7b68f229aacd2e453bc9e94335fc56419fe Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:40 +0200
Subject: [S390] qdio: change state of all primed input buffers

If input buffers stay in primed state qdio may not receive further interrupts
for the input queue depending on the firmware. That can cause a connection
hang on OSA cards.

Change the state of all primed input buffers that are not acknowledged to
not initialized.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 21766c7..4be6e84 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -455,6 +455,8 @@ static inline void inbound_primed(struct qdio_q *q, int count)
 	count--;
 	if (!count)
 		return;
+	/* need to change ALL buffers to get more interrupts */
+	set_buf_states(q, q->first_to_check, SLSB_P_INPUT_NOT_INIT, count);
 }
 
 static int get_inbound_buffer_frontier(struct qdio_q *q)
-- 
cgit v0.10.2


From 2d70ca23f86647e076e3a8b64b3a90e583b894d5 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:41 +0200
Subject: [S390] Convert sys_clone to function with parameters.

Use function parameters instead of accessing the pt_regs structure
to get the parameters.
Also merge the 31 and 64 bit versions since they are identical.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 9ab188d..f741cd4 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -801,23 +801,6 @@ asmlinkage long sys32_write(unsigned int fd, char __user * buf, size_t count)
 	return sys_write(fd, buf, count);
 }
 
-asmlinkage long sys32_clone(void)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-	unsigned long clone_flags;
-	unsigned long newsp;
-	int __user *parent_tidptr, *child_tidptr;
-
-	clone_flags = regs->gprs[3] & 0xffffffffUL;
-	newsp = regs->orig_gpr2 & 0x7fffffffUL;
-	parent_tidptr = compat_ptr(regs->gprs[4]);
-	child_tidptr = compat_ptr(regs->gprs[5]);
-	if (!newsp)
-		newsp = regs->gprs[15];
-	return do_fork(clone_flags, newsp, regs, 0,
-		       parent_tidptr, child_tidptr);
-}
-
 /*
  * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
  * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 836a288..7cceb67 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -222,7 +222,6 @@ unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
 long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
 long sys32_read(unsigned int fd, char __user * buf, size_t count);
 long sys32_write(unsigned int fd, char __user * buf, size_t count);
-long sys32_clone(void);
 long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
 long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
 long sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 6247900..28f8440 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1840,3 +1840,11 @@ sys_perf_event_open_wrapper:
 	lgfr	%r5,%r5			# int
 	llgfr	%r6,%r6			# unsigned long
 	jg	sys_perf_event_open	# branch to system call
+
+	.globl	sys_clone_wrapper
+sys_clone_wrapper:
+	llgfr	%r2,%r2			# unsigned long
+	llgfr	%r3,%r3			# unsigned long
+	llgtr	%r4,%r4			# int *
+	llgtr	%r5,%r5			# int *
+	jg	sys_clone		# branch to system call
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 950c59c..91fd788 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -42,7 +42,8 @@ long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
 long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high,
 			u32 len_low);
 long sys_fork(void);
-long sys_clone(void);
+long sys_clone(unsigned long newsp, unsigned long clone_flags,
+	       int __user *parent_tidptr, int __user *child_tidptr);
 long sys_vfork(void);
 void execve_tail(void);
 long sys_execve(void);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 5a43f27..189d978 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -230,17 +230,11 @@ SYSCALL_DEFINE0(fork)
 	return do_fork(SIGCHLD, regs->gprs[15], regs, 0, NULL, NULL);
 }
 
-SYSCALL_DEFINE0(clone)
+SYSCALL_DEFINE4(clone, unsigned long, newsp, unsigned long, clone_flags,
+		int __user *, parent_tidptr, int __user *, child_tidptr)
 {
 	struct pt_regs *regs = task_pt_regs(current);
-	unsigned long clone_flags;
-	unsigned long newsp;
-	int __user *parent_tidptr, *child_tidptr;
 
-	clone_flags = regs->gprs[3];
-	newsp = regs->orig_gpr2;
-	parent_tidptr = (int __user *) regs->gprs[4];
-	child_tidptr = (int __user *) regs->gprs[5];
 	if (!newsp)
 		newsp = regs->gprs[15];
 	return do_fork(clone_flags, newsp, regs, 0,
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 0b50836..ff79d58 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -128,7 +128,7 @@ SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper)
 SYSCALL(sys_ipc,sys_ipc,sys32_ipc_wrapper)
 SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper)
 SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn)
-SYSCALL(sys_clone,sys_clone,sys32_clone)			/* 120 */
+SYSCALL(sys_clone,sys_clone,sys_clone_wrapper)			/* 120 */
 SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper)
 SYSCALL(sys_newuname,sys_s390_newuname,sys32_newuname_wrapper)
 NI_SYSCALL							/* modify_ldt for i386 */
-- 
cgit v0.10.2


From 3e86a8c617413e344143839c514e9b0c1713065c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:42 +0200
Subject: [S390] Convert sys_execve to function with parameters.

Use function parameters instead of accessing the pt_regs structure
to get the parameters.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f741cd4..05cf446 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -443,31 +443,26 @@ sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
  * sys32_execve() executes a new program after the asm stub has set
  * things up for us.  This should basically do what I want it to.
  */
-asmlinkage long sys32_execve(void)
+asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
+			     compat_uptr_t __user *envp)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	char *filename;
-	unsigned long result;
-	int rc;
-
-	filename = getname(compat_ptr(regs->orig_gpr2));
-	if (IS_ERR(filename)) {
-		result = PTR_ERR(filename);
-                goto out;
-	}
-	rc = compat_do_execve(filename, compat_ptr(regs->gprs[3]),
-			      compat_ptr(regs->gprs[4]), regs);
-	if (rc) {
-		result = rc;
-		goto out_putname;
-	}
+	long rc;
+
+	filename = getname(name);
+	rc = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		return rc;
+	rc = compat_do_execve(filename, argv, envp, regs);
+	if (rc)
+		goto out;
 	current->thread.fp_regs.fpc=0;
 	asm volatile("sfpc %0,0" : : "d" (0));
-	result = regs->gprs[2];
-out_putname:
-        putname(filename);
+	rc = regs->gprs[2];
 out:
-	return result;
+	putname(filename);
+	return rc;
 }
 
 
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 7cceb67..c07f9ca 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -198,7 +198,8 @@ long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
 			  compat_sigset_t __user *oset, size_t sigsetsize);
 long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
 long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
-long sys32_execve(void);
+long sys32_execve(char __user *name, compat_uptr_t __user *argv,
+		  compat_uptr_t __user *envp);
 long sys32_init_module(void __user *umod, unsigned long len,
 		       const char __user *uargs);
 long sys32_delete_module(const char __user *name_user, unsigned int flags);
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 28f8440..8a6a796 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1848,3 +1848,10 @@ sys_clone_wrapper:
 	llgtr	%r4,%r4			# int *
 	llgtr	%r5,%r5			# int *
 	jg	sys_clone		# branch to system call
+
+	.globl	sys32_execve_wrapper
+sys32_execve_wrapper:
+	llgtr	%r2,%r2			# char *
+	llgtr	%r3,%r3			# compat_uptr_t *
+	llgtr	%r4,%r4			# compat_uptr_t *
+	jg	sys32_execve		# branch to system call
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 91fd788..e1e5e76 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -46,7 +46,8 @@ long sys_clone(unsigned long newsp, unsigned long clone_flags,
 	       int __user *parent_tidptr, int __user *child_tidptr);
 long sys_vfork(void);
 void execve_tail(void);
-long sys_execve(void);
+long sys_execve(char __user *name, char __user * __user *argv,
+		char __user * __user *envp);
 long sys_sigsuspend(int history0, int history1, old_sigset_t mask);
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 		   struct old_sigaction __user *oact);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 189d978..59fe6ec 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -32,6 +32,7 @@
 #include <linux/elfcore.h>
 #include <linux/kernel_stat.h>
 #include <linux/syscalls.h>
+#include <linux/compat.h>
 #include <asm/compat.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -268,30 +269,25 @@ asmlinkage void execve_tail(void)
 /*
  * sys_execve() executes a new program.
  */
-SYSCALL_DEFINE0(execve)
+SYSCALL_DEFINE3(execve, char __user *, name, char __user * __user *, argv,
+		char __user * __user *, envp)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	char *filename;
-	unsigned long result;
-	int rc;
+	long rc;
 
-	filename = getname((char __user *) regs->orig_gpr2);
-	if (IS_ERR(filename)) {
-		result = PTR_ERR(filename);
+	filename = getname(name);
+	rc = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		return rc;
+	rc = do_execve(filename, argv, envp, regs);
+	if (rc)
 		goto out;
-	}
-	rc = do_execve(filename, (char __user * __user *) regs->gprs[3],
-		       (char __user * __user *) regs->gprs[4], regs);
-	if (rc) {
-		result = rc;
-		goto out_putname;
-	}
 	execve_tail();
-	result = regs->gprs[2];
-out_putname:
-	putname(filename);
+	rc = regs->gprs[2];
 out:
-	return result;
+	putname(filename);
+	return rc;
 }
 
 /*
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index ff79d58..062dd8f 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -19,7 +19,7 @@ SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall)
 SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper)
 SYSCALL(sys_link,sys_link,sys32_link_wrapper)
 SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper)		/* 10 */
-SYSCALL(sys_execve,sys_execve,sys32_execve)
+SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper)
 SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper)
 SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper)		/* old time syscall */
 SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper)
-- 
cgit v0.10.2


From 2e50195f58ec045bc4601ec94478d957974f4aa4 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:43 +0200
Subject: [S390] Get rid of init_module/delete_module compat functions.

These functions aren't needed. Might be a leftover of the pre
cond_syscall time.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 05cf446..5519cb7 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -465,39 +465,6 @@ out:
 	return rc;
 }
 
-
-#ifdef CONFIG_MODULES
-
-asmlinkage long
-sys32_init_module(void __user *umod, unsigned long len,
-		const char __user *uargs)
-{
-	return sys_init_module(umod, len, uargs);
-}
-
-asmlinkage long
-sys32_delete_module(const char __user *name_user, unsigned int flags)
-{
-	return sys_delete_module(name_user, flags);
-}
-
-#else /* CONFIG_MODULES */
-
-asmlinkage long
-sys32_init_module(void __user *umod, unsigned long len,
-		const char __user *uargs)
-{
-	return -ENOSYS;
-}
-
-asmlinkage long
-sys32_delete_module(const char __user *name_user, unsigned int flags)
-{
-	return -ENOSYS;
-}
-
-#endif  /* CONFIG_MODULES */
-
 asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
 				size_t count, u32 poshi, u32 poslo)
 {
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 8a6a796..682fb69 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -568,18 +568,18 @@ compat_sys_sigprocmask_wrapper:
 	llgtr	%r4,%r4			# compat_old_sigset_t *
 	jg	compat_sys_sigprocmask		# branch to system call
 
-	.globl	sys32_init_module_wrapper
-sys32_init_module_wrapper:
+	.globl	sys_init_module_wrapper
+sys_init_module_wrapper:
 	llgtr	%r2,%r2			# void *
 	llgfr	%r3,%r3			# unsigned long
 	llgtr	%r4,%r4			# char *
-	jg	sys32_init_module	# branch to system call
+	jg	sys_init_module		# branch to system call
 
-	.globl	sys32_delete_module_wrapper
-sys32_delete_module_wrapper:
+	.globl	sys_delete_module_wrapper
+sys_delete_module_wrapper:
 	llgtr	%r2,%r2			# const char *
 	llgfr	%r3,%r3			# unsigned int
-	jg	sys32_delete_module	# branch to system call
+	jg	sys_delete_module	# branch to system call
 
 	.globl	sys32_quotactl_wrapper
 sys32_quotactl_wrapper:
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 062dd8f..30eca07 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -136,8 +136,8 @@ SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper)
 SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper)	/* 125 */
 SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask_wrapper)
 NI_SYSCALL							/* old "create module" */
-SYSCALL(sys_init_module,sys_init_module,sys32_init_module_wrapper)
-SYSCALL(sys_delete_module,sys_delete_module,sys32_delete_module_wrapper)
+SYSCALL(sys_init_module,sys_init_module,sys_init_module_wrapper)
+SYSCALL(sys_delete_module,sys_delete_module,sys_delete_module_wrapper)
 NI_SYSCALL							/* 130: old get_kernel_syms */
 SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper)
 SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper)
-- 
cgit v0.10.2


From 846955c8afe5ebca2f8841b042ca3342e08a092b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:44 +0200
Subject: [S390] hibernation: fix guest page hinting related crash

On resume the system that loads the to be resumed image might have
unstable pages.
When the resume image is copied back and a write access happen to an
unstable page this causes an exception and the system crashes.

To fix this set all free pages to stable before copying the resumed
image data. Also after everything has been restored set all free
pages of the resumed system to unstable again.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index 7cd6b09..9a86ccb 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -102,6 +102,9 @@ swsusp_arch_resume:
 	aghi	%r15,-STACK_FRAME_OVERHEAD
 	stg	%r1,__SF_BACKCHAIN(%r15)
 
+	/* Make all free pages stable */
+	lghi	%r2,1
+	brasl	%r14,arch_set_page_states
 #ifdef CONFIG_SMP
 	/* Save boot cpu number */
 	brasl	%r14,smp_get_phys_cpu_id
@@ -178,6 +181,10 @@ swsusp_arch_resume:
 	/* Activate DAT */
 	stosm	__SF_EMPTY(%r15),0x04
 
+	/* Make all free pages unstable */
+	lghi	%r2,0
+	brasl	%r14,arch_set_page_states
+
 	/* Return 0 */
 	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
 	lghi	%r2,0
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index f92ec20..098923a 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -50,28 +50,64 @@ void __init cmma_init(void)
 		cmma_flag = 0;
 }
 
-void arch_free_page(struct page *page, int order)
+static inline void set_page_unstable(struct page *page, int order)
 {
 	int i, rc;
 
-	if (!cmma_flag)
-		return;
 	for (i = 0; i < (1 << order); i++)
 		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
 			     : "=&d" (rc)
-			     : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
+			     : "a" (page_to_phys(page + i)),
 			       "i" (ESSA_SET_UNUSED));
 }
 
-void arch_alloc_page(struct page *page, int order)
+void arch_free_page(struct page *page, int order)
 {
-	int i, rc;
-
 	if (!cmma_flag)
 		return;
+	set_page_unstable(page, order);
+}
+
+static inline void set_page_stable(struct page *page, int order)
+{
+	int i, rc;
+
 	for (i = 0; i < (1 << order); i++)
 		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
 			     : "=&d" (rc)
-			     : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
+			     : "a" (page_to_phys(page + i)),
 			       "i" (ESSA_SET_STABLE));
 }
+
+void arch_alloc_page(struct page *page, int order)
+{
+	if (!cmma_flag)
+		return;
+	set_page_stable(page, order);
+}
+
+void arch_set_page_states(int make_stable)
+{
+	unsigned long flags, order, t;
+	struct list_head *l;
+	struct page *page;
+	struct zone *zone;
+
+	if (!cmma_flag)
+		return;
+	if (make_stable)
+		drain_local_pages(NULL);
+	for_each_populated_zone(zone) {
+		spin_lock_irqsave(&zone->lock, flags);
+		for_each_migratetype_order(order, t) {
+			list_for_each(l, &zone->free_area[order].free_list[t]) {
+				page = list_entry(l, struct page, lru);
+				if (make_stable)
+					set_page_stable(page, order);
+				else
+					set_page_unstable(page, order);
+			}
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
-- 
cgit v0.10.2


From 2583d1efe04170cf166d43da6e4ed309e5601e84 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:45 +0200
Subject: [S390] hibernation: reset system after resume

Force system into defined state after resume.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index 9a86ccb..fc05681 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -108,7 +108,8 @@ swsusp_arch_resume:
 #ifdef CONFIG_SMP
 	/* Save boot cpu number */
 	brasl	%r14,smp_get_phys_cpu_id
-	lgr	%r10,%r2
+	larl	%r1,saved_cpu_id
+	st	%r2,0(%r1)
 #endif
 	/* Deactivate DAT */
 	stnsm	__SF_EMPTY(%r15),0xfb
@@ -136,6 +137,29 @@ swsusp_arch_resume:
 2:
 	ptlb				/* flush tlb */
 
+	/* Reset System */
+	larl	%r1,restart_entry
+	larl	%r2,restart_psw
+	og	%r1,0(%r2)
+	stg	%r1,0(%r0)
+	larl	%r1,saved_pgm_check_psw
+	mvc	0(16,%r1),__LC_PGM_NEW_PSW(%r0)
+	larl	%r1,new_pgm_check_psw
+	epsw	%r2,%r3
+	stm	%r2,%r3,0(%r1)
+	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
+	lghi	%r0,0
+	diag	%r0,%r0,0x308
+restart_entry:
+	lhi	%r1,1
+	sigp	%r1,%r0,0x12
+	sam64
+	larl	%r1,new_pgm_check_psw
+	lpswe	0(%r1)
+pgm_check_entry:
+	larl	%r1,saved_pgm_check_psw
+	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
+
 	/* Restore registers */
 	lghi	%r13,0x1000		/* %r1 = pointer to save arae */
 
@@ -171,7 +195,8 @@ swsusp_arch_resume:
 
 #ifdef CONFIG_SMP
 	/* Switch CPUs */
-	lgr	%r2,%r10		/* get cpu id */
+	larl	%r1,saved_cpu_id
+	llgf	%r2,0(%r1)
 	llgf	%r3,0x318(%r13)
 	brasl	%r14,smp_switch_boot_cpu_in_resume
 #endif
@@ -189,3 +214,16 @@ swsusp_arch_resume:
 	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
 	lghi	%r2,0
 	br	%r14
+
+	.section .data.nosave,"aw",@progbits
+	.align	8
+restart_psw:
+	.long	0x00080000,0x80000000
+new_pgm_check_psw:
+	.quad	0,pgm_check_entry
+saved_pgm_check_psw:
+	.quad	0,0
+#ifdef CONFIG_SMP
+saved_cpu_id:
+	.long	0
+#endif
-- 
cgit v0.10.2


From 87458ff4582953d6b3bf45edeac8582849552e69 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:46 +0200
Subject: [S390] Change kernel_page_present coding style.

Make the inline assembly look like all others.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index c702152..c60bfb3 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -314,21 +314,18 @@ int s390_enable_sie(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-#ifdef CONFIG_HIBERNATION
+#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
 bool kernel_page_present(struct page *page)
 {
 	unsigned long addr;
 	int cc;
 
 	addr = page_to_phys(page);
-	asm("lra %1,0(%1)\n"
-	    "ipm %0\n"
-	    "srl %0,28"
-	    :"=d"(cc),"+a"(addr)::"cc");
+	asm volatile(
+		"	lra	%1,0(%1)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (cc), "+a" (addr) : : "cc");
 	return cc == 0;
 }
-
-#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
+#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
-- 
cgit v0.10.2


From bdd42b28cde0453fb8c3d9b4e4868ca2d8124806 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:47 +0200
Subject: [S390] fix disabled_wait inline assembly clobber list

The disabled_wait inline assmembly also clobbers register r1, but it
is missing in the clobber list.
Fixes recursive Oops on panic.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index cf8eed3..b427154 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -295,7 +295,7 @@ static inline void ATTRIB_NORET disabled_wait(unsigned long code)
 		"	oi	0x384(1),0x10\n"/* fake protection bit */
 		"	lpswe	0(%1)"
 		: "=m" (ctl_buf)
-		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0");
+		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1");
 #endif /* __s390x__ */
 	while (1);
 }
-- 
cgit v0.10.2


From 07805ac81c81634623042408ffea6bbad3160bfa Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:48 +0200
Subject: [S390] ptrace: use common code for simple peek/poke operations

arch_ptrace on s390 implements PTRACE_(PEEK|POKE)(TEXT|DATA) instead of
using using ptrace_request in kernel/ptrace.c.
The only reason is the 31bit addressing mode, where we have to unmask the
highest bit.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index f3ddd7a..a873867 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -339,24 +339,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	int copied, ret;
 
 	switch (request) {
-	case PTRACE_PEEKTEXT:
-	case PTRACE_PEEKDATA:
-		/* Remove high order bit from address (only for 31 bit). */
-		addr &= PSW_ADDR_INSN;
-		/* read word at location addr. */
-		return generic_ptrace_peekdata(child, addr, data);
-
 	case PTRACE_PEEKUSR:
 		/* read the word at location addr in the USER area. */
 		return peek_user(child, addr, data);
 
-	case PTRACE_POKETEXT:
-	case PTRACE_POKEDATA:
-		/* Remove high order bit from address (only for 31 bit). */
-		addr &= PSW_ADDR_INSN;
-		/* write the word at location addr. */
-		return generic_ptrace_pokedata(child, addr, data);
-
 	case PTRACE_POKEUSR:
 		/* write the word at location addr in the USER area */
 		return poke_user(child, addr, data);
@@ -386,8 +372,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			copied += sizeof(unsigned long);
 		}
 		return 0;
+	default:
+		/* Removing high order bit from addr (only for 31 bit). */
+		addr &= PSW_ADDR_INSN;
+		return ptrace_request(child, request, addr, data);
 	}
-	return ptrace_request(child, request, addr, data);
 }
 
 #ifdef CONFIG_COMPAT
-- 
cgit v0.10.2


From 3fd26a7793fb21b88ccf1e238670b2a508fcf835 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:49 +0200
Subject: [S390] smp: introduce LC_ORDER and simplify lowcore handling

Removes a couple of simple code duplications. But before I have to do
this again, just simplify it.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 6bc9426..8c55703 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -189,6 +189,12 @@ union save_area {
 #define SAVE_AREA_BASE SAVE_AREA_BASE_S390X
 #endif
 
+#ifndef __s390x__
+#define LC_ORDER 0
+#else
+#define LC_ORDER 1
+#endif
+
 struct _lowcore
 {
 #ifndef __s390x__
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 56c1687..6f14734 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -475,10 +475,8 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
 {
 	unsigned long async_stack, panic_stack;
 	struct _lowcore *lowcore;
-	int lc_order;
 
-	lc_order = sizeof(long) == 8 ? 1 : 0;
-	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
+	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
 	if (!lowcore)
 		return -ENOMEM;
 	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
@@ -509,16 +507,14 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
 out:
 	free_page(panic_stack);
 	free_pages(async_stack, ASYNC_ORDER);
-	free_pages((unsigned long) lowcore, lc_order);
+	free_pages((unsigned long) lowcore, LC_ORDER);
 	return -ENOMEM;
 }
 
 static void smp_free_lowcore(int cpu)
 {
 	struct _lowcore *lowcore;
-	int lc_order;
 
-	lc_order = sizeof(long) == 8 ? 1 : 0;
 	lowcore = lowcore_ptr[cpu];
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
@@ -528,7 +524,7 @@ static void smp_free_lowcore(int cpu)
 #endif
 	free_page(lowcore->panic_stack - PAGE_SIZE);
 	free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
-	free_pages((unsigned long) lowcore, lc_order);
+	free_pages((unsigned long) lowcore, LC_ORDER);
 	lowcore_ptr[cpu] = NULL;
 }
 
@@ -664,7 +660,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	unsigned long async_stack, panic_stack;
 	struct _lowcore *lowcore;
 	unsigned int cpu;
-	int lc_order;
 
 	smp_detect_cpus();
 
@@ -674,8 +669,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	print_cpu_info();
 
 	/* Reallocate current lowcore, but keep its contents. */
-	lc_order = sizeof(long) == 8 ? 1 : 0;
-	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
+	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
 	panic_stack = __get_free_page(GFP_KERNEL);
 	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
 	BUG_ON(!lowcore || !panic_stack || !async_stack);
-- 
cgit v0.10.2


From 2573a575304c5ce4765fc88f9d09ed4dbf8d04bf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:50 +0200
Subject: [S390] hibernate: make sure pfn_is_nosave handles lowcore pages

pfn_is_nosave doesn't return the correct value for the second lowcore
page if lowcore protection is enabled. Make sure it always returns
the correct value.

While at it simplify the whole thing.
NSS special handling is done by the tprot check like it already works
for DCSS as well. So remove the extra code for NSS.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 8c55703..a3ff9b0 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -195,6 +195,8 @@ union save_area {
 #define LC_ORDER 1
 #endif
 
+#define LC_PAGES (1UL << LC_ORDER)
+
 struct _lowcore
 {
 #ifndef __s390x__
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index 086bee9..cf9e5c6 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -6,36 +6,26 @@
  * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
  */
 
-#include <linux/suspend.h>
-#include <linux/reboot.h>
 #include <linux/pfn.h>
-#include <linux/mm.h>
-#include <asm/sections.h>
 #include <asm/system.h>
-#include <asm/ipl.h>
 
 /*
  * References to section boundaries
  */
 extern const void __nosave_begin, __nosave_end;
 
-/*
- *  check if given pfn is in the 'nosave' or in the read only NSS section
- */
 int pfn_is_nosave(unsigned long pfn)
 {
-	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end))
-					>> PAGE_SHIFT;
-	unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
-	unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
+	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
 
+	/* Always save lowcore pages (LC protection might be enabled). */
+	if (pfn <= LC_PAGES)
+		return 0;
 	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
 		return 1;
-	if (pfn >= stext_pfn && pfn <= eshared_pfn) {
-		if (ipl_info.type == IPL_TYPE_NSS)
-			return 1;
-	} else if ((tprot(pfn * PAGE_SIZE) && pfn > 0))
+	/* Skip memory holes and read-only pages (NSS, DCSS, ...). */
+	if (tprot(PFN_PHYS(pfn)))
 		return 1;
 	return 0;
 }
-- 
cgit v0.10.2


From 5314af693da5149c2361d290bb184cf18ee21cdd Mon Sep 17 00:00:00 2001
From: Felix Beck <felix.beck@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:51 +0200
Subject: [S390] zcrypt: Do not add/remove devices in s/r callbacks

Devices are no longer removed or added in the suspend and resume
callbacks. Instead they are marked unregistered in suspend. In the
resume callback the ap_scan_bus method is scheduled. The bus scan
function will remove the old device and add new ones. This way all
the device handling will be done in only one function. Additionaly
the case where the domain might change during suspend/resume is
caught. In that case the devices qid needs to re-calculated in
order of having it found by the scan method.

Signed-off-by: Felix Beck <felix.beck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 090b32a..1294876 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -60,6 +60,7 @@ static int ap_device_probe(struct device *dev);
 static void ap_interrupt_handler(void *unused1, void *unused2);
 static void ap_reset(struct ap_device *ap_dev);
 static void ap_config_timeout(unsigned long ptr);
+static int ap_select_domain(void);
 
 /*
  * Module description.
@@ -109,6 +110,10 @@ static unsigned long long poll_timeout = 250000;
 
 /* Suspend flag */
 static int ap_suspend_flag;
+/* Flag to check if domain was set through module parameter domain=. This is
+ * important when supsend and resume is done in a z/VM environment where the
+ * domain might change. */
+static int user_set_domain = 0;
 static struct bus_type ap_bus_type;
 
 /**
@@ -643,6 +648,7 @@ static int ap_bus_suspend(struct device *dev, pm_message_t state)
 			destroy_workqueue(ap_work_queue);
 			ap_work_queue = NULL;
 		}
+
 		tasklet_disable(&ap_tasklet);
 	}
 	/* Poll on the device until all requests are finished. */
@@ -653,7 +659,10 @@ static int ap_bus_suspend(struct device *dev, pm_message_t state)
 		spin_unlock_bh(&ap_dev->lock);
 	} while ((flags & 1) || (flags & 2));
 
-	ap_device_remove(dev);
+	spin_lock_bh(&ap_dev->lock);
+	ap_dev->unregistered = 1;
+	spin_unlock_bh(&ap_dev->lock);
+
 	return 0;
 }
 
@@ -666,11 +675,10 @@ static int ap_bus_resume(struct device *dev)
 		ap_suspend_flag = 0;
 		if (!ap_interrupts_available())
 			ap_interrupt_indicator = NULL;
-		ap_device_probe(dev);
-		ap_reset(ap_dev);
-		setup_timer(&ap_dev->timeout, ap_request_timeout,
-			    (unsigned long) ap_dev);
-		ap_scan_bus(NULL);
+		if (!user_set_domain) {
+			ap_domain_index = -1;
+			ap_select_domain();
+		}
 		init_timer(&ap_config_timer);
 		ap_config_timer.function = ap_config_timeout;
 		ap_config_timer.data = 0;
@@ -686,12 +694,14 @@ static int ap_bus_resume(struct device *dev)
 			tasklet_schedule(&ap_tasklet);
 		if (ap_thread_flag)
 			rc = ap_poll_thread_start();
-	} else {
-		ap_device_probe(dev);
-		ap_reset(ap_dev);
-		setup_timer(&ap_dev->timeout, ap_request_timeout,
-			    (unsigned long) ap_dev);
 	}
+	if (AP_QID_QUEUE(ap_dev->qid) != ap_domain_index) {
+		spin_lock_bh(&ap_dev->lock);
+		ap_dev->qid = AP_MKQID(AP_QID_DEVICE(ap_dev->qid),
+				       ap_domain_index);
+		spin_unlock_bh(&ap_dev->lock);
+	}
+	queue_work(ap_work_queue, &ap_config_work);
 
 	return rc;
 }
@@ -1079,6 +1089,8 @@ static void ap_scan_bus(struct work_struct *unused)
 			spin_lock_bh(&ap_dev->lock);
 			if (rc || ap_dev->unregistered) {
 				spin_unlock_bh(&ap_dev->lock);
+				if (ap_dev->unregistered)
+					i--;
 				device_unregister(dev);
 				put_device(dev);
 				continue;
@@ -1586,6 +1598,12 @@ int __init ap_module_init(void)
 			   ap_domain_index);
 		return -EINVAL;
 	}
+	/* In resume callback we need to know if the user had set the domain.
+	 * If so, we can not just reset it.
+	 */
+	if (ap_domain_index >= 0)
+		user_set_domain = 1;
+
 	if (ap_instructions_available() != 0) {
 		pr_warning("The hardware system does not support "
 			   "AP instructions\n");
-- 
cgit v0.10.2


From 68d1e5f08b13132504752cad54169376739753db Mon Sep 17 00:00:00 2001
From: Stefan Weinhuber <wein@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:52 +0200
Subject: [S390] dasd: tolerate devices that have no feature codes

The DASD device driver reads the feature codes of a device during
device initialization. These codes are later used to determine the
availability of advanced features like PAV or High Performance FICON.
Some very old devices do not support the command to read feature
codes and the initialization routine fails.
As the feature codes are not necessary for basic DASD operations, we
can support such devices by just ignoring missing feature codes.

Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index a1ce573..614813f 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -935,6 +935,7 @@ static int dasd_eckd_read_features(struct dasd_device *device)
 	struct dasd_eckd_private *private;
 
 	private = (struct dasd_eckd_private *) device->private;
+	memset(&private->features, 0, sizeof(struct dasd_rssd_features));
 	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */	+ 1 /* RSSD */,
 				   (sizeof(struct dasd_psf_prssd_data) +
 				    sizeof(struct dasd_rssd_features)),
@@ -982,7 +983,9 @@ static int dasd_eckd_read_features(struct dasd_device *device)
 		features = (struct dasd_rssd_features *) (prssdp + 1);
 		memcpy(&private->features, features,
 		       sizeof(struct dasd_rssd_features));
-	}
+	} else
+		dev_warn(&device->cdev->dev, "Reading device feature codes"
+			 " failed with rc=%d\n", rc);
 	dasd_sfree_request(cqr, cqr->memdev);
 	return rc;
 }
@@ -1144,9 +1147,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
 	}
 
 	/* Read Feature Codes */
-	rc = dasd_eckd_read_features(device);
-	if (rc)
-		goto out_err3;
+	dasd_eckd_read_features(device);
 
 	/* Read Device Characteristics */
 	rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
@@ -3241,9 +3242,7 @@ int dasd_eckd_restore_device(struct dasd_device *device)
 	}
 
 	/* Read Feature Codes */
-	rc = dasd_eckd_read_features(device);
-	if (rc)
-		goto out_err;
+	dasd_eckd_read_features(device);
 
 	/* Read Device Characteristics */
 	memset(&private->rdc_data, 0, sizeof(private->rdc_data));
-- 
cgit v0.10.2


From 1aaf179d043856d80bbb354f9feaf706b9cfbcd3 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <michael.holzheu@linux.vnet.ibm.com>
Date: Tue, 22 Sep 2009 22:58:53 +0200
Subject: [S390] hibernate: Do real CPU swap at resume time

Currently, when the physical resume CPU is not equal to the physical suspend
CPU, we swap the CPUs logically, by modifying the logical/physical CPU mapping.
This has two major drawbacks: First the change is visible from user space (e.g.
CPU sysfs files) and second it is hard to ensure that nowhere in the kernel
the physical CPU ID is stored before suspend.
To fix this, we now really swap the physical CPUs, if the resume CPU is not
the pysical suspend CPU. We restart the suspend CPU and stop the resume CPU
using SIGP restart and SIGP stop. If the suspend CPU is no longer available,
we write a message and load a disabled wait PSW.

Signed-off-by: Michael Holzheu <michael.holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index a3ff9b0..f2ef4b6 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -86,6 +86,7 @@
 #define __LC_PGM_OLD_PSW		0x0150
 #define __LC_MCK_OLD_PSW		0x0160
 #define __LC_IO_OLD_PSW			0x0170
+#define __LC_RESTART_PSW		0x01a0
 #define __LC_EXT_NEW_PSW		0x01b0
 #define __LC_SVC_NEW_PSW		0x01c0
 #define __LC_PGM_NEW_PSW		0x01d0
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index fa9905c..63e4643 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/kbuild.h>
 #include <asm/vdso.h>
+#include <asm/sigp.h>
 
 int main(void)
 {
@@ -59,6 +60,10 @@ int main(void)
 	DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
 	DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
 	DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
-
+	/* constants for SIGP */
+	DEFINE(__SIGP_STOP, sigp_stop);
+	DEFINE(__SIGP_RESTART, sigp_restart);
+	DEFINE(__SIGP_SENSE, sigp_sense);
+	DEFINE(__SIGP_INITIAL_CPU_RESET, sigp_initial_cpu_reset);
 	return 0;
 }
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
index 20639df..e27ca63 100644
--- a/arch/s390/kernel/sclp.S
+++ b/arch/s390/kernel/sclp.S
@@ -24,8 +24,6 @@ LC_EXT_INT_CODE		= 0x86			# addr of ext int code
 #   R3	= external interruption parameter if R2=0
 #
 
-.section ".init.text","ax"
-
 _sclp_wait_int:
 	stm	%r6,%r15,24(%r15)		# save registers
 	basr	%r13,0				# get base register
@@ -318,9 +316,8 @@ _sclp_print_early:
 	.long	_sclp_work_area
 .Lascebc:
 	.long	_ascebc
-.previous
 
-.section ".init.data","a"
+.section .data,"aw",@progbits
 	.balign 4096
 _sclp_work_area:
 	.fill	4096
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6f14734..b4b6396 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1041,42 +1041,6 @@ out:
 static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show,
 			 dispatching_store);
 
-/*
- * If the resume kernel runs on another cpu than the suspended kernel,
- * we have to switch the cpu IDs in the logical map.
- */
-void smp_switch_boot_cpu_in_resume(u32 resume_phys_cpu_id,
-				   struct _lowcore *suspend_lowcore)
-{
-	int cpu, suspend_cpu_id, resume_cpu_id;
-	u32 suspend_phys_cpu_id;
-
-	suspend_phys_cpu_id = __cpu_logical_map[suspend_lowcore->cpu_nr];
-	suspend_cpu_id = suspend_lowcore->cpu_nr;
-
-	for_each_present_cpu(cpu) {
-		if (__cpu_logical_map[cpu] == resume_phys_cpu_id) {
-			resume_cpu_id = cpu;
-			goto found;
-		}
-	}
-	panic("Could not find resume cpu in logical map.\n");
-
-found:
-	printk("Resume  cpu ID: %i/%i\n", resume_phys_cpu_id, resume_cpu_id);
-	printk("Suspend cpu ID: %i/%i\n", suspend_phys_cpu_id, suspend_cpu_id);
-
-	__cpu_logical_map[resume_cpu_id] = suspend_phys_cpu_id;
-	__cpu_logical_map[suspend_cpu_id] = resume_phys_cpu_id;
-
-	lowcore_ptr[suspend_cpu_id]->cpu_addr = resume_phys_cpu_id;
-}
-
-u32 smp_get_phys_cpu_id(void)
-{
-	return __cpu_logical_map[smp_processor_id()];
-}
-
 static int __init topology_init(void)
 {
 	int cpu;
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index fc05681..fe927d0 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -9,6 +9,7 @@
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
+#include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 
 /*
@@ -41,6 +42,9 @@ swsusp_arch_suspend:
 	/* Get pointer to save area */
 	lghi	%r1,0x1000
 
+	/* Save CPU address */
+	stap	__LC_CPU_ADDRESS(%r1)
+
 	/* Store registers */
 	mvc	0x318(4,%r1),__SF_EMPTY(%r15)	/* move prefix to lowcore */
 	stfpc	0x31c(%r1)			/* store fpu control */
@@ -105,12 +109,7 @@ swsusp_arch_resume:
 	/* Make all free pages stable */
 	lghi	%r2,1
 	brasl	%r14,arch_set_page_states
-#ifdef CONFIG_SMP
-	/* Save boot cpu number */
-	brasl	%r14,smp_get_phys_cpu_id
-	larl	%r1,saved_cpu_id
-	st	%r2,0(%r1)
-#endif
+
 	/* Deactivate DAT */
 	stnsm	__SF_EMPTY(%r15),0xfb
 
@@ -139,12 +138,10 @@ swsusp_arch_resume:
 
 	/* Reset System */
 	larl	%r1,restart_entry
-	larl	%r2,restart_psw
+	larl	%r2,.Lrestart_diag308_psw
 	og	%r1,0(%r2)
 	stg	%r1,0(%r0)
-	larl	%r1,saved_pgm_check_psw
-	mvc	0(16,%r1),__LC_PGM_NEW_PSW(%r0)
-	larl	%r1,new_pgm_check_psw
+	larl	%r1,.Lnew_pgm_check_psw
 	epsw	%r2,%r3
 	stm	%r2,%r3,0(%r1)
 	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
@@ -154,12 +151,54 @@ restart_entry:
 	lhi	%r1,1
 	sigp	%r1,%r0,0x12
 	sam64
-	larl	%r1,new_pgm_check_psw
+	larl	%r1,.Lnew_pgm_check_psw
 	lpswe	0(%r1)
 pgm_check_entry:
-	larl	%r1,saved_pgm_check_psw
-	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
 
+	/* Switch to original suspend CPU */
+	larl	%r1,.Lresume_cpu		/* Resume CPU address: r2 */
+	stap	0(%r1)
+	llgh	%r2,0(%r1)
+	lghi	%r3,0x1000
+	llgh	%r1,__LC_CPU_ADDRESS(%r3)	/* Suspend CPU address: r1 */
+	cgr	%r1,%r2
+	je	restore_registers		/* r1 = r2 -> nothing to do */
+	larl	%r4,.Lrestart_suspend_psw	/* Set new restart PSW */
+	mvc	__LC_RESTART_PSW(16,%r0),0(%r4)
+3:
+	sigp	%r9,%r1,__SIGP_INITIAL_CPU_RESET
+	brc	8,4f	/* accepted */
+	brc	2,3b	/* busy, try again */
+
+	/* Suspend CPU not available -> panic */
+	larl	%r15,init_thread_union
+	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER)
+	larl	%r2,.Lpanic_string
+	larl	%r3,_sclp_print_early
+	lghi	%r1,0
+	sam31
+	sigp	%r1,%r0,0x12
+	basr	%r14,%r3
+	larl	%r3,.Ldisabled_wait_31
+	lpsw	0(%r3)
+4:
+	/* Switch to suspend CPU */
+	sigp	%r9,%r1,__SIGP_RESTART	/* start suspend CPU */
+	brc	2,4b			/* busy, try again */
+5:
+	sigp	%r9,%r2,__SIGP_STOP	/* stop resume (current) CPU */
+6:	j	6b
+
+restart_suspend:
+	larl	%r1,.Lresume_cpu
+	llgh	%r2,0(%r1)
+7:
+	sigp	%r9,%r2,__SIGP_SENSE	/* Wait for resume CPU */
+	brc	2,7b			/* busy, try again */
+	tmll	%r9,0x40		/* Test if resume CPU is stopped */
+	jz	7b
+
+restore_registers:
 	/* Restore registers */
 	lghi	%r13,0x1000		/* %r1 = pointer to save arae */
 
@@ -193,13 +232,6 @@ pgm_check_entry:
 	/* Pointer to save area */
 	lghi	%r13,0x1000
 
-#ifdef CONFIG_SMP
-	/* Switch CPUs */
-	larl	%r1,saved_cpu_id
-	llgf	%r2,0(%r1)
-	llgf	%r3,0x318(%r13)
-	brasl	%r14,smp_switch_boot_cpu_in_resume
-#endif
 	/* Restore prefix register */
 	spx	0x318(%r13)
 
@@ -217,13 +249,16 @@ pgm_check_entry:
 
 	.section .data.nosave,"aw",@progbits
 	.align	8
-restart_psw:
+.Ldisabled_wait_31:
+	.long  0x000a0000,0x00000000
+.Lpanic_string:
+	.asciz	"Resume not possible because suspend CPU is no longer available"
+	.align	8
+.Lrestart_diag308_psw:
 	.long	0x00080000,0x80000000
-new_pgm_check_psw:
+.Lrestart_suspend_psw:
+	.quad	0x0000000180000000,restart_suspend
+.Lnew_pgm_check_psw:
 	.quad	0,pgm_check_entry
-saved_pgm_check_psw:
-	.quad	0,0
-#ifdef CONFIG_SMP
-saved_cpu_id:
-	.long	0
-#endif
+.Lresume_cpu:
+	.byte	0,0
-- 
cgit v0.10.2


From ed87b27e00d2ca240f62f3903583a2f1541fb9ef Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 22 Sep 2009 22:58:54 +0200
Subject: [S390] Update default configuration.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 4e91a25..ab44644 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30
-# Mon Jun 22 11:08:16 2009
+# Linux kernel version: 2.6.31
+# Tue Sep 22 17:43:13 2009
 #
 CONFIG_SCHED_MC=y
 CONFIG_MMU=y
@@ -24,6 +24,7 @@ CONFIG_PGSTE=y
 CONFIG_VIRT_CPU_ACCOUNTING=y
 CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_S390=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 CONFIG_CONSTRUCTORS=y
 
@@ -48,11 +49,12 @@ CONFIG_AUDIT=y
 #
 # RCU Subsystem
 #
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=64
+# CONFIG_RCU_FANOUT_EXACT is not set
 # CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
@@ -103,11 +105,12 @@ CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
 CONFIG_AIO=y
-CONFIG_HAVE_PERF_COUNTERS=y
+CONFIG_HAVE_PERF_EVENTS=y
 
 #
-# Performance Counters
+# Kernel Performance Events And Counters
 #
+# CONFIG_PERF_EVENTS is not set
 # CONFIG_PERF_COUNTERS is not set
 CONFIG_VM_EVENT_COUNTERS=y
 # CONFIG_STRIP_ASM_SYMS is not set
@@ -116,7 +119,6 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
-# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_KPROBES=y
 CONFIG_HAVE_SYSCALL_WRAPPERS=y
@@ -176,6 +178,7 @@ CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_64BIT=y
+# CONFIG_KTIME_SCALAR is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=32
 CONFIG_HOTPLUG_CPU=y
@@ -257,7 +260,6 @@ CONFIG_FORCE_MAX_ZONEORDER=9
 CONFIG_PFAULT=y
 # CONFIG_SHARED_KERNEL is not set
 # CONFIG_CMM is not set
-# CONFIG_PAGE_STATES is not set
 # CONFIG_APPLDATA_BASE is not set
 CONFIG_HZ_100=y
 # CONFIG_HZ_250 is not set
@@ -280,6 +282,7 @@ CONFIG_PM_SLEEP_SMP=y
 CONFIG_PM_SLEEP=y
 CONFIG_HIBERNATION=y
 CONFIG_PM_STD_PARTITION=""
+# CONFIG_PM_RUNTIME is not set
 CONFIG_NET=y
 
 #
@@ -394,6 +397,7 @@ CONFIG_IP_SCTP=m
 # CONFIG_SCTP_HMAC_NONE is not set
 # CONFIG_SCTP_HMAC_SHA1 is not set
 CONFIG_SCTP_HMAC_MD5=y
+# CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
@@ -487,6 +491,7 @@ CONFIG_CCW=y
 # Generic Driver Options
 #
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_DEVTMPFS is not set
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
@@ -501,6 +506,7 @@ CONFIG_BLK_DEV=y
 CONFIG_BLK_DEV_LOOP=m
 # CONFIG_BLK_DEV_CRYPTOLOOP is not set
 CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_OSD is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=4096
@@ -594,8 +600,11 @@ CONFIG_BLK_DEV_DM=y
 CONFIG_DM_CRYPT=y
 CONFIG_DM_SNAPSHOT=y
 CONFIG_DM_MIRROR=y
+# CONFIG_DM_LOG_USERSPACE is not set
 CONFIG_DM_ZERO=y
 CONFIG_DM_MULTIPATH=m
+# CONFIG_DM_MULTIPATH_QL is not set
+# CONFIG_DM_MULTIPATH_ST is not set
 # CONFIG_DM_DELAY is not set
 # CONFIG_DM_UEVENT is not set
 CONFIG_NETDEVICES=y
@@ -615,7 +624,6 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
-# CONFIG_KS8842 is not set
 CONFIG_NETDEV_1000=y
 CONFIG_NETDEV_10000=y
 # CONFIG_TR is not set
@@ -678,6 +686,7 @@ CONFIG_SCLP_CONSOLE=y
 CONFIG_SCLP_VT220_TTY=y
 CONFIG_SCLP_VT220_CONSOLE=y
 CONFIG_SCLP_CPI=m
+CONFIG_SCLP_ASYNC=m
 CONFIG_S390_TAPE=m
 
 #
@@ -737,6 +746,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_FILE_LOCKING=y
 CONFIG_FSNOTIFY=y
 CONFIG_DNOTIFY=y
@@ -798,7 +808,6 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
 # CONFIG_EXOFS_FS is not set
-# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -885,11 +894,13 @@ CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
 # CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_KPROBES_SANITY_TEST is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
@@ -979,11 +990,13 @@ CONFIG_CRYPTO_PCBC=m
 #
 CONFIG_CRYPTO_HMAC=m
 # CONFIG_CRYPTO_XCBC is not set
+CONFIG_CRYPTO_VMAC=m
 
 #
 # Digest
 #
 CONFIG_CRYPTO_CRC32C=m
+CONFIG_CRYPTO_GHASH=m
 # CONFIG_CRYPTO_MD4 is not set
 CONFIG_CRYPTO_MD5=m
 # CONFIG_CRYPTO_MICHAEL_MIC is not set
-- 
cgit v0.10.2


From 63209a71e8e7727f52208d17bb7180cd392edcfb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 22 Sep 2009 08:50:32 -0700
Subject: regulator: Add some brief design documentation

Provide some brief documentation of some of the design decisions that
are made by the regulator API.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/Documentation/power/regulator/design.txt b/Documentation/power/regulator/design.txt
new file mode 100644
index 0000000..f9b56b7
--- /dev/null
+++ b/Documentation/power/regulator/design.txt
@@ -0,0 +1,33 @@
+Regulator API design notes
+==========================
+
+This document provides a brief, partially structured, overview of some
+of the design considerations which impact the regulator API design.
+
+Safety
+------
+
+ - Errors in regulator configuration can have very serious consequences
+   for the system, potentially including lasting hardware damage.
+ - It is not possible to automatically determine the power confugration
+   of the system - software-equivalent variants of the same chip may
+   have different power requirments, and not all components with power
+   requirements are visible to software.
+
+  => The API should make no changes to the hardware state unless it has
+     specific knowledge that these changes are safe to do perform on
+     this particular system.
+
+Consumer use cases
+------------------
+
+ - The overwhelming majority of devices in a system will have no
+   requirement to do any runtime configuration of their power beyond
+   being able to turn it on or off.
+
+ - Many of the power supplies in the system will be shared between many
+   different consumers.
+
+  => The consumer API should be structured so that these use cases are
+     very easy to handle and so that consumers will work with shared
+     supplies without any additional effort.
-- 
cgit v0.10.2


From 7c329288d72e025db4feac65f0fed95fb3e3ef1c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 23 Sep 2009 09:52:18 +1000
Subject: vgaarb: make client interface config invariant.

Fixes build when VGA_ARB is off.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index e81c64a..923f904 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -41,7 +41,7 @@
  *     interrupts at any time.
  */
 extern void vga_set_legacy_decoding(struct pci_dev *pdev,
-									unsigned int decodes);
+				    unsigned int decodes);
 
 /**
  *     vga_get         - acquire & locks VGA resources
@@ -193,8 +193,17 @@ static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2)
  * They driver will get a callback when VGA arbitration is first used
  * by userspace since we some older X servers have issues.
  */
+#if defined(CONFIG_VGA_ARB)
 int vga_client_register(struct pci_dev *pdev, void *cookie,
 			void (*irq_set_state)(void *cookie, bool state),
 			unsigned int (*set_vga_decode)(void *cookie, bool state));
+#else
+static inline int vga_client_register(struct pci_dev *pdev, void *cookie,
+				      void (*irq_set_state)(void *cookie, bool state),
+				      unsigned int (*set_vga_decode)(void *cookie, bool state))
+{
+	return 0;
+}
+#endif
 
 #endif /* LINUX_VGA_H */
-- 
cgit v0.10.2


From c214271563c00f2721c5111e27b53bf06dabc6e4 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@linux.ie>
Date: Tue, 22 Sep 2009 08:50:10 +1000
Subject: drm/radeon: consolidate family flags used in pciids.

having these separate was pointless and introduced a bug when
one got updated without the other.

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 817af8e..d5de53e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -49,6 +49,7 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 
+#include "radeon_family.h"
 #include "radeon_mode.h"
 #include "radeon_reg.h"
 
@@ -77,64 +78,6 @@ extern int radeon_tv;
 #define RADEONFB_CONN_LIMIT		4
 #define RADEON_BIOS_NUM_SCRATCH		8
 
-enum radeon_family {
-	CHIP_R100,
-	CHIP_RV100,
-	CHIP_RS100,
-	CHIP_RV200,
-	CHIP_RS200,
-	CHIP_R200,
-	CHIP_RV250,
-	CHIP_RS300,
-	CHIP_RV280,
-	CHIP_R300,
-	CHIP_R350,
-	CHIP_RV350,
-	CHIP_RV380,
-	CHIP_R420,
-	CHIP_R423,
-	CHIP_RV410,
-	CHIP_RS400,
-	CHIP_RS480,
-	CHIP_RS600,
-	CHIP_RS690,
-	CHIP_RS740,
-	CHIP_RV515,
-	CHIP_R520,
-	CHIP_RV530,
-	CHIP_RV560,
-	CHIP_RV570,
-	CHIP_R580,
-	CHIP_R600,
-	CHIP_RV610,
-	CHIP_RV630,
-	CHIP_RV670,
-	CHIP_RV620,
-	CHIP_RV635,
-	CHIP_RS780,
-	CHIP_RS880,
-	CHIP_RV770,
-	CHIP_RV730,
-	CHIP_RV710,
-	CHIP_RV740,
-	CHIP_LAST,
-};
-
-enum radeon_chip_flags {
-	RADEON_FAMILY_MASK = 0x0000ffffUL,
-	RADEON_FLAGS_MASK = 0xffff0000UL,
-	RADEON_IS_MOBILITY = 0x00010000UL,
-	RADEON_IS_IGP = 0x00020000UL,
-	RADEON_SINGLE_CRTC = 0x00040000UL,
-	RADEON_IS_AGP = 0x00080000UL,
-	RADEON_HAS_HIERZ = 0x00100000UL,
-	RADEON_IS_PCIE = 0x00200000UL,
-	RADEON_NEW_MEMMAP = 0x00400000UL,
-	RADEON_IS_PCI = 0x00800000UL,
-	RADEON_IS_IGPGART = 0x01000000UL,
-};
-
-
 /*
  * Errata workarounds.
  */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index cb0cfe4..350962e 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -34,6 +34,8 @@
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
 
+#include "radeon_family.h"
+
 /* General customization:
  */
 
@@ -109,75 +111,12 @@
 #define DRIVER_MINOR		31
 #define DRIVER_PATCHLEVEL	0
 
-/*
- * Radeon chip families
- */
-enum radeon_family {
-	CHIP_R100,
-	CHIP_RV100,
-	CHIP_RS100,
-	CHIP_RV200,
-	CHIP_RS200,
-	CHIP_R200,
-	CHIP_RV250,
-	CHIP_RS300,
-	CHIP_RV280,
-	CHIP_R300,
-	CHIP_R350,
-	CHIP_RV350,
-	CHIP_RV380,
-	CHIP_R420,
-	CHIP_R423,
-	CHIP_RV410,
-	CHIP_RS400,
-	CHIP_RS480,
-	CHIP_RS600,
-	CHIP_RS690,
-	CHIP_RS740,
-	CHIP_RV515,
-	CHIP_R520,
-	CHIP_RV530,
-	CHIP_RV560,
-	CHIP_RV570,
-	CHIP_R580,
-	CHIP_R600,
-	CHIP_RV610,
-	CHIP_RV630,
-	CHIP_RV620,
-	CHIP_RV635,
-	CHIP_RV670,
-	CHIP_RS780,
-	CHIP_RS880,
-	CHIP_RV770,
-	CHIP_RV730,
-	CHIP_RV710,
-	CHIP_RV740,
-	CHIP_LAST,
-};
-
 enum radeon_cp_microcode_version {
 	UCODE_R100,
 	UCODE_R200,
 	UCODE_R300,
 };
 
-/*
- * Chip flags
- */
-enum radeon_chip_flags {
-	RADEON_FAMILY_MASK = 0x0000ffffUL,
-	RADEON_FLAGS_MASK = 0xffff0000UL,
-	RADEON_IS_MOBILITY = 0x00010000UL,
-	RADEON_IS_IGP = 0x00020000UL,
-	RADEON_SINGLE_CRTC = 0x00040000UL,
-	RADEON_IS_AGP = 0x00080000UL,
-	RADEON_HAS_HIERZ = 0x00100000UL,
-	RADEON_IS_PCIE = 0x00200000UL,
-	RADEON_NEW_MEMMAP = 0x00400000UL,
-	RADEON_IS_PCI = 0x00800000UL,
-	RADEON_IS_IGPGART = 0x01000000UL,
-};
-
 typedef struct drm_radeon_freelist {
 	unsigned int age;
 	struct drm_buf *buf;
diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h
new file mode 100644
index 0000000..797972e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_family.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+
+/* this file defines the CHIP_  and family flags used in the pciids,
+ * its is common between kms and non-kms because duplicating it and
+ * changing one place is fail.
+ */
+#ifndef RADEON_FAMILY_H
+#define RADEON_FAMILY_H
+/*
+ * Radeon chip families
+ */
+enum radeon_family {
+	CHIP_R100,
+	CHIP_RV100,
+	CHIP_RS100,
+	CHIP_RV200,
+	CHIP_RS200,
+	CHIP_R200,
+	CHIP_RV250,
+	CHIP_RS300,
+	CHIP_RV280,
+	CHIP_R300,
+	CHIP_R350,
+	CHIP_RV350,
+	CHIP_RV380,
+	CHIP_R420,
+	CHIP_R423,
+	CHIP_RV410,
+	CHIP_RS400,
+	CHIP_RS480,
+	CHIP_RS600,
+	CHIP_RS690,
+	CHIP_RS740,
+	CHIP_RV515,
+	CHIP_R520,
+	CHIP_RV530,
+	CHIP_RV560,
+	CHIP_RV570,
+	CHIP_R580,
+	CHIP_R600,
+	CHIP_RV610,
+	CHIP_RV630,
+	CHIP_RV670,
+	CHIP_RV620,
+	CHIP_RV635,
+	CHIP_RS780,
+	CHIP_RS880,
+	CHIP_RV770,
+	CHIP_RV730,
+	CHIP_RV710,
+	CHIP_RV740,
+	CHIP_LAST,
+};
+
+/*
+ * Chip flags
+ */
+enum radeon_chip_flags {
+	RADEON_FAMILY_MASK = 0x0000ffffUL,
+	RADEON_FLAGS_MASK = 0xffff0000UL,
+	RADEON_IS_MOBILITY = 0x00010000UL,
+	RADEON_IS_IGP = 0x00020000UL,
+	RADEON_SINGLE_CRTC = 0x00040000UL,
+	RADEON_IS_AGP = 0x00080000UL,
+	RADEON_HAS_HIERZ = 0x00100000UL,
+	RADEON_IS_PCIE = 0x00200000UL,
+	RADEON_NEW_MEMMAP = 0x00400000UL,
+	RADEON_IS_PCI = 0x00800000UL,
+	RADEON_IS_IGPGART = 0x01000000UL,
+};
+#endif
-- 
cgit v0.10.2


From 5b31aee9d72f529ee6b60e8d66967f817a0e39fc Mon Sep 17 00:00:00 2001
From: Andre Maasikas <amaasikas@gmail.com>
Date: Mon, 21 Sep 2009 08:59:41 -0400
Subject: drm/radeon/r600: set correct pitch for 4 byte copy

[agd5f: also fix the non-kms path]

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>

diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index dde2ccb..d988eec 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -737,7 +737,7 @@ r600_blit_copy(struct drm_device *dev,
 
 			/* dst */
 			set_render_target(dev_priv, COLOR_8_8_8_8,
-					  dst_x + cur_size, h,
+					  (dst_x + cur_size) / 4, h,
 					  dst_gpu_addr);
 
 			/* scissors */
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 0a6f468..acae33e 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -774,7 +774,7 @@ void r600_kms_blit_copy(struct radeon_device *rdev,
 
 			/* dst 23 */
 			set_render_target(rdev, COLOR_8_8_8_8,
-					  dst_x + cur_size, h,
+					  (dst_x + cur_size) / 4, h,
 					  dst_gpu_addr);
 
 			/* scissors 12  */
-- 
cgit v0.10.2


From 268e46712d57a6493cc0f98e7d200a0f674c31ed Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 22 Sep 2009 19:56:50 -0700
Subject: asm-generic: syscall_get_nr returns int

Only 32 bits of system call number are meaningful, so make the
specification for syscall_get_nr() be to return int, not long.

Signed-off-by: Roland McGrath <roland@redhat.com>

diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index ea8087b..5c122ae 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -1,7 +1,7 @@
 /*
  * Access to user system call parameters and results
  *
- * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -32,9 +32,13 @@ struct pt_regs;
  * If @task is not executing a system call, i.e. it's blocked
  * inside the kernel for a fault or signal, returns -1.
  *
+ * Note this returns int even on 64-bit machines.  Only 32 bits of
+ * system call number can be meaningful.  If the actual arch value
+ * is 64 bits, this truncates to 32 bits so 0xffffffff means -1.
+ *
  * It's only valid to call this when @task is known to be blocked.
  */
-long syscall_get_nr(struct task_struct *task, struct pt_regs *regs);
+int syscall_get_nr(struct task_struct *task, struct pt_regs *regs);
 
 /**
  * syscall_rollback - roll back registers after an aborted system call
-- 
cgit v0.10.2


From 18c1e2c80d92adca50ffc654617639a4aa35f29c Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 22 Sep 2009 19:57:51 -0700
Subject: x86: syscall_get_nr returns int

Make syscall_get_nr() return int, so we always sign-extend
the low 32 bits of orig_ax in checks.

Signed-off-by: Roland McGrath <roland@redhat.com>

diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d82f39b..8d33bc5 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -1,7 +1,7 @@
 /*
  * Access to user system call parameters and results
  *
- * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -16,13 +16,13 @@
 #include <linux/sched.h>
 #include <linux/err.h>
 
-static inline long syscall_get_nr(struct task_struct *task,
-				  struct pt_regs *regs)
+/*
+ * Only the low 32 bits of orig_ax are meaningful, so we return int.
+ * This importantly ignores the high bits on 64-bit, so comparisons
+ * sign-extend the low 32 bits.
+ */
+static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 {
-	/*
-	 * We always sign-extend a -1 value being set here,
-	 * so this is always either -1L or a syscall number.
-	 */
 	return regs->orig_ax;
 }
 
-- 
cgit v0.10.2


From 721f69c404c51a5d1dc93fddb48ee936e8e23770 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:17:49 +0800
Subject: ocfs2: Define refcount tree structure.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7ab6e9e..e4288b4 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -68,6 +68,7 @@
 #define OCFS2_DIR_TRAILER_SIGNATURE	"DIRTRL1"
 #define OCFS2_DX_ROOT_SIGNATURE		"DXDIR01"
 #define OCFS2_DX_LEAF_SIGNATURE		"DXLEAF1"
+#define OCFS2_REFCOUNT_BLOCK_SIGNATURE	"REFCNT1"
 
 /* Compatibility flags */
 #define OCFS2_HAS_COMPAT_FEATURE(sb,mask)			\
@@ -160,6 +161,9 @@
 /* Metadata checksum and error correction */
 #define OCFS2_FEATURE_INCOMPAT_META_ECC		0x0800
 
+/* Refcount tree support */
+#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE	0x1000
+
 /*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
@@ -223,6 +227,7 @@
 #define OCFS2_HAS_XATTR_FL	(0x0002)
 #define OCFS2_INLINE_XATTR_FL	(0x0004)
 #define OCFS2_INDEXED_DIR_FL	(0x0008)
+#define OCFS2_HAS_REFCOUNT_FL   (0x0010)
 
 /* Inode attributes, keep in sync with EXT2 */
 #define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
@@ -241,8 +246,11 @@
 /*
  * Extent record flags (e_node.leaf.flags)
  */
-#define OCFS2_EXT_UNWRITTEN	(0x01)	/* Extent is allocated but
-					 * unwritten */
+#define OCFS2_EXT_UNWRITTEN		(0x01)	/* Extent is allocated but
+						 * unwritten */
+#define OCFS2_EXT_REFCOUNTED		(0x02)  /* Extent is reference
+						 * counted in an associated
+						 * refcount tree */
 
 /*
  * ioctl commands
@@ -717,7 +725,8 @@ struct ocfs2_dinode {
 	__le64 i_xattr_loc;
 /*80*/	struct ocfs2_block_check i_check;	/* Error checking */
 /*88*/	__le64 i_dx_root;		/* Pointer to dir index root block */
-	__le64 i_reserved2[5];
+/*90*/	__le64 i_refcount_loc;
+	__le64 i_reserved2[4];
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
@@ -901,6 +910,59 @@ struct ocfs2_group_desc
 /*40*/	__u8    bg_bitmap[0];
 };
 
+struct ocfs2_refcount_rec {
+/*00*/	__le64 r_cpos;		/* Physical offset, in clusters */
+	__le32 r_clusters;	/* Clusters covered by this extent */
+	__le32 r_refcount;	/* Reference count of this extent */
+/*10*/
+};
+
+#define OCFS2_REFCOUNT_LEAF_FL          (0x00000001)
+#define OCFS2_REFCOUNT_TREE_FL          (0x00000002)
+
+struct ocfs2_refcount_list {
+/*00*/	__le16 rl_count;	/* Maximum number of entries possible
+				   in rl_records */
+	__le16 rl_used;		/* Current number of used records */
+	__le32 rl_reserved2;
+	__le64 rl_reserved1;	/* Pad to sizeof(ocfs2_refcount_record) */
+/*10*/	struct ocfs2_refcount_rec rl_recs[0];	/* Refcount records */
+};
+
+
+struct ocfs2_refcount_block {
+/*00*/	__u8 rf_signature[8];		/* Signature for verification */
+	__le16 rf_suballoc_slot;	/* Slot suballocator this block
+					   belongs to */
+	__le16 rf_suballoc_bit;		/* Bit offset in suballocator
+					   block group */
+	__le32 rf_fs_generation;	/* Must match superblock */
+/*10*/	__le64 rf_blkno;		/* Offset on disk, in blocks */
+	__le64 rf_parent;		/* Parent block, only valid if
+					   OCFS2_REFCOUNT_LEAF_FL is set in
+					   rf_flags */
+/*20*/	struct ocfs2_block_check rf_check;	/* Error checking */
+	__le64 rf_last_eb_blk;		/* Pointer to last extent block */
+/*30*/	__le32 rf_count;		/* Number of inodes sharing this
+					   refcount tree */
+	__le32 rf_flags;		/* See the flags above */
+	__le32 rf_clusters;		/* clusters covered by refcount tree. */
+	__le32 rf_cpos;			/* cluster offset in refcount tree.*/
+/*40*/	__le32 rf_generation;		/* generation number. all be the same
+					 * for the same refcount tree. */
+	__le32 rf_reserved0;
+	__le64 rf_reserved1[7];
+/*80*/	union {
+		struct ocfs2_refcount_list rf_records;  /* List of refcount
+							  records */
+		struct ocfs2_extent_list rf_list;	/* Extent record list,
+							only valid if
+							OCFS2_REFCOUNT_TREE_FL
+							is set in rf_flags */
+	};
+/* Actual on-disk size is one block */
+};
+
 /*
  * On disk extended attribute structure for OCFS2.
  */
@@ -1312,6 +1374,26 @@ static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
 
 	return size / sizeof(struct ocfs2_extent_rec);
 }
+
+static inline u16 ocfs2_extent_recs_per_rb(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_refcount_block, rf_list.l_recs);
+
+	return size / sizeof(struct ocfs2_extent_rec);
+}
+
+static inline u16 ocfs2_refcount_recs_per_rb(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_refcount_block, rf_records.rl_recs);
+
+	return size / sizeof(struct ocfs2_refcount_rec);
+}
 #else
 static inline int ocfs2_fast_symlink_chars(int blocksize)
 {
-- 
cgit v0.10.2


From 93c97087a646429f4dc0d73298d64674ddd5cde8 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:20 +0800
Subject: ocfs2: Add metaecc for ocfs2_refcount_block.

Add metaecc and journal trigger for ocfs2_refcount_block.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 5b6c0e4..54c16b6 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -555,6 +555,14 @@ static struct ocfs2_triggers eb_triggers = {
 	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
 };
 
+static struct ocfs2_triggers rb_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_refcount_block, rf_check),
+};
+
 static struct ocfs2_triggers gd_triggers = {
 	.ot_triggers = {
 		.t_commit = ocfs2_commit_trigger,
@@ -677,6 +685,13 @@ int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 	return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
 }
 
+int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
+				      type);
+}
+
 int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 6163f28..b2dc125 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -281,6 +281,9 @@ int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
 /* ocfs2_extent_block */
 int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
+/* ocfs2_refcount_block */
+int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
+			    struct buffer_head *bh, int type);
 /* ocfs2_group_desc */
 int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
-- 
cgit v0.10.2


From f2c870e3b12e38da6d9b5b17c4c8ae56a0ed68e4 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:26 +0800
Subject: ocfs2: Add ocfs2_read_refcount_block.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 0159607..31f25ce 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -28,6 +28,7 @@ ocfs2-objs := \
 	locks.o			\
 	mmap.o 			\
 	namei.o 		\
+	refcounttree.o		\
 	resize.o		\
 	slot_map.o 		\
 	suballoc.o 		\
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 96df541..1cd2934 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -111,6 +111,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
 	define_mask(EXPORT),
 	define_mask(XATTR),
 	define_mask(QUOTA),
+	define_mask(REFCOUNT),
 	define_mask(ERROR),
 	define_mask(NOTICE),
 	define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 696c32e..9b4d117 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
 #define ML_EXPORT	0x0000000010000000ULL /* ocfs2 export operations */
 #define ML_XATTR	0x0000000020000000ULL /* ocfs2 extended attributes */
 #define ML_QUOTA	0x0000000040000000ULL /* ocfs2 quota operations */
+#define ML_REFCOUNT	0x0000000080000000ULL /* refcount tree operations */
 /* bits that are infrequently given and frequently matched in the high word */
 #define ML_ERROR	0x0000000100000000ULL /* sent to KERN_ERR */
 #define ML_NOTICE	0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d370262..6688d19 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -610,6 +610,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_DX_LEAF(ptr)					\
 	(!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))
 
+#define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr)				\
+	(!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE))
+
 static inline unsigned long ino_from_blkno(struct super_block *sb,
 					   u64 blkno)
 {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
new file mode 100644
index 0000000..a923535
--- /dev/null
+++ b/fs/ocfs2/refcounttree.c
@@ -0,0 +1,99 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * refcounttree.c
+ *
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define MLOG_MASK_PREFIX ML_REFCOUNT
+#include <cluster/masklog.h>
+#include "ocfs2.h"
+#include "inode.h"
+#include "alloc.h"
+#include "suballoc.h"
+#include "journal.h"
+#include "uptodate.h"
+#include "super.h"
+#include "buffer_head_io.h"
+#include "blockcheck.h"
+
+static int ocfs2_validate_refcount_block(struct super_block *sb,
+					 struct buffer_head *bh)
+{
+	int rc;
+	struct ocfs2_refcount_block *rb =
+		(struct ocfs2_refcount_block *)bh->b_data;
+
+	mlog(0, "Validating refcount block %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
+	if (rc) {
+		mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
+		     (unsigned long long)bh->b_blocknr);
+		return rc;
+	}
+
+
+	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
+		ocfs2_error(sb,
+			    "Refcount block #%llu has bad signature %.*s",
+			    (unsigned long long)bh->b_blocknr, 7,
+			    rb->rf_signature);
+		return -EINVAL;
+	}
+
+	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
+		ocfs2_error(sb,
+			    "Refcount block #%llu has an invalid rf_blkno "
+			    "of %llu",
+			    (unsigned long long)bh->b_blocknr,
+			    (unsigned long long)le64_to_cpu(rb->rf_blkno));
+		return -EINVAL;
+	}
+
+	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+		ocfs2_error(sb,
+			    "Refcount block #%llu has an invalid "
+			    "rf_fs_generation of #%u",
+			    (unsigned long long)bh->b_blocknr,
+			    le32_to_cpu(rb->rf_fs_generation));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
+				     u64 rb_blkno,
+				     struct buffer_head **bh)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	rc = ocfs2_read_block(ci, rb_blkno, &tmp,
+			      ocfs2_validate_refcount_block);
+
+	/* If ocfs2_read_block() got us a new bh, pass it up. */
+	if (!rc && !*bh)
+		*bh = tmp;
+
+	return rc;
+}
-- 
cgit v0.10.2


From a433848132d8cdfb8173745b922ddb919de11527 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:29 +0800
Subject: ocfs2: Abstract caching info checkpoint.

In meta downconvert, we need to checkpoint the metadata in an inode.
For refcount tree, we also need it. So abstract the process out.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f518d1b..79db055 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3495,11 +3495,11 @@ out:
 	return UNBLOCK_CONTINUE;
 }
 
-static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
-					int new_level)
+static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
+				 struct ocfs2_lock_res *lockres,
+				 int new_level)
 {
-	struct inode *inode = ocfs2_lock_res_inode(lockres);
-	int checkpointed = ocfs2_ci_fully_checkpointed(INODE_CACHE(inode));
+	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
 
 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
@@ -3507,10 +3507,18 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 	if (checkpointed)
 		return 1;
 
-	ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
+	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
 	return 0;
 }
 
+static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
+					int new_level)
+{
+	struct inode *inode = ocfs2_lock_res_inode(lockres);
+
+	return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level);
+}
+
 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
 {
 	struct inode *inode = ocfs2_lock_res_inode(lockres);
-- 
cgit v0.10.2


From 8dec98edfe9684ce00b580a09dde3dcd21ee785b Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:58 +0800
Subject: ocfs2: Add new refcount tree lock resource in dlmglue.

refcount tree lock resource is used to protect refcount
tree read/write among multiple nodes.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 79db055..bb2fc69 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -53,6 +53,7 @@
 #include "super.h"
 #include "uptodate.h"
 #include "quota.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -110,6 +111,11 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
 
 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
 
+static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
+					    int new_level);
+static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
+					 int blocking);
+
 #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
 
 /* This aids in debugging situations where a bad LVB might be involved. */
@@ -278,6 +284,12 @@ static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
 };
 
+static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
+	.check_downconvert = ocfs2_check_refcount_downconvert,
+	.downconvert_worker = ocfs2_refcount_convert_worker,
+	.flags		= 0,
+};
+
 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
 {
 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -306,6 +318,12 @@ static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_re
 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
 }
 
+static inline struct ocfs2_refcount_tree *
+ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
+{
+	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
+}
+
 static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
 {
 	if (lockres->l_ops->get_osb)
@@ -693,6 +711,17 @@ void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
 				   info);
 }
 
+void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
+				  struct ocfs2_super *osb, u64 ref_blkno,
+				  unsigned int generation)
+{
+	ocfs2_lock_res_init_once(lockres);
+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
+			      generation, lockres->l_name);
+	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
+				   &ocfs2_refcount_block_lops, osb);
+}
+
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
 {
 	mlog_entry_void();
@@ -3648,6 +3677,26 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 	return UNBLOCK_CONTINUE_POST;
 }
 
+static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
+					    int new_level)
+{
+	struct ocfs2_refcount_tree *tree =
+				ocfs2_lock_res_refcount_tree(lockres);
+
+	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
+}
+
+static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
+					 int blocking)
+{
+	struct ocfs2_refcount_tree *tree =
+				ocfs2_lock_res_refcount_tree(lockres);
+
+	ocfs2_metadata_cache_purge(&tree->rf_ci);
+
+	return UNBLOCK_CONTINUE;
+}
+
 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
 {
 	struct ocfs2_qinfo_lvb *lvb;
@@ -3760,6 +3809,37 @@ bail:
 	return status;
 }
 
+int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
+{
+	int status;
+	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
+	struct ocfs2_super *osb = lockres->l_priv;
+
+
+	if (ocfs2_is_hard_readonly(osb))
+		return -EROFS;
+
+	if (ocfs2_mount_local(osb))
+		return 0;
+
+	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+	if (status < 0)
+		mlog_errno(status);
+
+	return status;
+}
+
+void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
+{
+	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
+	struct ocfs2_super *osb = lockres->l_priv;
+
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, lockres, level);
+}
+
 /*
  * This is the filesystem locking protocol.  It provides the lock handling
  * hooks for the underlying DLM.  It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 7553836..d1ce48e 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -101,6 +101,9 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
 struct ocfs2_mem_dqinfo;
 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
                                struct ocfs2_mem_dqinfo *info);
+void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
+				  struct ocfs2_super *osb, u64 ref_blkno,
+				  unsigned int generation);
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
 int ocfs2_drop_inode_locks(struct inode *inode);
@@ -148,6 +151,9 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock);
 void ocfs2_file_unlock(struct file *file);
 int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+struct ocfs2_refcount_tree;
+int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
+void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
 
 
 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index c212cf5..d277aab 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -49,6 +49,7 @@ enum ocfs2_lock_type {
 	OCFS2_LOCK_TYPE_QINFO,
 	OCFS2_LOCK_TYPE_NFS_SYNC,
 	OCFS2_LOCK_TYPE_ORPHAN_SCAN,
+	OCFS2_LOCK_TYPE_REFCOUNT,
 	OCFS2_NUM_LOCK_TYPES
 };
 
@@ -89,6 +90,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 		case OCFS2_LOCK_TYPE_ORPHAN_SCAN:
 			c = 'P';
 			break;
+		case OCFS2_LOCK_TYPE_REFCOUNT:
+			c = 'T';
+			break;
 		default:
 			c = '\0';
 	}
@@ -110,6 +114,7 @@ static char *ocfs2_lock_type_strings[] = {
 	[OCFS2_LOCK_TYPE_QINFO] = "Quota",
 	[OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
 	[OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
+	[OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount",
 };
 
 static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
new file mode 100644
index 0000000..9a3695c
--- /dev/null
+++ b/fs/ocfs2/refcounttree.h
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * refcounttree.h
+ *
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef OCFS2_REFCOUNTTREE_H
+#define OCFS2_REFCOUNTTREE_H
+
+struct ocfs2_refcount_tree {
+	struct rb_node rf_node;
+	u64 rf_blkno;
+	u32 rf_generation;
+	struct rw_semaphore rf_sem;
+	struct ocfs2_lock_res rf_lockres;
+	struct kref rf_getcnt;
+	int rf_removed;
+
+	/* the following 4 fields are used by caching_info. */
+	struct ocfs2_caching_info rf_ci;
+	spinlock_t rf_lock;
+	struct mutex rf_io_mutex;
+	struct super_block *rf_sb;
+};
+
+#endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v0.10.2


From c732eb16bf07f9bfb7fa72b6868462471273bdbd Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:21:00 +0800
Subject: ocfs2: Add caching info for refcount tree.

refcount tree should use its own caching info so that when
we downconvert the refcount tree lock, we can drop all the
cached buffer head.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a923535..eb0f4a0 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -26,6 +26,13 @@
 #include "super.h"
 #include "buffer_head_io.h"
 #include "blockcheck.h"
+#include "refcounttree.h"
+
+static inline struct ocfs2_refcount_tree *
+cache_info_to_refcount(struct ocfs2_caching_info *ci)
+{
+	return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
+}
 
 static int ocfs2_validate_refcount_block(struct super_block *sb,
 					 struct buffer_head *bh)
@@ -97,3 +104,55 @@ static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
 
 	return rc;
 }
+
+static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	return rf->rf_blkno;
+}
+
+static struct super_block *
+ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	return rf->rf_sb;
+}
+
+static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	spin_lock(&rf->rf_lock);
+}
+
+static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	spin_unlock(&rf->rf_lock);
+}
+
+static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	mutex_lock(&rf->rf_io_mutex);
+}
+
+static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	mutex_unlock(&rf->rf_io_mutex);
+}
+
+static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
+	.co_owner		= ocfs2_refcount_cache_owner,
+	.co_get_super		= ocfs2_refcount_cache_get_super,
+	.co_cache_lock		= ocfs2_refcount_cache_lock,
+	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
+	.co_io_lock		= ocfs2_refcount_cache_io_lock,
+	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
+};
-- 
cgit v0.10.2


From 374a263e790c4de85844283c098810a92985f623 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 24 Aug 2009 11:13:37 +0800
Subject: ocfs2: Add refcount tree lock mechanism.

Implement locking around struct ocfs2_refcount_tree.  This protects
all read/write operations on refcount trees.  ocfs2_refcount_tree
has its own lock and its own caching_info, protecting buffers among
multiple nodes.

User must call ocfs2_lock_refcount_tree before his operation on
the tree and unlock it after that.

ocfs2_refcount_trees are referenced by the block number of the
refcount tree root block, So we create an rb-tree on the ocfs2_super
to look them up.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6688d19..bb53573 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -408,6 +408,10 @@ struct ocfs2_super
 
 	/* the group we used to allocate inodes. */
 	u64				osb_inode_alloc_group;
+
+	/* rb tree root for refcount lock. */
+	struct rb_root	osb_rf_lock_tree;
+	struct ocfs2_refcount_tree *osb_ref_tree_lru;
 };
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index eb0f4a0..8d79de8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -27,6 +27,7 @@
 #include "buffer_head_io.h"
 #include "blockcheck.h"
 #include "refcounttree.h"
+#include "dlmglue.h"
 
 static inline struct ocfs2_refcount_tree *
 cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -156,3 +157,361 @@ static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
 	.co_io_lock		= ocfs2_refcount_cache_io_lock,
 	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
 };
+
+static struct ocfs2_refcount_tree *
+ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
+{
+	struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
+	struct ocfs2_refcount_tree *tree = NULL;
+
+	while (n) {
+		tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
+
+		if (blkno < tree->rf_blkno)
+			n = n->rb_left;
+		else if (blkno > tree->rf_blkno)
+			n = n->rb_right;
+		else
+			return tree;
+	}
+
+	return NULL;
+}
+
+/* osb_lock is already locked. */
+static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
+				       struct ocfs2_refcount_tree *new)
+{
+	u64 rf_blkno = new->rf_blkno;
+	struct rb_node *parent = NULL;
+	struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
+	struct ocfs2_refcount_tree *tmp;
+
+	while (*p) {
+		parent = *p;
+
+		tmp = rb_entry(parent, struct ocfs2_refcount_tree,
+			       rf_node);
+
+		if (rf_blkno < tmp->rf_blkno)
+			p = &(*p)->rb_left;
+		else if (rf_blkno > tmp->rf_blkno)
+			p = &(*p)->rb_right;
+		else {
+			/* This should never happen! */
+			mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
+			     (unsigned long long)rf_blkno);
+			BUG();
+		}
+	}
+
+	rb_link_node(&new->rf_node, parent, p);
+	rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
+}
+
+static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
+{
+	ocfs2_metadata_cache_exit(&tree->rf_ci);
+	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
+	ocfs2_lock_res_free(&tree->rf_lockres);
+	kfree(tree);
+}
+
+static inline void
+ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
+					struct ocfs2_refcount_tree *tree)
+{
+	rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
+	if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
+		osb->osb_ref_tree_lru = NULL;
+}
+
+static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
+					struct ocfs2_refcount_tree *tree)
+{
+	spin_lock(&osb->osb_lock);
+	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
+	spin_unlock(&osb->osb_lock);
+}
+
+void ocfs2_kref_remove_refcount_tree(struct kref *kref)
+{
+	struct ocfs2_refcount_tree *tree =
+		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
+
+	ocfs2_free_refcount_tree(tree);
+}
+
+static inline void
+ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
+{
+	kref_get(&tree->rf_getcnt);
+}
+
+static inline void
+ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
+{
+	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
+}
+
+static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
+					       struct super_block *sb)
+{
+	ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
+	mutex_init(&new->rf_io_mutex);
+	new->rf_sb = sb;
+	spin_lock_init(&new->rf_lock);
+}
+
+static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
+					struct ocfs2_refcount_tree *new,
+					u64 rf_blkno, u32 generation)
+{
+	init_rwsem(&new->rf_sem);
+	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
+				     rf_blkno, generation);
+}
+
+static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
+				   struct ocfs2_refcount_tree **ret_tree)
+{
+	int ret = 0;
+	struct ocfs2_refcount_tree *tree, *new = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_block *ref_rb;
+
+	spin_lock(&osb->osb_lock);
+	if (osb->osb_ref_tree_lru &&
+	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
+		tree = osb->osb_ref_tree_lru;
+	else
+		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
+	if (tree)
+		goto out;
+
+	spin_unlock(&osb->osb_lock);
+
+	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
+	if (!new) {
+		ret = -ENOMEM;
+		return ret;
+	}
+
+	new->rf_blkno = rf_blkno;
+	kref_init(&new->rf_getcnt);
+	ocfs2_init_refcount_tree_ci(new, osb->sb);
+
+	/*
+	 * We need the generation to create the refcount tree lock and since
+	 * it isn't changed during the tree modification, we are safe here to
+	 * read without protection.
+	 * We also have to purge the cache after we create the lock since the
+	 * refcount block may have the stale data. It can only be trusted when
+	 * we hold the refcount lock.
+	 */
+	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		ocfs2_metadata_cache_exit(&new->rf_ci);
+		kfree(new);
+		return ret;
+	}
+
+	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
+	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
+				      new->rf_generation);
+	ocfs2_metadata_cache_purge(&new->rf_ci);
+
+	spin_lock(&osb->osb_lock);
+	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
+	if (tree)
+		goto out;
+
+	ocfs2_insert_refcount_tree(osb, new);
+
+	tree = new;
+	new = NULL;
+
+out:
+	*ret_tree = tree;
+
+	osb->osb_ref_tree_lru = tree;
+
+	spin_unlock(&osb->osb_lock);
+
+	if (new)
+		ocfs2_free_refcount_tree(new);
+
+	brelse(ref_root_bh);
+	return ret;
+}
+
+static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
+{
+	int ret;
+	struct buffer_head *di_bh = NULL;
+	struct ocfs2_dinode *di;
+
+	ret = ocfs2_read_inode_block(inode, &di_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	di = (struct ocfs2_dinode *)di_bh->b_data;
+	*ref_blkno = le64_to_cpu(di->i_refcount_loc);
+	brelse(di_bh);
+out:
+	return ret;
+}
+
+static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
+				      struct ocfs2_refcount_tree *tree, int rw)
+{
+	int ret;
+
+	ret = ocfs2_refcount_lock(tree, rw);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (rw)
+		down_write(&tree->rf_sem);
+	else
+		down_read(&tree->rf_sem);
+
+out:
+	return ret;
+}
+
+/*
+ * Lock the refcount tree pointed by ref_blkno and return the tree.
+ * In most case, we lock the tree and read the refcount block.
+ * So read it here if the caller really needs it.
+ *
+ * If the tree has been re-created by other node, it will free the
+ * old one and re-create it.
+ */
+int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
+			     u64 ref_blkno, int rw,
+			     struct ocfs2_refcount_tree **ret_tree,
+			     struct buffer_head **ref_bh)
+{
+	int ret, delete_tree = 0;
+	struct ocfs2_refcount_tree *tree = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_block *rb;
+
+again:
+	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ocfs2_refcount_tree_get(tree);
+
+	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
+	if (ret) {
+		mlog_errno(ret);
+		ocfs2_refcount_tree_put(tree);
+		goto out;
+	}
+
+	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
+					&ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		ocfs2_unlock_refcount_tree(osb, tree, rw);
+		ocfs2_refcount_tree_put(tree);
+		goto out;
+	}
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	/*
+	 * If the refcount block has been freed and re-created, we may need
+	 * to recreate the refcount tree also.
+	 *
+	 * Here we just remove the tree from the rb-tree, and the last
+	 * kref holder will unlock and delete this refcount_tree.
+	 * Then we goto "again" and ocfs2_get_refcount_tree will create
+	 * the new refcount tree for us.
+	 */
+	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
+		if (!tree->rf_removed) {
+			ocfs2_erase_refcount_tree_from_list(osb, tree);
+			tree->rf_removed = 1;
+			delete_tree = 1;
+		}
+
+		ocfs2_unlock_refcount_tree(osb, tree, rw);
+		/*
+		 * We get an extra reference when we create the refcount
+		 * tree, so another put will destroy it.
+		 */
+		if (delete_tree)
+			ocfs2_refcount_tree_put(tree);
+		brelse(ref_root_bh);
+		ref_root_bh = NULL;
+		goto again;
+	}
+
+	*ret_tree = tree;
+	if (ref_bh) {
+		*ref_bh = ref_root_bh;
+		ref_root_bh = NULL;
+	}
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
+
+int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
+				      struct ocfs2_refcount_tree **ret_tree,
+				      struct buffer_head **ref_bh)
+{
+	int ret;
+	u64 ref_blkno;
+
+	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
+					rw, ret_tree, ref_bh);
+}
+
+void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
+				struct ocfs2_refcount_tree *tree, int rw)
+{
+	if (rw)
+		up_write(&tree->rf_sem);
+	else
+		up_read(&tree->rf_sem);
+
+	ocfs2_refcount_unlock(tree, rw);
+	ocfs2_refcount_tree_put(tree);
+}
+
+void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
+{
+	struct rb_node *node;
+	struct ocfs2_refcount_tree *tree;
+	struct rb_root *root = &osb->osb_rf_lock_tree;
+
+	while ((node = rb_last(root)) != NULL) {
+		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
+
+		mlog(0, "Purge tree %llu\n",
+		     (unsigned long long) tree->rf_blkno);
+
+		rb_erase(&tree->rf_node, root);
+		ocfs2_free_refcount_tree(tree);
+	}
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9a3695c..2ea7fc5 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -33,4 +33,11 @@ struct ocfs2_refcount_tree {
 	struct super_block *rf_sb;
 };
 
+void ocfs2_purge_refcount_trees(struct ocfs2_super *osb);
+int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw,
+			     struct ocfs2_refcount_tree **tree,
+			     struct buffer_head **ref_bh);
+void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
+				struct ocfs2_refcount_tree *tree,
+				int rw);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index e35a505..8b60621 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -69,6 +69,7 @@
 #include "ver.h"
 #include "xattr.h"
 #include "quota.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -1858,6 +1859,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
 	ocfs2_sync_blockdev(sb);
 
+	ocfs2_purge_refcount_trees(osb);
+
 	/* No cluster connection means we've failed during mount, so skip
 	 * all the steps which depended on that to complete. */
 	if (osb->cconn) {
@@ -2064,6 +2067,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		goto bail;
 	}
 
+	osb->osb_rf_lock_tree = RB_ROOT;
+
 	osb->s_feature_compat =
 		le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
 	osb->s_feature_ro_compat =
-- 
cgit v0.10.2


From 8bf396de984e68491569b49770e4fd7aca40ba65 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 24 Aug 2009 11:12:02 +0800
Subject: ocfs2: Basic tree root operation.

Add basic refcount tree root operation.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index b2dc125..bd88c8b 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -490,6 +490,20 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
 	return credits;
 }
 
+/* inode update, new refcount block and its allocation credits. */
+#define OCFS2_REFCOUNT_TREE_CREATE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1 \
+					    + OCFS2_SUBALLOC_ALLOC)
+
+/* inode and the refcount block update. */
+#define OCFS2_REFCOUNT_TREE_SET_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
+/*
+ * inode and the refcount block update.
+ * It doesn't include the credits for sub alloc change.
+ * So if we need to free the bit, OCFS2_SUBALLOC_FREE needs to be added.
+ */
+#define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
 /*
  * Please note that the caller must make sure that root_el is the root
  * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 8d79de8..d0d6fa3 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -27,6 +27,7 @@
 #include "buffer_head_io.h"
 #include "blockcheck.h"
 #include "refcounttree.h"
+#include "sysfile.h"
 #include "dlmglue.h"
 
 static inline struct ocfs2_refcount_tree *
@@ -272,6 +273,22 @@ static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
 				     rf_blkno, generation);
 }
 
+static struct ocfs2_refcount_tree*
+ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
+{
+	struct ocfs2_refcount_tree *new;
+
+	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
+	if (!new)
+		return NULL;
+
+	new->rf_blkno = rf_blkno;
+	kref_init(&new->rf_getcnt);
+	ocfs2_init_refcount_tree_ci(new, osb->sb);
+
+	return new;
+}
+
 static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
 				   struct ocfs2_refcount_tree **ret_tree)
 {
@@ -291,16 +308,12 @@ static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
 
 	spin_unlock(&osb->osb_lock);
 
-	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
+	new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
 	if (!new) {
 		ret = -ENOMEM;
+		mlog_errno(ret);
 		return ret;
 	}
-
-	new->rf_blkno = rf_blkno;
-	kref_init(&new->rf_getcnt);
-	ocfs2_init_refcount_tree_ci(new, osb->sb);
-
 	/*
 	 * We need the generation to create the refcount tree lock and since
 	 * it isn't changed during the tree modification, we are safe here to
@@ -515,3 +528,323 @@ void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
 		ocfs2_free_refcount_tree(tree);
 	}
 }
+
+/*
+ * Create a refcount tree for an inode.
+ * We take for granted that the inode is already locked.
+ */
+static int ocfs2_create_refcount_tree(struct inode *inode,
+				      struct buffer_head *di_bh)
+{
+	int ret;
+	handle_t *handle = NULL;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
+	u16 suballoc_bit_start;
+	u32 num_got;
+	u64 first_blkno;
+
+	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+
+	mlog(0, "create tree for inode %lu\n", inode->i_ino);
+
+	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &first_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
+	if (!new_tree) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_bh = sb_getblk(inode->i_sb, first_blkno);
+	ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
+
+	ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	/* Initialize ocfs2_refcount_block. */
+	rb = (struct ocfs2_refcount_block *)new_bh->b_data;
+	memset(rb, 0, inode->i_sb->s_blocksize);
+	strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+	rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
+	rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
+	rb->rf_blkno = cpu_to_le64(first_blkno);
+	rb->rf_count = cpu_to_le32(1);
+	rb->rf_records.rl_count =
+			cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
+	spin_lock(&osb->osb_lock);
+	rb->rf_generation = osb->s_next_generation++;
+	spin_unlock(&osb->osb_lock);
+
+	ocfs2_journal_dirty(handle, new_bh);
+
+	spin_lock(&oi->ip_lock);
+	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
+	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
+	di->i_refcount_loc = cpu_to_le64(first_blkno);
+	spin_unlock(&oi->ip_lock);
+
+	mlog(0, "created tree for inode %lu, refblock %llu\n",
+	     inode->i_ino, (unsigned long long)first_blkno);
+
+	ocfs2_journal_dirty(handle, di_bh);
+
+	/*
+	 * We have to init the tree lock here since it will use
+	 * the generation number to create it.
+	 */
+	new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
+	ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
+				      new_tree->rf_generation);
+
+	spin_lock(&osb->osb_lock);
+	tree = ocfs2_find_refcount_tree(osb, first_blkno);
+
+	/*
+	 * We've just created a new refcount tree in this block.  If
+	 * we found a refcount tree on the ocfs2_super, it must be
+	 * one we just deleted.  We free the old tree before
+	 * inserting the new tree.
+	 */
+	BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
+	if (tree)
+		ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
+	ocfs2_insert_refcount_tree(osb, new_tree);
+	spin_unlock(&osb->osb_lock);
+	new_tree = NULL;
+	if (tree)
+		ocfs2_refcount_tree_put(tree);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (new_tree) {
+		ocfs2_metadata_cache_exit(&new_tree->rf_ci);
+		kfree(new_tree);
+	}
+
+	brelse(new_bh);
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+
+	return ret;
+}
+
+static int ocfs2_set_refcount_tree(struct inode *inode,
+				   struct buffer_head *di_bh,
+				   u64 refcount_loc)
+{
+	int ret;
+	handle_t *handle = NULL;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_tree *ref_tree;
+
+	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+
+	ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+				       &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	le32_add_cpu(&rb->rf_count, 1);
+
+	ocfs2_journal_dirty(handle, ref_root_bh);
+
+	spin_lock(&oi->ip_lock);
+	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
+	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
+	di->i_refcount_loc = cpu_to_le64(refcount_loc);
+	spin_unlock(&oi->ip_lock);
+	ocfs2_journal_dirty(handle, di_bh);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out:
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+
+	return ret;
+}
+
+int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
+{
+	int ret, delete_tree = 0;
+	handle_t *handle = NULL;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_refcount_block *rb;
+	struct inode *alloc_inode = NULL;
+	struct buffer_head *alloc_bh = NULL;
+	struct buffer_head *blk_bh = NULL;
+	struct ocfs2_refcount_tree *ref_tree;
+	int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
+	u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
+	u16 bit = 0;
+
+	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
+		return 0;
+
+	BUG_ON(!ref_blkno);
+	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	rb = (struct ocfs2_refcount_block *)blk_bh->b_data;
+
+	/*
+	 * If we are the last user, we need to free the block.
+	 * So lock the allocator ahead.
+	 */
+	if (le32_to_cpu(rb->rf_count) == 1) {
+		blk = le64_to_cpu(rb->rf_blkno);
+		bit = le16_to_cpu(rb->rf_suballoc_bit);
+		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
+
+		alloc_inode = ocfs2_get_system_file_inode(osb,
+					EXTENT_ALLOC_SYSTEM_INODE,
+					le16_to_cpu(rb->rf_suballoc_slot));
+		if (!alloc_inode) {
+			ret = -ENOMEM;
+			mlog_errno(ret);
+			goto out;
+		}
+		mutex_lock(&alloc_inode->i_mutex);
+
+		ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_mutex;
+		}
+
+		credits += OCFS2_SUBALLOC_FREE;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	spin_lock(&oi->ip_lock);
+	oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
+	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
+	di->i_refcount_loc = 0;
+	spin_unlock(&oi->ip_lock);
+	ocfs2_journal_dirty(handle, di_bh);
+
+	le32_add_cpu(&rb->rf_count , -1);
+	ocfs2_journal_dirty(handle, blk_bh);
+
+	if (!rb->rf_count) {
+		delete_tree = 1;
+		ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
+		ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
+					       alloc_bh, bit, bg_blkno, 1);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out_unlock:
+	if (alloc_inode) {
+		ocfs2_inode_unlock(alloc_inode, 1);
+		brelse(alloc_bh);
+	}
+out_mutex:
+	if (alloc_inode) {
+		mutex_unlock(&alloc_inode->i_mutex);
+		iput(alloc_inode);
+	}
+out:
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	if (delete_tree)
+		ocfs2_refcount_tree_put(ref_tree);
+	brelse(blk_bh);
+
+	return ret;
+}
-- 
cgit v0.10.2


From 853a3a1439b18d5a70ada2cb3fcd468e70b7d095 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:18 +0800
Subject: ocfs2: Wrap ocfs2_extent_contig in ocfs2_extent_tree.

Add a new operation eo_ocfs2_extent_contig int the extent tree's
operations vector. So that with the new refcount tree, We want
this so that refcount trees can always return CONTIG_NONE and
prevent extent merging.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ab4d2b5..75e65df 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -52,7 +52,17 @@
 
 #include "buffer_head_io.h"
 
+enum ocfs2_contig_type {
+	CONTIG_NONE = 0,
+	CONTIG_LEFT,
+	CONTIG_RIGHT,
+	CONTIG_LEFTRIGHT,
+};
 
+static enum ocfs2_contig_type
+	ocfs2_extent_rec_contig(struct super_block *sb,
+				struct ocfs2_extent_rec *ext,
+				struct ocfs2_extent_rec *insert_rec);
 /*
  * Operations for a specific extent tree type.
  *
@@ -122,6 +132,16 @@ struct ocfs2_extent_tree_operations {
 	 * to 0 (unlimited).  Optional.
 	 */
 	void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
+
+	/*
+	 * ->eo_extent_contig test whether the 2 ocfs2_extent_rec
+	 * are contiguous or not. Optional. Don't need to set it if use
+	 * ocfs2_extent_rec as the tree leaf.
+	 */
+	enum ocfs2_contig_type
+		(*eo_extent_contig)(struct ocfs2_extent_tree *et,
+				    struct ocfs2_extent_rec *ext,
+				    struct ocfs2_extent_rec *insert_rec);
 };
 
 
@@ -458,6 +478,19 @@ static inline int ocfs2_et_root_journal_access(handle_t *handle,
 					  type);
 }
 
+static inline enum ocfs2_contig_type
+	ocfs2_et_extent_contig(struct ocfs2_extent_tree *et,
+			       struct ocfs2_extent_rec *rec,
+			       struct ocfs2_extent_rec *insert_rec)
+{
+	if (et->et_ops->eo_extent_contig)
+		return et->et_ops->eo_extent_contig(et, rec, insert_rec);
+
+	return ocfs2_extent_rec_contig(
+				ocfs2_metadata_cache_get_super(et->et_ci),
+				rec, insert_rec);
+}
+
 static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
 					struct ocfs2_extent_rec *rec)
 {
@@ -736,17 +769,9 @@ int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
 	return ret;
 }
 
-enum ocfs2_contig_type {
-	CONTIG_NONE = 0,
-	CONTIG_LEFT,
-	CONTIG_RIGHT,
-	CONTIG_LEFTRIGHT,
-};
-
-
 /*
  * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
- * ocfs2_extent_contig only work properly against leaf nodes!
+ * ocfs2_extent_rec_contig only work properly against leaf nodes!
  */
 static int ocfs2_block_extent_contig(struct super_block *sb,
 				     struct ocfs2_extent_rec *ext,
@@ -772,9 +797,9 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
 }
 
 static enum ocfs2_contig_type
-	ocfs2_extent_contig(struct super_block *sb,
-			    struct ocfs2_extent_rec *ext,
-			    struct ocfs2_extent_rec *insert_rec)
+	ocfs2_extent_rec_contig(struct super_block *sb,
+				struct ocfs2_extent_rec *ext,
+				struct ocfs2_extent_rec *insert_rec)
 {
 	u64 blkno = le64_to_cpu(insert_rec->e_blkno);
 
@@ -4400,7 +4425,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
 		} else {
-			ret = ocfs2_extent_contig(sb, rec, split_rec);
+			ret = ocfs2_et_extent_contig(et, rec, split_rec);
 		}
 	}
 
@@ -4445,7 +4470,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
 	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		contig_type = ocfs2_extent_contig(sb, rec, split_rec);
+		contig_type = ocfs2_et_extent_contig(et, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
 			ret = CONTIG_LEFTRIGHT;
@@ -4473,8 +4498,8 @@ static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
 	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
 
 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
-		contig_type = ocfs2_extent_contig(ocfs2_metadata_cache_get_super(et->et_ci),
-						  &el->l_recs[i], insert_rec);
+		contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i],
+						     insert_rec);
 		if (contig_type != CONTIG_NONE) {
 			insert->ins_contig_index = i;
 			break;
-- 
cgit v0.10.2


From 555936bfcb1af26c6919d6cedb83710bb03d4322 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:21 +0800
Subject: ocfs2: Abstract extent split process.

ocfs2_mark_extent_written actually does the following things:
1. check the parameters.
2. initialize the left_path and split_rec.
3. call __ocfs2_mark_extent_written. it will do:
   1) check the flags of unwritten
   2) do the real split work.
The whole process is packed tightly somehow. So this patch
will abstract 2 different functions so that future b-tree
operation can work with it.

1. __ocfs2_split_extent will accept path and split_rec and do
  the real split work.
2. ocfs2_change_extent_flag will accept a new flag and initialize
   path and split_rec.

So now ocfs2_mark_extent_written will do:
1. check the parameters.
2. call ocfs2_change_extent_flag.
   1) initalize the left_path and split_rec.
   2) check whether the new flags conflict with the old one.
   3) call __ocfs2_split_extent to do the split.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 75e65df..14b9106 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5032,9 +5032,8 @@ out:
 }
 
 /*
- * Mark part or all of the extent record at split_index in the leaf
- * pointed to by path as written. This removes the unwritten
- * extent flag.
+ * Split part or all of the extent record at split_index in the leaf
+ * pointed to by path. Merge with the contiguous extent record if needed.
  *
  * Care is taken to handle contiguousness so as to not grow the tree.
  *
@@ -5051,13 +5050,13 @@ out:
  * have been brought into cache (and pinned via the journal), so the
  * extra overhead is not expressed in terms of disk reads.
  */
-static int __ocfs2_mark_extent_written(handle_t *handle,
-				       struct ocfs2_extent_tree *et,
-				       struct ocfs2_path *path,
-				       int split_index,
-				       struct ocfs2_extent_rec *split_rec,
-				       struct ocfs2_alloc_context *meta_ac,
-				       struct ocfs2_cached_dealloc_ctxt *dealloc)
+static int __ocfs2_split_extent(handle_t *handle,
+				struct ocfs2_extent_tree *et,
+				struct ocfs2_path *path,
+				int split_index,
+				struct ocfs2_extent_rec *split_rec,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5066,12 +5065,6 @@ static int __ocfs2_mark_extent_written(handle_t *handle,
 	struct ocfs2_merge_ctxt ctxt;
 	struct ocfs2_extent_list *rightmost_el;
 
-	if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
-		ret = -EIO;
-		mlog_errno(ret);
-		goto out;
-	}
-
 	if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
 	    ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
 	     (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
@@ -5141,42 +5134,31 @@ out:
 }
 
 /*
- * Mark the already-existing extent at cpos as written for len clusters.
+ * Change the flags of the already-existing extent at cpos for len clusters.
+ *
+ * new_flags: the flags we want to set.
+ * clear_flags: the flags we want to clear.
+ * phys: the new physical offset we want this new extent starts from.
  *
  * If the existing extent is larger than the request, initiate a
  * split. An attempt will be made at merging with adjacent extents.
  *
  * The caller is responsible for passing down meta_ac if we'll need it.
  */
-int ocfs2_mark_extent_written(struct inode *inode,
-			      struct ocfs2_extent_tree *et,
-			      handle_t *handle, u32 cpos, u32 len, u32 phys,
-			      struct ocfs2_alloc_context *meta_ac,
-			      struct ocfs2_cached_dealloc_ctxt *dealloc)
+static int ocfs2_change_extent_flag(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
+				    u32 cpos, u32 len, u32 phys,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct ocfs2_cached_dealloc_ctxt *dealloc,
+				    int new_flags, int clear_flags)
 {
 	int ret, index;
-	u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys);
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
+	u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys);
 	struct ocfs2_extent_rec split_rec;
 	struct ocfs2_path *left_path = NULL;
 	struct ocfs2_extent_list *el;
-
-	mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
-	     inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
-
-	if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
-		ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
-			    "that are being written to, but the feature bit "
-			    "is not set in the super block.",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
-		ret = -EROFS;
-		goto out;
-	}
-
-	/*
-	 * XXX: This should be fixed up so that we just re-insert the
-	 * next extent records.
-	 */
-	ocfs2_et_extent_map_truncate(et, 0);
+	struct ocfs2_extent_rec *rec;
 
 	left_path = ocfs2_new_path_from_et(et);
 	if (!left_path) {
@@ -5194,30 +5176,98 @@ int ocfs2_mark_extent_written(struct inode *inode,
 
 	index = ocfs2_search_extent_list(el, cpos);
 	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has an extent at cpos %u which can no "
+		ocfs2_error(sb,
+			    "Owner %llu has an extent at cpos %u which can no "
 			    "longer be found.\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
+			     (unsigned long long)
+			     ocfs2_metadata_cache_owner(et->et_ci), cpos);
 		ret = -EROFS;
 		goto out;
 	}
 
+	ret = -EIO;
+	rec = &el->l_recs[index];
+	if (new_flags && (rec->e_flags & new_flags)) {
+		mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
+		     "extent that already had them",
+		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+		     new_flags);
+		goto out;
+	}
+
+	if (clear_flags && !(rec->e_flags & clear_flags)) {
+		mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
+		     "extent that didn't have them",
+		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+		     clear_flags);
+		goto out;
+	}
+
 	memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
 	split_rec.e_cpos = cpu_to_le32(cpos);
 	split_rec.e_leaf_clusters = cpu_to_le16(len);
 	split_rec.e_blkno = cpu_to_le64(start_blkno);
-	split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
-	split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
-
-	ret = __ocfs2_mark_extent_written(handle, et, left_path,
-					  index, &split_rec, meta_ac,
-					  dealloc);
+	split_rec.e_flags = rec->e_flags;
+	if (new_flags)
+		split_rec.e_flags |= new_flags;
+	if (clear_flags)
+		split_rec.e_flags &= ~clear_flags;
+
+	ret = __ocfs2_split_extent(handle, et, left_path,
+				  index, &split_rec, meta_ac,
+				  dealloc);
 	if (ret)
 		mlog_errno(ret);
 
 out:
 	ocfs2_free_path(left_path);
 	return ret;
+
+}
+
+/*
+ * Mark the already-existing extent at cpos as written for len clusters.
+ * This removes the unwritten extent flag.
+ *
+ * If the existing extent is larger than the request, initiate a
+ * split. An attempt will be made at merging with adjacent extents.
+ *
+ * The caller is responsible for passing down meta_ac if we'll need it.
+ */
+int ocfs2_mark_extent_written(struct inode *inode,
+			      struct ocfs2_extent_tree *et,
+			      handle_t *handle, u32 cpos, u32 len, u32 phys,
+			      struct ocfs2_alloc_context *meta_ac,
+			      struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+
+	mlog(0, "Inode %lu cpos %u, len %u, phys clusters %u\n",
+	     inode->i_ino, cpos, len, phys);
+
+	if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
+			    "that are being written to, but the feature bit "
+			    "is not set in the super block.",
+			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
+		ret = -EROFS;
+		goto out;
+	}
+
+	/*
+	 * XXX: This should be fixed up so that we just re-insert the
+	 * next extent records.
+	 */
+	ocfs2_et_extent_map_truncate(et, 0);
+
+	ret = ocfs2_change_extent_flag(handle, et, cpos,
+				       len, phys, meta_ac, dealloc,
+				       0, OCFS2_EXT_UNWRITTEN);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
 }
 
 static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
-- 
cgit v0.10.2


From fe924415957e60471536762172d127e85519ef78 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:25 +0800
Subject: ocfs2: Add refcount b-tree as a new extent tree.

Add refcount b-tree as a new extent tree so that it can
use the b-tree to store and maniuplate ocfs2_refcount_rec.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 14b9106..a629656 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -385,6 +385,52 @@ static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
 	.eo_fill_root_el	= ocfs2_dx_root_fill_root_el,
 };
 
+static void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	et->et_root_el = &rb->rf_list;
+}
+
+static void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
+						u64 blkno)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	rb->rf_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	return le64_to_cpu(rb->rf_last_eb_blk);
+}
+
+static void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et,
+						u32 clusters)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	le32_add_cpu(&rb->rf_clusters, clusters);
+}
+
+static enum ocfs2_contig_type
+ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
+				  struct ocfs2_extent_rec *ext,
+				  struct ocfs2_extent_rec *insert_rec)
+{
+	return CONTIG_NONE;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
+	.eo_set_last_eb_blk	= ocfs2_refcount_tree_set_last_eb_blk,
+	.eo_get_last_eb_blk	= ocfs2_refcount_tree_get_last_eb_blk,
+	.eo_update_clusters	= ocfs2_refcount_tree_update_clusters,
+	.eo_fill_root_el	= ocfs2_refcount_tree_fill_root_el,
+	.eo_extent_contig	= ocfs2_refcount_tree_extent_contig,
+};
+
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *bh,
@@ -439,6 +485,14 @@ void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
 				 NULL, &ocfs2_dx_root_et_ops);
 }
 
+void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *bh)
+{
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb,
+				 NULL, &ocfs2_refcount_tree_et_ops);
+}
+
 static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					    u64 new_last_eb_blk)
 {
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index bcf6aa4..df0e778 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -80,6 +80,9 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
 				    struct ocfs2_caching_info *ci,
 				    struct buffer_head *bh);
+void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *bh);
 
 /*
  * Read an extent block into *bh.  If *bh is NULL, a bh will be
-- 
cgit v0.10.2


From e2e9f6082b5ff099978774d5c0148e062344c2f9 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:34 +0800
Subject: ocfs2: move tree path functions to alloc.h.

Now fs/ocfs2/alloc.c has more than 7000 lines. It contains our
basic b-tree operation. Although we have already make our b-tree
operation generic, the basic structrue ocfs2_path which is used
to iterate one b-tree branch is still static and limited to only
used in alloc.c. As refcount tree need them and I don't want to
add any more b-tree unrelated code to alloc.c, export them out.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a629656..2c8ce32 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -567,36 +567,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
 static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
 					 struct ocfs2_extent_block *eb);
-
-/*
- * Structures which describe a path through a btree, and functions to
- * manipulate them.
- *
- * The idea here is to be as generic as possible with the tree
- * manipulation code.
- */
-struct ocfs2_path_item {
-	struct buffer_head		*bh;
-	struct ocfs2_extent_list	*el;
-};
-
-#define OCFS2_MAX_PATH_DEPTH	5
-
-struct ocfs2_path {
-	int				p_tree_depth;
-	ocfs2_journal_access_func	p_root_access;
-	struct ocfs2_path_item		p_node[OCFS2_MAX_PATH_DEPTH];
-};
-
-#define path_root_bh(_path) ((_path)->p_node[0].bh)
-#define path_root_el(_path) ((_path)->p_node[0].el)
-#define path_root_access(_path)((_path)->p_root_access)
-#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
-#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
-#define path_num_items(_path) ((_path)->p_tree_depth + 1)
-
-static int ocfs2_find_path(struct ocfs2_caching_info *ci,
-			   struct ocfs2_path *path, u32 cpos);
 static void ocfs2_adjust_rightmost_records(handle_t *handle,
 					   struct ocfs2_extent_tree *et,
 					   struct ocfs2_path *path,
@@ -606,7 +576,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
  * to build another path. Generally, this involves freeing the buffer
  * heads.
  */
-static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
+void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
 {
 	int i, start = 0, depth = 0;
 	struct ocfs2_path_item *node;
@@ -635,7 +605,7 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
 	path->p_tree_depth = depth;
 }
 
-static void ocfs2_free_path(struct ocfs2_path *path)
+void ocfs2_free_path(struct ocfs2_path *path)
 {
 	if (path) {
 		ocfs2_reinit_path(path, 0);
@@ -733,13 +703,13 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
 	return path;
 }
 
-static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
+struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
 {
 	return ocfs2_new_path(path_root_bh(path), path_root_el(path),
 			      path_root_access(path));
 }
 
-static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
+struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
 {
 	return ocfs2_new_path(et->et_root_bh, et->et_root_el,
 			      et->et_root_journal_access);
@@ -752,10 +722,10 @@ static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
  * I don't like the way this function's name looks next to
  * ocfs2_journal_access_path(), but I don't have a better one.
  */
-static int ocfs2_path_bh_journal_access(handle_t *handle,
-					struct ocfs2_caching_info *ci,
-					struct ocfs2_path *path,
-					int idx)
+int ocfs2_path_bh_journal_access(handle_t *handle,
+				 struct ocfs2_caching_info *ci,
+				 struct ocfs2_path *path,
+				 int idx)
 {
 	ocfs2_journal_access_func access = path_root_access(path);
 
@@ -772,9 +742,9 @@ static int ocfs2_path_bh_journal_access(handle_t *handle,
 /*
  * Convenience function to journal all components in a path.
  */
-static int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
-				     handle_t *handle,
-				     struct ocfs2_path *path)
+int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+			      handle_t *handle,
+			      struct ocfs2_path *path)
 {
 	int i, ret = 0;
 
@@ -1942,8 +1912,8 @@ static void find_path_ins(void *data, struct buffer_head *bh)
 	ocfs2_path_insert_eb(fp->path, fp->index, bh);
 	fp->index++;
 }
-static int ocfs2_find_path(struct ocfs2_caching_info *ci,
-			   struct ocfs2_path *path, u32 cpos)
+int ocfs2_find_path(struct ocfs2_caching_info *ci,
+		    struct ocfs2_path *path, u32 cpos)
 {
 	struct find_path_data data;
 
@@ -5104,13 +5074,13 @@ out:
  * have been brought into cache (and pinned via the journal), so the
  * extra overhead is not expressed in terms of disk reads.
  */
-static int __ocfs2_split_extent(handle_t *handle,
-				struct ocfs2_extent_tree *et,
-				struct ocfs2_path *path,
-				int split_index,
-				struct ocfs2_extent_rec *split_rec,
-				struct ocfs2_alloc_context *meta_ac,
-				struct ocfs2_cached_dealloc_ctxt *dealloc)
+int ocfs2_split_extent(handle_t *handle,
+		       struct ocfs2_extent_tree *et,
+		       struct ocfs2_path *path,
+		       int split_index,
+		       struct ocfs2_extent_rec *split_rec,
+		       struct ocfs2_alloc_context *meta_ac,
+		       struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5267,9 +5237,9 @@ static int ocfs2_change_extent_flag(handle_t *handle,
 	if (clear_flags)
 		split_rec.e_flags &= ~clear_flags;
 
-	ret = __ocfs2_split_extent(handle, et, left_path,
-				  index, &split_rec, meta_ac,
-				  dealloc);
+	ret = ocfs2_split_extent(handle, et, left_path,
+				 index, &split_rec, meta_ac,
+				 dealloc);
 	if (ret)
 		mlog_errno(ret);
 
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index df0e778..3f43489 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -115,6 +115,14 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 				struct ocfs2_alloc_context *meta_ac,
 				enum ocfs2_alloc_restarted *reason_ret);
 struct ocfs2_cached_dealloc_ctxt;
+struct ocfs2_path;
+int ocfs2_split_extent(handle_t *handle,
+		       struct ocfs2_extent_tree *et,
+		       struct ocfs2_path *path,
+		       int split_index,
+		       struct ocfs2_extent_rec *split_rec,
+		       struct ocfs2_alloc_context *meta_ac,
+		       struct ocfs2_cached_dealloc_ctxt *dealloc);
 int ocfs2_mark_extent_written(struct inode *inode,
 			      struct ocfs2_extent_tree *et,
 			      handle_t *handle, u32 cpos, u32 len, u32 phys,
@@ -254,4 +262,45 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
 	return !rec->e_leaf_clusters;
 }
 
+/*
+ * Structures which describe a path through a btree, and functions to
+ * manipulate them.
+ *
+ * The idea here is to be as generic as possible with the tree
+ * manipulation code.
+ */
+struct ocfs2_path_item {
+	struct buffer_head		*bh;
+	struct ocfs2_extent_list	*el;
+};
+
+#define OCFS2_MAX_PATH_DEPTH	5
+
+struct ocfs2_path {
+	int				p_tree_depth;
+	ocfs2_journal_access_func	p_root_access;
+	struct ocfs2_path_item		p_node[OCFS2_MAX_PATH_DEPTH];
+};
+
+#define path_root_bh(_path) ((_path)->p_node[0].bh)
+#define path_root_el(_path) ((_path)->p_node[0].el)
+#define path_root_access(_path)((_path)->p_root_access)
+#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
+#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
+#define path_num_items(_path) ((_path)->p_tree_depth + 1)
+
+void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root);
+void ocfs2_free_path(struct ocfs2_path *path);
+int ocfs2_find_path(struct ocfs2_caching_info *ci,
+		    struct ocfs2_path *path,
+		    u32 cpos);
+struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path);
+struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et);
+int ocfs2_path_bh_journal_access(handle_t *handle,
+				 struct ocfs2_caching_info *ci,
+				 struct ocfs2_path *path,
+				 int idx);
+int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+			      handle_t *handle,
+			      struct ocfs2_path *path);
 #endif /* OCFS2_ALLOC_H */
-- 
cgit v0.10.2


From e73a819db9c2d6c4065b7cab7374709b6939e8f1 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 11 Aug 2009 14:33:14 +0800
Subject: ocfs2: Add support for incrementing refcount in the tree.

    Given a physical cpos and length, increment the refcount
in the tree. If the extent has not been seen before, a refcount
record is created for it. Refcount records may be merged or
split by this operation.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index dc9482c..40b5105 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -353,11 +353,11 @@ static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
  * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
  * containing el.
  */
-static int ocfs2_figure_hole_clusters(struct inode *inode,
-				      struct ocfs2_extent_list *el,
-				      struct buffer_head *eb_bh,
-				      u32 v_cluster,
-				      u32 *num_clusters)
+int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
+			       struct ocfs2_extent_list *el,
+			       struct buffer_head *eb_bh,
+			       u32 v_cluster,
+			       u32 *num_clusters)
 {
 	int ret, i;
 	struct buffer_head *next_eb_bh = NULL;
@@ -375,7 +375,7 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 			goto no_more_extents;
 
-		ret = ocfs2_read_extent_block(INODE_CACHE(inode),
+		ret = ocfs2_read_extent_block(ci,
 					      le64_to_cpu(eb->h_next_leaf_blk),
 					      &next_eb_bh);
 		if (ret) {
@@ -456,7 +456,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 		 * field.
 		 */
 		if (hole_len) {
-			ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
+			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
+							 el, eb_bh,
 							 v_cluster, &len);
 			if (ret) {
 				mlog_errno(ret);
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index b7dd973..9942f47 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -61,6 +61,11 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 			   struct buffer_head *bhs[], int flags,
 			   int (*validate)(struct super_block *sb,
 					   struct buffer_head *bh));
+int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
+			       struct ocfs2_extent_list *el,
+			       struct buffer_head *eb_bh,
+			       u32 v_cluster,
+			       u32 *num_clusters);
 static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
 					struct buffer_head **bh,
 					int (*validate)(struct super_block *sb,
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e4288b4..40072cd 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -916,6 +916,7 @@ struct ocfs2_refcount_rec {
 	__le32 r_refcount;	/* Reference count of this extent */
 /*10*/
 };
+#define OCFS2_32BIT_POS_MASK		(0xffffffffULL)
 
 #define OCFS2_REFCOUNT_LEAF_FL          (0x00000001)
 #define OCFS2_REFCOUNT_TREE_FL          (0x00000002)
@@ -1394,6 +1395,12 @@ static inline u16 ocfs2_refcount_recs_per_rb(struct super_block *sb)
 
 	return size / sizeof(struct ocfs2_refcount_rec);
 }
+
+static inline u32
+ocfs2_get_ref_rec_low_cpos(const struct ocfs2_refcount_rec *rec)
+{
+	return le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
+}
 #else
 static inline int ocfs2_fast_symlink_chars(int blocksize)
 {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index d0d6fa3..ee0422c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -15,6 +15,7 @@
  * General Public License for more details.
  */
 
+#include <linux/sort.h>
 #define MLOG_MASK_PREFIX ML_REFCOUNT
 #include <cluster/masklog.h>
 #include "ocfs2.h"
@@ -29,6 +30,7 @@
 #include "refcounttree.h"
 #include "sysfile.h"
 #include "dlmglue.h"
+#include "extent_map.h"
 
 static inline struct ocfs2_refcount_tree *
 cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -848,3 +850,1054 @@ out:
 
 	return ret;
 }
+
+static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
+					  struct buffer_head *ref_leaf_bh,
+					  u64 cpos, unsigned int len,
+					  struct ocfs2_refcount_rec *ret_rec,
+					  int *index)
+{
+	int i = 0;
+	struct ocfs2_refcount_block *rb =
+		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_rec *rec = NULL;
+
+	for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) {
+		rec = &rb->rf_records.rl_recs[i];
+
+		if (le64_to_cpu(rec->r_cpos) +
+		    le32_to_cpu(rec->r_clusters) <= cpos)
+			continue;
+		else if (le64_to_cpu(rec->r_cpos) > cpos)
+			break;
+
+		/* ok, cpos fail in this rec. Just return. */
+		if (ret_rec)
+			*ret_rec = *rec;
+		goto out;
+	}
+
+	if (ret_rec) {
+		/* We meet with a hole here, so fake the rec. */
+		ret_rec->r_cpos = cpu_to_le64(cpos);
+		ret_rec->r_refcount = 0;
+		if (i < le16_to_cpu(rb->rf_records.rl_used) &&
+		    le64_to_cpu(rec->r_cpos) < cpos + len)
+			ret_rec->r_clusters =
+				cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos);
+		else
+			ret_rec->r_clusters = cpu_to_le32(len);
+	}
+
+out:
+	*index = i;
+}
+
+/*
+ * Given a cpos and len, try to find the refcount record which contains cpos.
+ * 1. If cpos can be found in one refcount record, return the record.
+ * 2. If cpos can't be found, return a fake record which start from cpos
+ *    and end at a small value between cpos+len and start of the next record.
+ *    This fake record has r_refcount = 0.
+ */
+static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
+				  struct buffer_head *ref_root_bh,
+				  u64 cpos, unsigned int len,
+				  struct ocfs2_refcount_rec *ret_rec,
+				  int *index,
+				  struct buffer_head **ret_bh)
+{
+	int ret = 0, i, found;
+	u32 low_cpos;
+	struct ocfs2_extent_list *el;
+	struct ocfs2_extent_rec *tmp, *rec = NULL;
+	struct ocfs2_extent_block *eb;
+	struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
+		ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
+					      ret_rec, index);
+		*ret_bh = ref_root_bh;
+		get_bh(ref_root_bh);
+		return 0;
+	}
+
+	el = &rb->rf_list;
+	low_cpos = cpos & OCFS2_32BIT_POS_MASK;
+
+	if (el->l_tree_depth) {
+		ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+		el = &eb->h_list;
+
+		if (el->l_tree_depth) {
+			ocfs2_error(sb,
+			"refcount tree %llu has non zero tree "
+			"depth in leaf btree tree block %llu\n",
+			(unsigned long long)ocfs2_metadata_cache_owner(ci),
+			(unsigned long long)eb_bh->b_blocknr);
+			ret = -EROFS;
+			goto out;
+		}
+	}
+
+	found = 0;
+	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
+		rec = &el->l_recs[i];
+
+		if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
+			found = 1;
+			break;
+		}
+	}
+
+	/* adjust len when we have ocfs2_extent_rec after it. */
+	if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
+		tmp = &el->l_recs[i+1];
+
+		if (le32_to_cpu(tmp->e_cpos) < cpos + len)
+			len = le32_to_cpu(tmp->e_cpos) - cpos;
+	}
+
+	ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
+					&ref_leaf_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
+				      ret_rec, index);
+	*ret_bh = ref_leaf_bh;
+out:
+	brelse(eb_bh);
+	return ret;
+}
+
+enum ocfs2_ref_rec_contig {
+	REF_CONTIG_NONE = 0,
+	REF_CONTIG_LEFT,
+	REF_CONTIG_RIGHT,
+	REF_CONTIG_LEFTRIGHT,
+};
+
+static enum ocfs2_ref_rec_contig
+	ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb,
+				    int index)
+{
+	if ((rb->rf_records.rl_recs[index].r_refcount ==
+	    rb->rf_records.rl_recs[index + 1].r_refcount) &&
+	    (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) +
+	    le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) ==
+	    le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos)))
+		return REF_CONTIG_RIGHT;
+
+	return REF_CONTIG_NONE;
+}
+
+static enum ocfs2_ref_rec_contig
+	ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb,
+				  int index)
+{
+	enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE;
+
+	if (index < le16_to_cpu(rb->rf_records.rl_used) - 1)
+		ret = ocfs2_refcount_rec_adjacent(rb, index);
+
+	if (index > 0) {
+		enum ocfs2_ref_rec_contig tmp;
+
+		tmp = ocfs2_refcount_rec_adjacent(rb, index - 1);
+
+		if (tmp == REF_CONTIG_RIGHT) {
+			if (ret == REF_CONTIG_RIGHT)
+				ret = REF_CONTIG_LEFTRIGHT;
+			else
+				ret = REF_CONTIG_LEFT;
+		}
+	}
+
+	return ret;
+}
+
+static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb,
+					   int index)
+{
+	BUG_ON(rb->rf_records.rl_recs[index].r_refcount !=
+	       rb->rf_records.rl_recs[index+1].r_refcount);
+
+	le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters,
+		     le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters));
+
+	if (index < le16_to_cpu(rb->rf_records.rl_used) - 2)
+		memmove(&rb->rf_records.rl_recs[index + 1],
+			&rb->rf_records.rl_recs[index + 2],
+			sizeof(struct ocfs2_refcount_rec) *
+			(le16_to_cpu(rb->rf_records.rl_used) - index - 2));
+
+	memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1],
+	       0, sizeof(struct ocfs2_refcount_rec));
+	le16_add_cpu(&rb->rf_records.rl_used, -1);
+}
+
+/*
+ * Merge the refcount rec if we are contiguous with the adjacent recs.
+ */
+static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
+				     int index)
+{
+	enum ocfs2_ref_rec_contig contig =
+				ocfs2_refcount_rec_contig(rb, index);
+
+	if (contig == REF_CONTIG_NONE)
+		return;
+
+	if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) {
+		BUG_ON(index == 0);
+		index--;
+	}
+
+	ocfs2_rotate_refcount_rec_left(rb, index);
+
+	if (contig == REF_CONTIG_LEFTRIGHT)
+		ocfs2_rotate_refcount_rec_left(rb, index);
+}
+
+static int ocfs2_change_refcount_rec(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_leaf_bh,
+				     int index, int change)
+{
+	int ret;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "change index %d, old count %u, change %d\n", index,
+	     le32_to_cpu(rec->r_refcount), change);
+	le32_add_cpu(&rec->r_refcount, change);
+
+	ocfs2_refcount_rec_merge(rb, index);
+
+	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
+	if (ret)
+		mlog_errno(ret);
+out:
+	return ret;
+}
+
+static int ocfs2_expand_inline_ref_root(handle_t *handle,
+					struct ocfs2_caching_info *ci,
+					struct buffer_head *ref_root_bh,
+					struct buffer_head **ref_leaf_bh,
+					struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	u16 suballoc_bit_start;
+	u32 num_got;
+	u64 blkno;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_refcount_block *new_rb;
+	struct ocfs2_refcount_block *root_rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	new_bh = sb_getblk(sb, blkno);
+	if (new_bh == NULL) {
+		ret = -EIO;
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_set_new_buffer_uptodate(ci, new_bh);
+
+	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * Initialize ocfs2_refcount_block.
+	 * It should contain the same information as the old root.
+	 * so just memcpy it and change the corresponding field.
+	 */
+	memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);
+
+	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
+	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
+	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	new_rb->rf_blkno = cpu_to_le64(blkno);
+	new_rb->rf_cpos = cpu_to_le32(0);
+	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
+	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
+	ocfs2_journal_dirty(handle, new_bh);
+
+	/* Now change the root. */
+	memset(&root_rb->rf_list, 0, sb->s_blocksize -
+	       offsetof(struct ocfs2_refcount_block, rf_list));
+	root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
+	root_rb->rf_clusters = cpu_to_le32(1);
+	root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
+	root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
+	root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
+	root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);
+
+	ocfs2_journal_dirty(handle, ref_root_bh);
+
+	mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno,
+	     le16_to_cpu(new_rb->rf_records.rl_used));
+
+	*ref_leaf_bh = new_bh;
+	new_bh = NULL;
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev,
+					   struct ocfs2_refcount_rec *next)
+{
+	if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <=
+		ocfs2_get_ref_rec_low_cpos(next))
+		return 1;
+
+	return 0;
+}
+
+static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b)
+{
+	const struct ocfs2_refcount_rec *l = a, *r = b;
+	u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l);
+	u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r);
+
+	if (l_cpos > r_cpos)
+		return 1;
+	if (l_cpos < r_cpos)
+		return -1;
+	return 0;
+}
+
+static int cmp_refcount_rec_by_cpos(const void *a, const void *b)
+{
+	const struct ocfs2_refcount_rec *l = a, *r = b;
+	u64 l_cpos = le64_to_cpu(l->r_cpos);
+	u64 r_cpos = le64_to_cpu(r->r_cpos);
+
+	if (l_cpos > r_cpos)
+		return 1;
+	if (l_cpos < r_cpos)
+		return -1;
+	return 0;
+}
+
+static void swap_refcount_rec(void *a, void *b, int size)
+{
+	struct ocfs2_refcount_rec *l = a, *r = b, tmp;
+
+	tmp = *(struct ocfs2_refcount_rec *)l;
+	*(struct ocfs2_refcount_rec *)l =
+			*(struct ocfs2_refcount_rec *)r;
+	*(struct ocfs2_refcount_rec *)r = tmp;
+}
+
+/*
+ * The refcount cpos are ordered by their 64bit cpos,
+ * But we will use the low 32 bit to be the e_cpos in the b-tree.
+ * So we need to make sure that this pos isn't intersected with others.
+ *
+ * Note: The refcount block is already sorted by their low 32 bit cpos,
+ *       So just try the middle pos first, and we will exit when we find
+ *       the good position.
+ */
+static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
+					 u32 *split_pos, int *split_index)
+{
+	int num_used = le16_to_cpu(rl->rl_used);
+	int delta, middle = num_used / 2;
+
+	for (delta = 0; delta < middle; delta++) {
+		/* Let's check delta earlier than middle */
+		if (ocfs2_refcount_rec_no_intersect(
+					&rl->rl_recs[middle - delta - 1],
+					&rl->rl_recs[middle - delta])) {
+			*split_index = middle - delta;
+			break;
+		}
+
+		/* For even counts, don't walk off the end */
+		if ((middle + delta + 1) == num_used)
+			continue;
+
+		/* Now try delta past middle */
+		if (ocfs2_refcount_rec_no_intersect(
+					&rl->rl_recs[middle + delta],
+					&rl->rl_recs[middle + delta + 1])) {
+			*split_index = middle + delta + 1;
+			break;
+		}
+	}
+
+	if (delta >= middle)
+		return -ENOSPC;
+
+	*split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
+	return 0;
+}
+
+static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
+					    struct buffer_head *new_bh,
+					    u32 *split_cpos)
+{
+	int split_index = 0, num_moved, ret;
+	u32 cpos = 0;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_list *rl = &rb->rf_records;
+	struct ocfs2_refcount_block *new_rb =
+			(struct ocfs2_refcount_block *)new_bh->b_data;
+	struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;
+
+	mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n",
+	     (unsigned long long)ref_leaf_bh->b_blocknr,
+	     le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
+
+	/*
+	 * XXX: Improvement later.
+	 * If we know all the high 32 bit cpos is the same, no need to sort.
+	 *
+	 * In order to make the whole process safe, we do:
+	 * 1. sort the entries by their low 32 bit cpos first so that we can
+	 *    find the split cpos easily.
+	 * 2. call ocfs2_insert_extent to insert the new refcount block.
+	 * 3. move the refcount rec to the new block.
+	 * 4. sort the entries by their 64 bit cpos.
+	 * 5. dirty the new_rb and rb.
+	 */
+	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
+	     sizeof(struct ocfs2_refcount_rec),
+	     cmp_refcount_rec_by_low_cpos, swap_refcount_rec);
+
+	ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	new_rb->rf_cpos = cpu_to_le32(cpos);
+
+	/* move refcount records starting from split_index to the new block. */
+	num_moved = le16_to_cpu(rl->rl_used) - split_index;
+	memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
+	       num_moved * sizeof(struct ocfs2_refcount_rec));
+
+	/*ok, remove the entries we just moved over to the other block. */
+	memset(&rl->rl_recs[split_index], 0,
+	       num_moved * sizeof(struct ocfs2_refcount_rec));
+
+	/* change old and new rl_used accordingly. */
+	le16_add_cpu(&rl->rl_used, -num_moved);
+	new_rl->rl_used = cpu_to_le32(num_moved);
+
+	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
+	     sizeof(struct ocfs2_refcount_rec),
+	     cmp_refcount_rec_by_cpos, swap_refcount_rec);
+
+	sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
+	     sizeof(struct ocfs2_refcount_rec),
+	     cmp_refcount_rec_by_cpos, swap_refcount_rec);
+
+	*split_cpos = cpos;
+	return 0;
+}
+
+static int ocfs2_new_leaf_refcount_block(handle_t *handle,
+					 struct ocfs2_caching_info *ci,
+					 struct buffer_head *ref_root_bh,
+					 struct buffer_head *ref_leaf_bh,
+					 struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	u16 suballoc_bit_start;
+	u32 num_got, new_cpos;
+	u64 blkno;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct ocfs2_refcount_block *root_rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_refcount_block *new_rb;
+	struct ocfs2_extent_tree ref_et;
+
+	BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	new_bh = sb_getblk(sb, blkno);
+	if (new_bh == NULL) {
+		ret = -EIO;
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_set_new_buffer_uptodate(ci, new_bh);
+
+	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* Initialize ocfs2_refcount_block. */
+	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
+	memset(new_rb, 0, sb->s_blocksize);
+	strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
+	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
+	new_rb->rf_blkno = cpu_to_le64(blkno);
+	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
+	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
+	new_rb->rf_records.rl_count =
+				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
+	new_rb->rf_generation = root_rb->rf_generation;
+
+	ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_journal_dirty(handle, ref_leaf_bh);
+	ocfs2_journal_dirty(handle, new_bh);
+
+	ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);
+
+	mlog(0, "insert new leaf block %llu at %u\n",
+	     (unsigned long long)new_bh->b_blocknr, new_cpos);
+
+	/* Insert the new leaf block with the specific offset cpos. */
+	ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
+				  1, 0, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+static int ocfs2_expand_refcount_tree(handle_t *handle,
+				      struct ocfs2_caching_info *ci,
+				      struct buffer_head *ref_root_bh,
+				      struct buffer_head *ref_leaf_bh,
+				      struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	struct buffer_head *expand_bh = NULL;
+
+	if (ref_root_bh == ref_leaf_bh) {
+		/*
+		 * the old root bh hasn't been expanded to a b-tree,
+		 * so expand it first.
+		 */
+		ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
+						   &expand_bh, meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	} else {
+		expand_bh = ref_leaf_bh;
+		get_bh(expand_bh);
+	}
+
+
+	/* Now add a new refcount block into the tree.*/
+	ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
+					    expand_bh, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+out:
+	brelse(expand_bh);
+	return ret;
+}
+
+/*
+ * Adjust the extent rec in b-tree representing ref_leaf_bh.
+ *
+ * Only called when we have inserted a new refcount rec at index 0
+ * which means ocfs2_extent_rec.e_cpos may need some change.
+ */
+static int ocfs2_adjust_refcount_rec(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     struct buffer_head *ref_leaf_bh,
+				     struct ocfs2_refcount_rec *rec)
+{
+	int ret = 0, i;
+	u32 new_cpos, old_cpos;
+	struct ocfs2_path *path = NULL;
+	struct ocfs2_extent_tree et;
+	struct ocfs2_refcount_block *rb =
+		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	struct ocfs2_extent_list *el;
+
+	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
+		goto out;
+
+	rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	old_cpos = le32_to_cpu(rb->rf_cpos);
+	new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
+	if (old_cpos <= new_cpos)
+		goto out;
+
+	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
+
+	path = ocfs2_new_path_from_et(&et);
+	if (!path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(ci, path, old_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * 2 more credits, one for the leaf refcount block, one for
+	 * the extent block contains the extent rec.
+	 */
+	ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* change the leaf extent block first. */
+	el = path_leaf_el(path);
+
+	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++)
+		if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos)
+			break;
+
+	BUG_ON(i == le16_to_cpu(el->l_next_free_rec));
+
+	el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
+
+	/* change the r_cpos in the leaf block. */
+	rb->rf_cpos = cpu_to_le32(new_cpos);
+
+	ocfs2_journal_dirty(handle, path_leaf_bh(path));
+	ocfs2_journal_dirty(handle, ref_leaf_bh);
+
+out:
+	ocfs2_free_path(path);
+	return ret;
+}
+
+static int ocfs2_insert_refcount_rec(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     struct buffer_head *ref_leaf_bh,
+				     struct ocfs2_refcount_rec *rec,
+				     int index,
+				     struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
+	struct buffer_head *new_bh = NULL;
+
+	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
+
+	if (rf_list->rl_used == rf_list->rl_count) {
+		u64 cpos = le64_to_cpu(rec->r_cpos);
+		u32 len = le32_to_cpu(rec->r_clusters);
+
+		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
+						 ref_leaf_bh, meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, NULL, &index,
+					     &new_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ref_leaf_bh = new_bh;
+		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+		rf_list = &rb->rf_records;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (index < le16_to_cpu(rf_list->rl_used))
+		memmove(&rf_list->rl_recs[index + 1],
+			&rf_list->rl_recs[index],
+			(le16_to_cpu(rf_list->rl_used) - index) *
+			 sizeof(struct ocfs2_refcount_rec));
+
+	mlog(0, "insert refcount record start %llu, len %u, count %u "
+	     "to leaf block %llu at index %d\n",
+	     (unsigned long long)le64_to_cpu(rec->r_cpos),
+	     le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
+	     (unsigned long long)ref_leaf_bh->b_blocknr, index);
+
+	rf_list->rl_recs[index] = *rec;
+
+	le16_add_cpu(&rf_list->rl_used, 1);
+
+	ocfs2_refcount_rec_merge(rb, index);
+
+	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (index == 0) {
+		ret = ocfs2_adjust_refcount_rec(handle, ci,
+						ref_root_bh,
+						ref_leaf_bh, rec);
+		if (ret)
+			mlog_errno(ret);
+	}
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+/*
+ * Split the refcount_rec indexed by "index" in ref_leaf_bh.
+ * This is much simple than our b-tree code.
+ * split_rec is the new refcount rec we want to insert.
+ * If split_rec->r_refcount > 0, we are changing the refcount(in case we
+ * increase refcount or decrease a refcount to non-zero).
+ * If split_rec->r_refcount == 0, we are punching a hole in current refcount
+ * rec( in case we decrease a refcount to zero).
+ */
+static int ocfs2_split_refcount_rec(handle_t *handle,
+				    struct ocfs2_caching_info *ci,
+				    struct buffer_head *ref_root_bh,
+				    struct buffer_head *ref_leaf_bh,
+				    struct ocfs2_refcount_rec *split_rec,
+				    int index,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret, recs_need;
+	u32 len;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
+	struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index];
+	struct ocfs2_refcount_rec *tail_rec = NULL;
+	struct buffer_head *new_bh = NULL;
+
+	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
+
+	mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n",
+	     le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters),
+	     le64_to_cpu(split_rec->r_cpos),
+	     le32_to_cpu(split_rec->r_clusters));
+
+	/*
+	 * If we just need to split the header or tail clusters,
+	 * no more recs are needed, just split is OK.
+	 * Otherwise we at least need one new recs.
+	 */
+	if (!split_rec->r_refcount &&
+	    (split_rec->r_cpos == orig_rec->r_cpos ||
+	     le64_to_cpu(split_rec->r_cpos) +
+	     le32_to_cpu(split_rec->r_clusters) ==
+	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
+		recs_need = 0;
+	else
+		recs_need = 1;
+
+	/*
+	 * We need one more rec if we split in the middle and the new rec have
+	 * some refcount in it.
+	 */
+	if (split_rec->r_refcount &&
+	    (split_rec->r_cpos != orig_rec->r_cpos &&
+	     le64_to_cpu(split_rec->r_cpos) +
+	     le32_to_cpu(split_rec->r_clusters) !=
+	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
+		recs_need++;
+
+	/* If the leaf block don't have enough record, expand it. */
+	if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) {
+		struct ocfs2_refcount_rec tmp_rec;
+		u64 cpos = le64_to_cpu(orig_rec->r_cpos);
+		len = le32_to_cpu(orig_rec->r_clusters);
+		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
+						 ref_leaf_bh, meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		/*
+		 * We have to re-get it since now cpos may be moved to
+		 * another leaf block.
+		 */
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, &tmp_rec, &index,
+					     &new_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ref_leaf_bh = new_bh;
+		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+		rf_list = &rb->rf_records;
+		orig_rec = &rf_list->rl_recs[index];
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * We have calculated out how many new records we need and store
+	 * in recs_need, so spare enough space first by moving the records
+	 * after "index" to the end.
+	 */
+	if (index != le16_to_cpu(rf_list->rl_used) - 1)
+		memmove(&rf_list->rl_recs[index + 1 + recs_need],
+			&rf_list->rl_recs[index + 1],
+			(le16_to_cpu(rf_list->rl_used) - index - 1) *
+			 sizeof(struct ocfs2_refcount_rec));
+
+	len = (le64_to_cpu(orig_rec->r_cpos) +
+	      le32_to_cpu(orig_rec->r_clusters)) -
+	      (le64_to_cpu(split_rec->r_cpos) +
+	      le32_to_cpu(split_rec->r_clusters));
+
+	/*
+	 * If we have "len", the we will split in the tail and move it
+	 * to the end of the space we have just spared.
+	 */
+	if (len) {
+		tail_rec = &rf_list->rl_recs[index + recs_need];
+
+		memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
+		le64_add_cpu(&tail_rec->r_cpos,
+			     le32_to_cpu(tail_rec->r_clusters) - len);
+		tail_rec->r_clusters = le32_to_cpu(len);
+	}
+
+	/*
+	 * If the split pos isn't the same as the original one, we need to
+	 * split in the head.
+	 *
+	 * Note: We have the chance that split_rec.r_refcount = 0,
+	 * recs_need = 0 and len > 0, which means we just cut the head from
+	 * the orig_rec and in that case we have done some modification in
+	 * orig_rec above, so the check for r_cpos is faked.
+	 */
+	if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
+		len = le64_to_cpu(split_rec->r_cpos) -
+		      le64_to_cpu(orig_rec->r_cpos);
+		orig_rec->r_clusters = cpu_to_le32(len);
+		index++;
+	}
+
+	le16_add_cpu(&rf_list->rl_used, recs_need);
+
+	if (split_rec->r_refcount) {
+		rf_list->rl_recs[index] = *split_rec;
+		mlog(0, "insert refcount record start %llu, len %u, count %u "
+		     "to leaf block %llu at index %d\n",
+		     (unsigned long long)le64_to_cpu(split_rec->r_cpos),
+		     le32_to_cpu(split_rec->r_clusters),
+		     le32_to_cpu(split_rec->r_refcount),
+		     (unsigned long long)ref_leaf_bh->b_blocknr, index);
+
+		ocfs2_refcount_rec_merge(rb, index);
+	}
+
+	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+static int __ocfs2_increase_refcount(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     u64 cpos, u32 len,
+				     struct ocfs2_alloc_context *meta_ac,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0, index;
+	struct buffer_head *ref_leaf_bh = NULL;
+	struct ocfs2_refcount_rec rec;
+	unsigned int set_len = 0;
+
+	mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+	     (unsigned long long)cpos, len);
+
+	while (len) {
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, &rec, &index,
+					     &ref_leaf_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		set_len = le32_to_cpu(rec.r_clusters);
+
+		/*
+		 * Here we may meet with 3 situations:
+		 *
+		 * 1. If we find an already existing record, and the length
+		 *    is the same, cool, we just need to increase the r_refcount
+		 *    and it is OK.
+		 * 2. If we find a hole, just insert it with r_refcount = 1.
+		 * 3. If we are in the middle of one extent record, split
+		 *    it.
+		 */
+		if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
+		    set_len <= len) {
+			mlog(0, "increase refcount rec, start %llu, len %u, "
+			     "count %u\n", (unsigned long long)cpos, set_len,
+			     le32_to_cpu(rec.r_refcount));
+			ret = ocfs2_change_refcount_rec(handle, ci,
+							ref_leaf_bh, index, 1);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		} else if (!rec.r_refcount) {
+			rec.r_refcount = cpu_to_le32(1);
+
+			mlog(0, "insert refcount rec, start %llu, len %u\n",
+			     (unsigned long long)le64_to_cpu(rec.r_cpos),
+			     set_len);
+			ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
+							ref_leaf_bh,
+							&rec, index, meta_ac);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		} else  {
+			set_len = min((u64)(cpos + len),
+				      le64_to_cpu(rec.r_cpos) + set_len) - cpos;
+			rec.r_cpos = cpu_to_le64(cpos);
+			rec.r_clusters = cpu_to_le32(set_len);
+			le32_add_cpu(&rec.r_refcount, 1);
+
+			mlog(0, "split refcount rec, start %llu, "
+			     "len %u, count %u\n",
+			     (unsigned long long)le64_to_cpu(rec.r_cpos),
+			     set_len, le32_to_cpu(rec.r_refcount));
+			ret = ocfs2_split_refcount_rec(handle, ci,
+						       ref_root_bh, ref_leaf_bh,
+						       &rec, index,
+						       meta_ac, dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		cpos += set_len;
+		len -= set_len;
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	return ret;
+}
-- 
cgit v0.10.2


From 1823cb0b9fe5e6d48017ee3f92428f69c0235d87 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:24:49 +0800
Subject: ocfs2: Add support of decrementing refcount for delete.

    Given a physical cpos and length, decrement the refcount
in the tree. If the refcount for any portion of the extent goes
to zero, that portion is queued for freeing.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2c8ce32..9dd68cd 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6522,9 +6522,9 @@ ocfs2_find_per_slot_free_list(int type,
 	return fl;
 }
 
-static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
-				     int type, int slot, u64 blkno,
-				     unsigned int bit)
+int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+			      int type, int slot, u64 blkno,
+			      unsigned int bit)
 {
 	int ret;
 	struct ocfs2_per_slot_free_list *fl;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3f43489..0610ba1 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -202,6 +202,9 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
 }
 int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
 				u64 blkno, unsigned int bit);
+int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+			      int type, int slot, u64 blkno,
+			      unsigned int bit);
 static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
 {
 	return c->c_global_allocator != NULL;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee0422c..2c7974c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1071,6 +1071,10 @@ static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
 		ocfs2_rotate_refcount_rec_left(rb, index);
 }
 
+/*
+ * Change the refcount indexed by "index" in ref_bh.
+ * If refcount reaches 0, remove it.
+ */
 static int ocfs2_change_refcount_rec(handle_t *handle,
 				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *ref_leaf_bh,
@@ -1079,7 +1083,8 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 	int ret;
 	struct ocfs2_refcount_block *rb =
 			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
-	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
+	struct ocfs2_refcount_list *rl = &rb->rf_records;
+	struct ocfs2_refcount_rec *rec = &rl->rl_recs[index];
 
 	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1092,7 +1097,18 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 	     le32_to_cpu(rec->r_refcount), change);
 	le32_add_cpu(&rec->r_refcount, change);
 
-	ocfs2_refcount_rec_merge(rb, index);
+	if (!rec->r_refcount) {
+		if (index != le16_to_cpu(rl->rl_used) - 1) {
+			memmove(rec, rec + 1,
+				(le16_to_cpu(rl->rl_used) - index - 1) *
+				sizeof(struct ocfs2_refcount_rec));
+			memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1],
+			       0, sizeof(struct ocfs2_refcount_rec));
+		}
+
+		le16_add_cpu(&rl->rl_used, -1);
+	} else
+		ocfs2_refcount_rec_merge(rb, index);
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
 	if (ret)
@@ -1901,3 +1917,239 @@ out:
 	brelse(ref_leaf_bh);
 	return ret;
 }
+
+static int ocfs2_remove_refcount_extent(handle_t *handle,
+				struct ocfs2_caching_info *ci,
+				struct buffer_head *ref_root_bh,
+				struct buffer_head *ref_leaf_bh,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_extent_tree et;
+
+	BUG_ON(rb->rf_records.rl_used);
+
+	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
+	ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
+				  1, meta_ac, dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_remove_from_cache(ci, ref_leaf_bh);
+
+	/*
+	 * add the freed block to the dealloc so that it will be freed
+	 * when we run dealloc.
+	 */
+	ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
+					le16_to_cpu(rb->rf_suballoc_slot),
+					le64_to_cpu(rb->rf_blkno),
+					le16_to_cpu(rb->rf_suballoc_bit));
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	le32_add_cpu(&rb->rf_clusters, -1);
+
+	/*
+	 * check whether we need to restore the root refcount block if
+	 * there is no leaf extent block at atll.
+	 */
+	if (!rb->rf_list.l_next_free_rec) {
+		BUG_ON(rb->rf_clusters);
+
+		mlog(0, "reset refcount tree root %llu to be a record block.\n",
+		     (unsigned long long)ref_root_bh->b_blocknr);
+
+		rb->rf_flags = 0;
+		rb->rf_parent = 0;
+		rb->rf_cpos = 0;
+		memset(&rb->rf_records, 0, sb->s_blocksize -
+		       offsetof(struct ocfs2_refcount_block, rf_records));
+		rb->rf_records.rl_count =
+				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
+	}
+
+	ocfs2_journal_dirty(handle, ref_root_bh);
+
+out:
+	return ret;
+}
+
+static int ocfs2_decrease_refcount_rec(handle_t *handle,
+				struct ocfs2_caching_info *ci,
+				struct buffer_head *ref_root_bh,
+				struct buffer_head *ref_leaf_bh,
+				int index, u64 cpos, unsigned int len,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
+
+	BUG_ON(cpos < le64_to_cpu(rec->r_cpos));
+	BUG_ON(cpos + len >
+	       le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));
+
+	if (cpos == le64_to_cpu(rec->r_cpos) &&
+	    len == le32_to_cpu(rec->r_clusters))
+		ret = ocfs2_change_refcount_rec(handle, ci,
+						ref_leaf_bh, index, -1);
+	else {
+		struct ocfs2_refcount_rec split = *rec;
+		split.r_cpos = cpu_to_le64(cpos);
+		split.r_clusters = cpu_to_le32(len);
+
+		le32_add_cpu(&split.r_refcount, -1);
+
+		mlog(0, "split refcount rec, start %llu, "
+		     "len %u, count %u, original start %llu, len %u\n",
+		     (unsigned long long)le64_to_cpu(split.r_cpos),
+		     len, le32_to_cpu(split.r_refcount),
+		     (unsigned long long)le64_to_cpu(rec->r_cpos),
+		     le32_to_cpu(rec->r_clusters));
+		ret = ocfs2_split_refcount_rec(handle, ci,
+					       ref_root_bh, ref_leaf_bh,
+					       &split, index,
+					       meta_ac, dealloc);
+	}
+
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* Remove the leaf refcount block if it contains no refcount record. */
+	if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
+		ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
+						   ref_leaf_bh, meta_ac,
+						   dealloc);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out:
+	return ret;
+}
+
+static int __ocfs2_decrease_refcount(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     u64 cpos, u32 len,
+				     struct ocfs2_alloc_context *meta_ac,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0, index = 0;
+	struct ocfs2_refcount_rec rec;
+	unsigned int r_count = 0, r_len;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct buffer_head *ref_leaf_bh = NULL;
+
+	mlog(0, "Tree owner %llu, decrease refcount start %llu, len %u\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+	     (unsigned long long)cpos, len);
+
+	while (len) {
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, &rec, &index,
+					     &ref_leaf_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		r_count = le32_to_cpu(rec.r_refcount);
+		BUG_ON(r_count == 0);
+
+		r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
+			      le32_to_cpu(rec.r_clusters)) - cpos;
+
+		ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh,
+						  ref_leaf_bh, index,
+						  cpos, r_len,
+						  meta_ac, dealloc);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		if (le32_to_cpu(rec.r_refcount) == 1) {
+			ret = ocfs2_cache_cluster_dealloc(dealloc,
+					  ocfs2_clusters_to_blocks(sb, cpos),
+							  r_len);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		cpos += r_len;
+		len -= r_len;
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	return ret;
+}
+
+/* Caller must hold refcount tree lock. */
+int ocfs2_decrease_refcount(struct inode *inode,
+			    handle_t *handle, u32 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	u64 ref_blkno;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_tree *tree;
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
+					&ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
+					cpos, len, meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 2ea7fc5..ad4b483 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -40,4 +40,9 @@ int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw,
 void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
 				struct ocfs2_refcount_tree *tree,
 				int rw);
+
+int ocfs2_decrease_refcount(struct inode *inode,
+			    handle_t *handle, u32 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v0.10.2


From 1aa75fea64bc26bda9be9b1b20ae253d7a481877 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:28:39 +0800
Subject: ocfs2: Add functions for extents refcounted.

Add function ocfs2_mark_extent_refcounted which can mark
an extent refcounted.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9dd68cd..96f8ca6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5169,12 +5169,12 @@ out:
  *
  * The caller is responsible for passing down meta_ac if we'll need it.
  */
-static int ocfs2_change_extent_flag(handle_t *handle,
-				    struct ocfs2_extent_tree *et,
-				    u32 cpos, u32 len, u32 phys,
-				    struct ocfs2_alloc_context *meta_ac,
-				    struct ocfs2_cached_dealloc_ctxt *dealloc,
-				    int new_flags, int clear_flags)
+int ocfs2_change_extent_flag(handle_t *handle,
+			     struct ocfs2_extent_tree *et,
+			     u32 cpos, u32 len, u32 phys,
+			     struct ocfs2_alloc_context *meta_ac,
+			     struct ocfs2_cached_dealloc_ctxt *dealloc,
+			     int new_flags, int clear_flags)
 {
 	int ret, index;
 	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 0610ba1..19d5b88 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -128,6 +128,12 @@ int ocfs2_mark_extent_written(struct inode *inode,
 			      handle_t *handle, u32 cpos, u32 len, u32 phys,
 			      struct ocfs2_alloc_context *meta_ac,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_change_extent_flag(handle_t *handle,
+			     struct ocfs2_extent_tree *et,
+			     u32 cpos, u32 len, u32 phys,
+			     struct ocfs2_alloc_context *meta_ac,
+			     struct ocfs2_cached_dealloc_ctxt *dealloc,
+			     int new_flags, int clear_flags);
 int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
 			u32 cpos, u32 len,
 			struct ocfs2_alloc_context *meta_ac,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bb53573..eae4046 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -516,6 +516,13 @@ static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
 	ocfs2_set_links_count(di, links);
 }
 
+static inline int ocfs2_refcount_tree(struct ocfs2_super *osb)
+{
+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
+		return 1;
+	return 0;
+}
+
 /* set / clear functions because cluster events can make these happen
  * in parallel so we want the transitions to be atomic. this also
  * means that any future flags osb_flags must be protected by spinlock
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 2c7974c..f7d19f4 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2153,3 +2153,42 @@ out:
 	brelse(ref_root_bh);
 	return ret;
 }
+
+/*
+ * Mark the already-existing extent at cpos as refcounted for len clusters.
+ * This adds the refcount extent flag.
+ *
+ * If the existing extent is larger than the request, initiate a
+ * split. An attempt will be made at merging with adjacent extents.
+ *
+ * The caller is responsible for passing down meta_ac if we'll need it.
+ */
+static int ocfs2_mark_extent_refcounted(struct inode *inode,
+				struct ocfs2_extent_tree *et,
+				handle_t *handle, u32 cpos,
+				u32 len, u32 phys,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+
+	mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n",
+	     inode->i_ino, cpos, len, phys);
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
+			    "tree, but the feature bit is not set in the "
+			    "super block.", inode->i_ino);
+		ret = -EROFS;
+		goto out;
+	}
+
+	ret = ocfs2_change_extent_flag(handle, et, cpos,
+				       len, phys, meta_ac, dealloc,
+				       OCFS2_EXT_REFCOUNTED, 0);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
-- 
cgit v0.10.2


From bcbbb24a6a5c5b3e7b8e5284e0bfa23f45c32377 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:29:12 +0800
Subject: ocfs2: Decrement refcount when truncating refcounted extents.

Add 'Decrement refcount for delete' in to the normal truncate
process. So for a refcounted extent record, call refcount rec
decrementation instead of cluster free.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 96f8ca6..03438a6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,6 +49,7 @@
 #include "super.h"
 #include "uptodate.h"
 #include "xattr.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -6673,7 +6674,7 @@ out:
  */
 static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
 			   handle_t *handle, struct ocfs2_truncate_context *tc,
-			   u32 clusters_to_del, u64 *delete_start)
+			   u32 clusters_to_del, u64 *delete_start, u8 *flags)
 {
 	int ret, i, index = path->p_tree_depth;
 	u32 new_edge = 0;
@@ -6683,6 +6684,7 @@ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
 	struct ocfs2_extent_rec *rec;
 
 	*delete_start = 0;
+	*flags = 0;
 
 	while (index >= 0) {
 		bh = path->p_node[index].bh;
@@ -6770,6 +6772,7 @@ find_tail_record:
 			*delete_start = le64_to_cpu(rec->e_blkno)
 				+ ocfs2_clusters_to_blocks(inode->i_sb,
 					le16_to_cpu(rec->e_leaf_clusters));
+			*flags = rec->e_flags;
 
 			/*
 			 * If it's now empty, remove this record.
@@ -6869,7 +6872,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 			     struct buffer_head *fe_bh,
 			     handle_t *handle,
 			     struct ocfs2_truncate_context *tc,
-			     struct ocfs2_path *path)
+			     struct ocfs2_path *path,
+			     struct ocfs2_alloc_context *meta_ac)
 {
 	int status;
 	struct ocfs2_dinode *fe;
@@ -6877,6 +6881,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	struct ocfs2_extent_list *el;
 	struct buffer_head *last_eb_bh = NULL;
 	u64 delete_blk = 0;
+	u8 rec_flags;
 
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
@@ -6932,7 +6937,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	inode->i_blocks = ocfs2_inode_sector_count(inode);
 
 	status = ocfs2_trim_tree(inode, path, handle, tc,
-				 clusters_to_del, &delete_blk);
+				 clusters_to_del, &delete_blk, &rec_flags);
 	if (status) {
 		mlog_errno(status);
 		goto bail;
@@ -6964,8 +6969,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	}
 
 	if (delete_blk) {
-		status = ocfs2_truncate_log_append(osb, handle, delete_blk,
-						   clusters_to_del);
+		if (rec_flags & OCFS2_EXT_REFCOUNTED)
+			status = ocfs2_decrease_refcount(inode, handle,
+					ocfs2_blocks_to_clusters(osb->sb,
+								 delete_blk),
+					clusters_to_del, meta_ac,
+					&tc->tc_dealloc);
+		else
+			status = ocfs2_truncate_log_append(osb, handle,
+							   delete_blk,
+							   clusters_to_del);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -7383,11 +7396,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 {
 	int status, i, credits, tl_sem = 0;
 	u32 clusters_to_del, new_highest_cpos, range;
+	u64 blkno = 0;
 	struct ocfs2_extent_list *el;
 	handle_t *handle = NULL;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_path *path = NULL;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_refcount_tree *ref_tree = NULL;
 
 	mlog_entry_void();
 
@@ -7413,6 +7429,8 @@ start:
 		goto bail;
 	}
 
+	credits = 0;
+
 	/*
 	 * Truncate always works against the rightmost tree branch.
 	 */
@@ -7453,10 +7471,15 @@ start:
 		clusters_to_del = 0;
 	} else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
 		clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
+		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 	} else if (range > new_highest_cpos) {
 		clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
 				   le32_to_cpu(el->l_recs[i].e_cpos)) -
 				  new_highest_cpos;
+		blkno = le64_to_cpu(el->l_recs[i].e_blkno) +
+			ocfs2_clusters_to_blocks(inode->i_sb,
+				ocfs2_rec_clusters(el, &el->l_recs[i]) -
+				clusters_to_del);
 	} else {
 		status = 0;
 		goto bail;
@@ -7465,6 +7488,29 @@ start:
 	mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n",
 	     clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
 
+	if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
+		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
+			 OCFS2_HAS_REFCOUNT_FL));
+
+		status = ocfs2_lock_refcount_tree(osb,
+						le64_to_cpu(di->i_refcount_loc),
+						1, &ref_tree, NULL);
+		if (status) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
+							       blkno,
+							       clusters_to_del,
+							       &credits,
+							       &meta_ac);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	mutex_lock(&tl_inode->i_mutex);
 	tl_sem = 1;
 	/* ocfs2_truncate_log_needs_flush guarantees us at least one
@@ -7478,7 +7524,7 @@ start:
 		}
 	}
 
-	credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
+	credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
 						(struct ocfs2_dinode *)fe_bh->b_data,
 						el);
 	handle = ocfs2_start_trans(osb, credits);
@@ -7490,7 +7536,7 @@ start:
 	}
 
 	status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
-				   tc, path);
+				   tc, path, meta_ac);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -7504,6 +7550,16 @@ start:
 
 	ocfs2_reinit_path(path, 1);
 
+	if (meta_ac) {
+		ocfs2_free_alloc_context(meta_ac);
+		meta_ac = NULL;
+	}
+
+	if (ref_tree) {
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+		ref_tree = NULL;
+	}
+
 	/*
 	 * The check above will catch the case where we've truncated
 	 * away all allocation.
@@ -7520,6 +7576,12 @@ bail:
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+
 	ocfs2_run_deallocs(osb, &tc->tc_dealloc);
 
 	ocfs2_free_path(path);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index bd88c8b..3f74e09 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -504,6 +504,9 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
  */
 #define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
 
+/* 2 metadata alloc, 2 new blocks and root refcount block */
+#define OCFS2_EXPAND_REFCOUNT_TREE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + 3)
+
 /*
  * Please note that the caller must make sure that root_el is the root
  * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index f7d19f4..e72dbdd 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2192,3 +2192,215 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode,
 out:
 	return ret;
 }
+
+/*
+ * Given some contiguous physical clusters, calculate what we need
+ * for modifying their refcount.
+ */
+static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
+					    struct ocfs2_caching_info *ci,
+					    struct buffer_head *ref_root_bh,
+					    u64 start_cpos,
+					    u32 clusters,
+					    int *meta_add,
+					    int *credits)
+{
+	int ret = 0, index, ref_blocks = 0, recs_add = 0;
+	u64 cpos = start_cpos;
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_rec rec;
+	struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
+	u32 len;
+
+	mlog(0, "start_cpos %llu, clusters %u\n",
+	     (unsigned long long)start_cpos, clusters);
+	while (clusters) {
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, clusters, &rec,
+					     &index, &ref_leaf_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		if (ref_leaf_bh != prev_bh) {
+			/*
+			 * Now we encounter a new leaf block, so calculate
+			 * whether we need to extend the old leaf.
+			 */
+			if (prev_bh) {
+				rb = (struct ocfs2_refcount_block *)
+							prev_bh->b_data;
+
+				if (le64_to_cpu(rb->rf_records.rl_used) +
+				    recs_add >
+				    le16_to_cpu(rb->rf_records.rl_count))
+					ref_blocks++;
+			}
+
+			recs_add = 0;
+			*credits += 1;
+			brelse(prev_bh);
+			prev_bh = ref_leaf_bh;
+			get_bh(prev_bh);
+		}
+
+		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+
+		mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu,"
+		     "rec->r_clusters %u, rec->r_refcount %u, index %d\n",
+		     recs_add, (unsigned long long)cpos, clusters,
+		     (unsigned long long)le64_to_cpu(rec.r_cpos),
+		     le32_to_cpu(rec.r_clusters),
+		     le32_to_cpu(rec.r_refcount), index);
+
+		len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
+			  le32_to_cpu(rec.r_clusters)) - cpos;
+		/*
+		 * If the refcount rec already exist, cool. We just need
+		 * to check whether there is a split. Otherwise we just need
+		 * to increase the refcount.
+		 * If we will insert one, increases recs_add.
+		 *
+		 * We record all the records which will be inserted to the
+		 * same refcount block, so that we can tell exactly whether
+		 * we need a new refcount block or not.
+		 */
+		if (rec.r_refcount) {
+			/* Check whether we need a split at the beginning. */
+			if (cpos == start_cpos &&
+			    cpos != le64_to_cpu(rec.r_cpos))
+				recs_add++;
+
+			/* Check whether we need a split in the end. */
+			if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
+			    le32_to_cpu(rec.r_clusters))
+				recs_add++;
+		} else
+			recs_add++;
+
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
+		clusters -= len;
+		cpos += len;
+	}
+
+	if (prev_bh) {
+		rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
+
+		if (le64_to_cpu(rb->rf_records.rl_used) + recs_add >
+		    le16_to_cpu(rb->rf_records.rl_count))
+			ref_blocks++;
+
+		*credits += 1;
+	}
+
+	if (!ref_blocks)
+		goto out;
+
+	mlog(0, "we need ref_blocks %d\n", ref_blocks);
+	*meta_add += ref_blocks;
+	*credits += ref_blocks;
+
+	/*
+	 * So we may need ref_blocks to insert into the tree.
+	 * That also means we need to change the b-tree and add that number
+	 * of records since we never merge them.
+	 * We need one more block for expansion since the new created leaf
+	 * block is also full and needs split.
+	 */
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) {
+		struct ocfs2_extent_tree et;
+
+		ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
+		*meta_add += ocfs2_extend_meta_needed(et.et_root_el);
+		*credits += ocfs2_calc_extend_credits(sb,
+						      et.et_root_el,
+						      ref_blocks);
+	} else {
+		*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+		*meta_add += 1;
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	brelse(prev_bh);
+	return ret;
+}
+
+/*
+ * For refcount tree, we will decrease some contiguous clusters
+ * refcount count, so just go through it to see how many blocks
+ * we gonna touch and whether we need to create new blocks.
+ *
+ * Normally the refcount blocks store these refcount should be
+ * continguous also, so that we can get the number easily.
+ * As for meta_ac, we will at most add split 2 refcount record and
+ * 2 more refcount block, so just check it in a rough way.
+ *
+ * Caller must hold refcount tree lock.
+ */
+int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
+					  struct buffer_head *di_bh,
+					  u64 phys_blkno,
+					  u32 clusters,
+					  int *credits,
+					  struct ocfs2_alloc_context **meta_ac)
+{
+	int ret, ref_blocks = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_tree *tree;
+	u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
+			    "tree, but the feature bit is not set in the "
+			    "super block.", inode->i_ino);
+		ret = -EROFS;
+		goto out;
+	}
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
+				      le64_to_cpu(di->i_refcount_loc), &tree);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_refcount_block(&tree->rf_ci,
+					le64_to_cpu(di->i_refcount_loc),
+					&ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
+					       &tree->rf_ci,
+					       ref_root_bh,
+					       start_cpos, clusters,
+					       &ref_blocks, credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "reserve new metadata %d, credits = %d\n",
+	     ref_blocks, *credits);
+
+	if (ref_blocks) {
+		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
+							ref_blocks, meta_ac);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index ad4b483..b8c9ed7 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -45,4 +45,10 @@ int ocfs2_decrease_refcount(struct inode *inode,
 			    handle_t *handle, u32 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
 			    struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
+					  struct buffer_head *di_bh,
+					  u64 phys_blkno,
+					  u32 clusters,
+					  int *credits,
+					  struct ocfs2_alloc_context **meta_ac);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v0.10.2


From 6f70fa519976a379d72781d927cf8e5f5b05ec86 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 25 Aug 2009 08:05:12 +0800
Subject: ocfs2: Add CoW support.

This patch try CoW support for a refcounted record.

the whole process will be:
1. Calculate how many clusters we need to CoW and where we start.
   Extents that are not completely encompassed by the write will
   be broken on 1MB boundaries.
2. Do CoW for the clusters with the help of page cache.
3. Change the b-tree structure with the new allocated clusters.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 03438a6..b8fc95d 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6998,9 +6998,9 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
 	return 0;
 }
 
-static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
-				     unsigned int from, unsigned int to,
-				     struct page *page, int zero, u64 *phys)
+void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
+			      unsigned int from, unsigned int to,
+			      struct page *page, int zero, u64 *phys)
 {
 	int ret, partial = 0;
 
@@ -7068,20 +7068,16 @@ out:
 		ocfs2_unlock_and_free_pages(pages, numpages);
 }
 
-static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
-				struct page **pages, int *num)
+int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
+		     struct page **pages, int *num)
 {
 	int numpages, ret = 0;
-	struct super_block *sb = inode->i_sb;
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index;
 	loff_t last_page_bytes;
 
 	BUG_ON(start > end);
 
-	BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
-	       (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
-
 	numpages = 0;
 	last_page_bytes = PAGE_ALIGN(end);
 	index = start >> PAGE_CACHE_SHIFT;
@@ -7109,6 +7105,17 @@ out:
 	return ret;
 }
 
+static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
+				struct page **pages, int *num)
+{
+	struct super_block *sb = inode->i_sb;
+
+	BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
+	       (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
+
+	return ocfs2_grab_pages(inode, start, end, pages, num);
+}
+
 /*
  * Zero the area past i_size but still within an allocated
  * cluster. This avoids exposing nonzero data on subsequent file
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 19d5b88..9c122d5 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -271,6 +271,11 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
 	return !rec->e_leaf_clusters;
 }
 
+int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
+		     struct page **pages, int *num);
+void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
+			      unsigned int from, unsigned int to,
+			      struct page *page, int zero, u64 *phys);
 /*
  * Structures which describe a path through a btree, and functions to
  * manipulate them.
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 15c594d..fdad075 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -126,8 +126,8 @@ bail:
 	return err;
 }
 
-static int ocfs2_get_block(struct inode *inode, sector_t iblock,
-			   struct buffer_head *bh_result, int create)
+int ocfs2_get_block(struct inode *inode, sector_t iblock,
+		    struct buffer_head *bh_result, int create)
 {
 	int err = 0;
 	unsigned int ext_flags;
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 503e492..c48e93f 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -57,6 +57,8 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 			   struct buffer_head *di_bh);
 int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
 
+int ocfs2_get_block(struct inode *inode, sector_t iblock,
+		    struct buffer_head *bh_result, int create);
 /* all ocfs2_dio_end_io()'s fault */
 #define ocfs2_iocb_is_rw_locked(iocb) \
 	test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index e72dbdd..4e7df8b 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -31,6 +31,27 @@
 #include "sysfile.h"
 #include "dlmglue.h"
 #include "extent_map.h"
+#include "aops.h"
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+struct ocfs2_cow_context {
+	struct inode *inode;
+	u32 cow_start;
+	u32 cow_len;
+	struct ocfs2_extent_tree di_et;
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+	struct ocfs2_alloc_context *meta_ac;
+	struct ocfs2_alloc_context *data_ac;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+};
 
 static inline struct ocfs2_refcount_tree *
 cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -2404,3 +2425,796 @@ out:
 	brelse(ref_root_bh);
 	return ret;
 }
+
+#define	MAX_CONTIG_BYTES	1048576
+
+static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
+{
+	return ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES);
+}
+
+static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
+{
+	return ~(ocfs2_cow_contig_clusters(sb) - 1);
+}
+
+/*
+ * Given an extent that starts at 'start' and an I/O that starts at 'cpos',
+ * find an offset (start + (n * contig_clusters)) that is closest to cpos
+ * while still being less than or equal to it.
+ *
+ * The goal is to break the extent at a multiple of contig_clusters.
+ */
+static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
+						 unsigned int start,
+						 unsigned int cpos)
+{
+	BUG_ON(start > cpos);
+
+	return start + ((cpos - start) & ocfs2_cow_contig_mask(sb));
+}
+
+/*
+ * Given a cluster count of len, pad it out so that it is a multiple
+ * of contig_clusters.
+ */
+static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
+						  unsigned int len)
+{
+	unsigned int padded =
+		(len + (ocfs2_cow_contig_clusters(sb) - 1)) &
+		ocfs2_cow_contig_mask(sb);
+
+	/* Did we wrap? */
+	if (padded < len)
+		padded = UINT_MAX;
+
+	return padded;
+}
+
+/*
+ * Calculate out the start and number of virtual clusters we need to to CoW.
+ *
+ * cpos is vitual start cluster position we want to do CoW in a
+ * file and write_len is the cluster length.
+ *
+ * Normal we will start CoW from the beginning of extent record cotaining cpos.
+ * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
+ * get good I/O from the resulting extent tree.
+ */
+static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
+					   struct buffer_head *di_bh,
+					   u32 cpos,
+					   u32 write_len,
+					   u32 *cow_start,
+					   u32 *cow_len)
+{
+	int ret = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+	struct ocfs2_extent_list *el = &di->id2.i_list;
+	int tree_height = le16_to_cpu(el->l_tree_depth), i;
+	struct buffer_head *eb_bh = NULL;
+	struct ocfs2_extent_block *eb = NULL;
+	struct ocfs2_extent_rec *rec;
+	unsigned int want_clusters, rec_end = 0;
+	int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
+	int leaf_clusters;
+
+	if (tree_height > 0) {
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+		el = &eb->h_list;
+
+		if (el->l_tree_depth) {
+			ocfs2_error(inode->i_sb,
+				    "Inode %lu has non zero tree depth in "
+				    "leaf block %llu\n", inode->i_ino,
+				    (unsigned long long)eb_bh->b_blocknr);
+			ret = -EROFS;
+			goto out;
+		}
+	}
+
+	*cow_len = 0;
+	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
+		rec = &el->l_recs[i];
+
+		if (ocfs2_is_empty_extent(rec)) {
+			mlog_bug_on_msg(i != 0, "Inode %lu has empty record in "
+					"index %d\n", inode->i_ino, i);
+			continue;
+		}
+
+		if (le32_to_cpu(rec->e_cpos) +
+		    le16_to_cpu(rec->e_leaf_clusters) <= cpos)
+			continue;
+
+		if (*cow_len == 0) {
+			/*
+			 * We should find a refcounted record in the
+			 * first pass.
+			 */
+			BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
+			*cow_start = le32_to_cpu(rec->e_cpos);
+		}
+
+		/*
+		 * If we encounter a hole or a non-refcounted record,
+		 * stop the search.
+		 */
+		if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
+		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)))
+			break;
+
+		leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
+		rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
+
+		/*
+		 * How many clusters do we actually need from
+		 * this extent?  First we see how many we actually
+		 * need to complete the write.  If that's smaller
+		 * than contig_clusters, we try for contig_clusters.
+		 */
+		if (!*cow_len)
+			want_clusters = write_len;
+		else
+			want_clusters = (cpos + write_len) -
+				(*cow_start + *cow_len);
+		if (want_clusters < contig_clusters)
+			want_clusters = contig_clusters;
+
+		/*
+		 * If the write does not cover the whole extent, we
+		 * need to calculate how we're going to split the extent.
+		 * We try to do it on contig_clusters boundaries.
+		 *
+		 * Any extent smaller than contig_clusters will be
+		 * CoWed in its entirety.
+		 */
+		if (leaf_clusters <= contig_clusters)
+			*cow_len += leaf_clusters;
+		else if (*cow_len || (*cow_start == cpos)) {
+			/*
+			 * This extent needs to be CoW'd from its
+			 * beginning, so all we have to do is compute
+			 * how many clusters to grab.  We align
+			 * want_clusters to the edge of contig_clusters
+			 * to get better I/O.
+			 */
+			want_clusters = ocfs2_cow_align_length(inode->i_sb,
+							       want_clusters);
+
+			if (leaf_clusters < want_clusters)
+				*cow_len += leaf_clusters;
+			else
+				*cow_len += want_clusters;
+		} else if ((*cow_start + contig_clusters) >=
+			   (cpos + write_len)) {
+			/*
+			 * Breaking off contig_clusters at the front
+			 * of the extent will cover our write.  That's
+			 * easy.
+			 */
+			*cow_len = contig_clusters;
+		} else if ((rec_end - cpos) <= contig_clusters) {
+			/*
+			 * Breaking off contig_clusters at the tail of
+			 * this extent will cover cpos.
+			 */
+			*cow_start = rec_end - contig_clusters;
+			*cow_len = contig_clusters;
+		} else if ((rec_end - cpos) <= want_clusters) {
+			/*
+			 * While we can't fit the entire write in this
+			 * extent, we know that the write goes from cpos
+			 * to the end of the extent.  Break that off.
+			 * We try to break it at some multiple of
+			 * contig_clusters from the front of the extent.
+			 * Failing that (ie, cpos is within
+			 * contig_clusters of the front), we'll CoW the
+			 * entire extent.
+			 */
+			*cow_start = ocfs2_cow_align_start(inode->i_sb,
+							   *cow_start, cpos);
+			*cow_len = rec_end - *cow_start;
+		} else {
+			/*
+			 * Ok, the entire write lives in the middle of
+			 * this extent.  Let's try to slice the extent up
+			 * nicely.  Optimally, our CoW region starts at
+			 * m*contig_clusters from the beginning of the
+			 * extent and goes for n*contig_clusters,
+			 * covering the entire write.
+			 */
+			*cow_start = ocfs2_cow_align_start(inode->i_sb,
+							   *cow_start, cpos);
+
+			want_clusters = (cpos + write_len) - *cow_start;
+			want_clusters = ocfs2_cow_align_length(inode->i_sb,
+							       want_clusters);
+			if (*cow_start + want_clusters <= rec_end)
+				*cow_len = want_clusters;
+			else
+				*cow_len = rec_end - *cow_start;
+		}
+
+		/* Have we covered our entire write yet? */
+		if ((*cow_start + *cow_len) >= (cpos + write_len))
+			break;
+
+		/*
+		 * If we reach the end of the extent block and don't get enough
+		 * clusters, continue with the next extent block if possible.
+		 */
+		if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
+		    eb && eb->h_next_leaf_blk) {
+			brelse(eb_bh);
+			eb_bh = NULL;
+
+			ret = ocfs2_read_extent_block(INODE_CACHE(inode),
+					       le64_to_cpu(eb->h_next_leaf_blk),
+					       &eb_bh);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+			el = &eb->h_list;
+			i = -1;
+		}
+	}
+
+out:
+	brelse(eb_bh);
+	return ret;
+}
+
+/*
+ * Prepare meta_ac, data_ac and calculate credits when we want to add some
+ * num_clusters in data_tree "et" and change the refcount for the old
+ * clusters(starting form p_cluster) in the refcount tree.
+ *
+ * Note:
+ * 1. since we may split the old tree, so we at most will need num_clusters + 2
+ *    more new leaf records.
+ * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so
+ *    just give data_ac = NULL.
+ */
+static int ocfs2_lock_refcount_allocators(struct super_block *sb,
+					u32 p_cluster, u32 num_clusters,
+					struct ocfs2_extent_tree *et,
+					struct ocfs2_caching_info *ref_ci,
+					struct buffer_head *ref_root_bh,
+					struct ocfs2_alloc_context **meta_ac,
+					struct ocfs2_alloc_context **data_ac,
+					int *credits)
+{
+	int ret = 0, meta_add = 0;
+	int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et);
+
+	if (num_free_extents < 0) {
+		ret = num_free_extents;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (num_free_extents < num_clusters + 2)
+		meta_add =
+			ocfs2_extend_meta_needed(et->et_root_el);
+
+	*credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
+					      num_clusters + 2);
+
+	ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
+					       p_cluster, num_clusters,
+					       &meta_add, credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n",
+	     meta_add, num_clusters, *credits);
+	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
+						meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (data_ac) {
+		ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
+					     data_ac);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out:
+	if (ret) {
+		if (*meta_ac) {
+			ocfs2_free_alloc_context(*meta_ac);
+			*meta_ac = NULL;
+		}
+	}
+
+	return ret;
+}
+
+static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
+{
+	BUG_ON(buffer_dirty(bh));
+
+	clear_buffer_mapped(bh);
+
+	return 0;
+}
+
+static int ocfs2_duplicate_clusters(handle_t *handle,
+				    struct ocfs2_cow_context *context,
+				    u32 cpos, u32 old_cluster,
+				    u32 new_cluster, u32 new_len)
+{
+	int ret = 0, partial;
+	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
+	struct page *page;
+	pgoff_t page_index;
+	unsigned int from, to;
+	loff_t offset, end, map_end;
+	struct address_space *mapping = context->inode->i_mapping;
+
+	mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
+	     new_cluster, new_len, cpos);
+
+	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
+	end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+
+	while (offset < end) {
+		page_index = offset >> PAGE_CACHE_SHIFT;
+		map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+		if (map_end > end)
+			map_end = end;
+
+		/* from, to is the offset within the page. */
+		from = offset & (PAGE_CACHE_SIZE - 1);
+		to = PAGE_CACHE_SIZE;
+		if (map_end & (PAGE_CACHE_SIZE - 1))
+			to = map_end & (PAGE_CACHE_SIZE - 1);
+
+		page = grab_cache_page(mapping, page_index);
+
+		/* This page can't be dirtied before we CoW it out. */
+		BUG_ON(PageDirty(page));
+
+		if (!PageUptodate(page)) {
+			ret = block_read_full_page(page, ocfs2_get_block);
+			if (ret) {
+				mlog_errno(ret);
+				goto unlock;
+			}
+			lock_page(page);
+		}
+
+		if (page_has_buffers(page)) {
+			ret = walk_page_buffers(handle, page_buffers(page),
+						from, to, &partial,
+						ocfs2_clear_cow_buffer);
+			if (ret) {
+				mlog_errno(ret);
+				goto unlock;
+			}
+		}
+
+		ocfs2_map_and_dirty_page(context->inode,
+					 handle, from, to,
+					 page, 0, &new_block);
+		mark_page_accessed(page);
+unlock:
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+		offset = map_end;
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int ocfs2_clear_ext_refcount(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
+				    u32 cpos, u32 p_cluster, u32 len,
+				    unsigned int ext_flags,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret, index;
+	struct ocfs2_extent_rec replace_rec;
+	struct ocfs2_path *path = NULL;
+	struct ocfs2_extent_list *el;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
+	u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
+
+	mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n",
+	     (unsigned long long)ino, cpos, len, p_cluster, ext_flags);
+
+	memset(&replace_rec, 0, sizeof(replace_rec));
+	replace_rec.e_cpos = cpu_to_le32(cpos);
+	replace_rec.e_leaf_clusters = cpu_to_le16(len);
+	replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
+								   p_cluster));
+	replace_rec.e_flags = ext_flags;
+	replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;
+
+	path = ocfs2_new_path_from_et(et);
+	if (!path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(et->et_ci, path, cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	el = path_leaf_el(path);
+
+	index = ocfs2_search_extent_list(el, cpos);
+	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+		ocfs2_error(sb,
+			    "Inode %llu has an extent at cpos %u which can no "
+			    "longer be found.\n",
+			    (unsigned long long)ino, cpos);
+		ret = -EROFS;
+		goto out;
+	}
+
+	ret = ocfs2_split_extent(handle, et, path, index,
+				 &replace_rec, meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	ocfs2_free_path(path);
+	return ret;
+}
+
+static int ocfs2_replace_clusters(handle_t *handle,
+				  struct ocfs2_cow_context *context,
+				  u32 cpos, u32 old,
+				  u32 new, u32 len,
+				  unsigned int ext_flags)
+{
+	int ret;
+	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	u64 ino = ocfs2_metadata_cache_owner(ci);
+
+	mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
+	     (unsigned long long)ino, cpos, old, new, len, ext_flags);
+
+	/*If the old clusters is unwritten, no need to duplicate. */
+	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+		ret = ocfs2_duplicate_clusters(handle, context, cpos,
+					       old, new, len);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+				       cpos, new, len, ext_flags,
+				       context->meta_ac, &context->dealloc);
+	if (ret)
+		mlog_errno(ret);
+out:
+	return ret;
+}
+
+static int ocfs2_cow_sync_writeback(struct super_block *sb,
+				    struct ocfs2_cow_context *context,
+				    u32 cpos, u32 num_clusters)
+{
+	int ret = 0;
+	loff_t offset, end, map_end;
+	pgoff_t page_index;
+	struct page *page;
+
+	if (ocfs2_should_order_data(context->inode))
+		return 0;
+
+	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
+	end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
+
+	ret = filemap_fdatawrite_range(context->inode->i_mapping,
+				       offset, end - 1);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	while (offset < end) {
+		page_index = offset >> PAGE_CACHE_SHIFT;
+		map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+		if (map_end > end)
+			map_end = end;
+
+		page = grab_cache_page(context->inode->i_mapping, page_index);
+		BUG_ON(!page);
+
+		wait_on_page_writeback(page);
+		if (PageError(page)) {
+			ret = -EIO;
+			mlog_errno(ret);
+		} else
+			mark_page_accessed(page);
+
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+		offset = map_end;
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int ocfs2_make_clusters_writable(struct super_block *sb,
+					struct ocfs2_cow_context *context,
+					u32 cpos, u32 p_cluster,
+					u32 num_clusters, unsigned int e_flags)
+{
+	int ret, credits =  0;
+	u32 new_bit, new_len;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	handle_t *handle;
+
+	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
+					     &context->di_et,
+					     context->ref_ci,
+					     context->ref_root_bh,
+					     &context->meta_ac,
+					     &context->data_ac, &credits);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	while (num_clusters) {
+		ret = __ocfs2_claim_clusters(osb, handle, context->data_ac,
+					     1, num_clusters,
+					     &new_bit, &new_len);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+
+		ret = ocfs2_replace_clusters(handle, context,
+					     cpos, p_cluster, new_bit,
+					     new_len, e_flags);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+
+		cpos += new_len;
+		p_cluster += new_len;
+		num_clusters -= new_len;
+	}
+
+	ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
+					context->ref_root_bh,
+					p_cluster, num_clusters,
+					context->meta_ac,
+					&context->dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	/*
+	 * Here we should write the new page out first if we are
+	 * in write-back mode.
+	 */
+	ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (context->data_ac) {
+		ocfs2_free_alloc_context(context->data_ac);
+		context->data_ac = NULL;
+	}
+	if (context->meta_ac) {
+		ocfs2_free_alloc_context(context->meta_ac);
+		context->meta_ac = NULL;
+	}
+
+	return ret;
+}
+
+static int ocfs2_replace_cow(struct inode *inode,
+			     struct buffer_head *di_bh,
+			     struct buffer_head *ref_root_bh,
+			     struct ocfs2_caching_info *ref_ci,
+			     u32 cow_start, u32 cow_len)
+{
+	int ret = 0;
+	u32 p_cluster, num_clusters, start = cow_start;
+	unsigned int ext_flags;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_cow_context *context;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
+			    "tree, but the feature bit is not set in the "
+			    "super block.", inode->i_ino);
+		return -EROFS;
+	}
+
+	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
+	if (!context) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		return ret;
+	}
+
+	context->inode = inode;
+	context->cow_start = cow_start;
+	context->cow_len = cow_len;
+	context->ref_ci = ref_ci;
+	context->ref_root_bh = ref_root_bh;
+
+	ocfs2_init_dealloc_ctxt(&context->dealloc);
+	ocfs2_init_dinode_extent_tree(&context->di_et,
+				      INODE_CACHE(inode), di_bh);
+
+	while (cow_len) {
+		ret = ocfs2_get_clusters(inode, cow_start, &p_cluster,
+					 &num_clusters, &ext_flags);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));
+
+		if (cow_len < num_clusters)
+			num_clusters = cow_len;
+
+		ret = ocfs2_make_clusters_writable(inode->i_sb, context,
+						   cow_start, p_cluster,
+						   num_clusters, ext_flags);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		cow_len -= num_clusters;
+		cow_start += num_clusters;
+	}
+
+
+	/*
+	 * truncate the extent map here since no matter whether we meet with
+	 * any error during the action, we shouldn't trust cached extent map
+	 * any more.
+	 */
+	ocfs2_extent_map_trunc(inode, start);
+
+	if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
+		ocfs2_schedule_truncate_log_flush(osb, 1);
+		ocfs2_run_deallocs(osb, &context->dealloc);
+	}
+
+	kfree(context);
+	return ret;
+}
+
+/*
+ * Starting at cpos, try to CoW write_len clusters.
+ * This will stop when it runs into a hole or an unrefcounted extent.
+ */
+static int ocfs2_refcount_cow_hunk(struct inode *inode,
+				   struct buffer_head *di_bh,
+				   u32 cpos, u32 write_len)
+{
+	int ret;
+	u32 cow_start = 0, cow_len = 0;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_tree *ref_tree;
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh, cpos, write_len,
+					      &cow_start, &cow_len);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+	mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
+	     "cow_len %u\n", inode->i_ino,
+	     cpos, write_len, cow_start, cow_len);
+
+	BUG_ON(cow_len == 0);
+
+	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
+				       1, &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_replace_cow(inode, di_bh, ref_root_bh, &ref_tree->rf_ci,
+				cow_start, cow_len);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+out:
+	return ret;
+}
+
+/*
+ * CoW any and all clusters between cpos and cpos+write_len.
+ * If this returns successfully, all clusters between cpos and
+ * cpos+write_len are safe to modify.
+ */
+int ocfs2_refcount_cow(struct inode *inode,
+		       struct buffer_head *di_bh,
+		       u32 cpos, u32 write_len)
+{
+	int ret = 0;
+	u32 p_cluster, num_clusters;
+	unsigned int ext_flags;
+
+	while (write_len) {
+		ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
+					 &num_clusters, &ext_flags);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		if (write_len < num_clusters)
+			num_clusters = write_len;
+
+		if (ext_flags & OCFS2_EXT_REFCOUNTED) {
+			ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
+						      num_clusters);
+			if (ret) {
+				mlog_errno(ret);
+				break;
+			}
+		}
+
+		write_len -= num_clusters;
+		cpos += num_clusters;
+	}
+
+	return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index b8c9ed7..9960878 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -51,4 +51,6 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  u32 clusters,
 					  int *credits,
 					  struct ocfs2_alloc_context **meta_ac);
+int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
+		       u32 cpos, u32 write_len);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v0.10.2


From 6ae23c5555176c5b23480c9c578ff27437085ba5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:30:55 +0800
Subject: ocfs2: CoW refcount tree improvement.

During CoW, if the old extent record is refcounted, we allocate
som new clusters and do CoW. Actually we can have some improvement
here. If the old extent has refcount=1, that means now it is only
used by this file. So we don't need to allocate new clusters, just
remove the refcounted flag and it is OK. We also have to remove
it from the refcount tree while not deleting it.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b8fc95d..7c879fc 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6974,7 +6974,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 					ocfs2_blocks_to_clusters(osb->sb,
 								 delete_blk),
 					clusters_to_del, meta_ac,
-					&tc->tc_dealloc);
+					&tc->tc_dealloc, 1);
 		else
 			status = ocfs2_truncate_log_append(osb, handle,
 							   delete_blk,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4e7df8b..0a924365 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2076,7 +2076,8 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 				     struct buffer_head *ref_root_bh,
 				     u64 cpos, u32 len,
 				     struct ocfs2_alloc_context *meta_ac,
-				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+				     struct ocfs2_cached_dealloc_ctxt *dealloc,
+				     int delete)
 {
 	int ret = 0, index = 0;
 	struct ocfs2_refcount_rec rec;
@@ -2084,9 +2085,10 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 	struct buffer_head *ref_leaf_bh = NULL;
 
-	mlog(0, "Tree owner %llu, decrease refcount start %llu, len %u\n",
+	mlog(0, "Tree owner %llu, decrease refcount start %llu, "
+	     "len %u, delete %u\n",
 	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
-	     (unsigned long long)cpos, len);
+	     (unsigned long long)cpos, len, delete);
 
 	while (len) {
 		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
@@ -2099,6 +2101,8 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 
 		r_count = le32_to_cpu(rec.r_refcount);
 		BUG_ON(r_count == 0);
+		if (!delete)
+			BUG_ON(r_count > 1);
 
 		r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
 			      le32_to_cpu(rec.r_clusters)) - cpos;
@@ -2112,7 +2116,7 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 			goto out;
 		}
 
-		if (le32_to_cpu(rec.r_refcount) == 1) {
+		if (le32_to_cpu(rec.r_refcount) == 1 && delete) {
 			ret = ocfs2_cache_cluster_dealloc(dealloc,
 					  ocfs2_clusters_to_blocks(sb, cpos),
 							  r_len);
@@ -2137,7 +2141,8 @@ out:
 int ocfs2_decrease_refcount(struct inode *inode,
 			    handle_t *handle, u32 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    int delete)
 {
 	int ret;
 	u64 ref_blkno;
@@ -2167,7 +2172,7 @@ int ocfs2_decrease_refcount(struct inode *inode,
 	}
 
 	ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
-					cpos, len, meta_ac, dealloc);
+					cpos, len, meta_ac, dealloc, delete);
 	if (ret)
 		mlog_errno(ret);
 out:
@@ -2974,10 +2979,16 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 					u32 cpos, u32 p_cluster,
 					u32 num_clusters, unsigned int e_flags)
 {
-	int ret, credits =  0;
+	int ret, delete, index, credits =  0;
 	u32 new_bit, new_len;
+	unsigned int set_len;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	handle_t *handle;
+	struct buffer_head *ref_leaf_bh = NULL;
+	struct ocfs2_refcount_rec rec;
+
+	mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n",
+	     cpos, p_cluster, num_clusters, e_flags);
 
 	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
 					     &context->di_et,
@@ -2998,35 +3009,75 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 	}
 
 	while (num_clusters) {
-		ret = __ocfs2_claim_clusters(osb, handle, context->data_ac,
-					     1, num_clusters,
-					     &new_bit, &new_len);
+		ret = ocfs2_get_refcount_rec(context->ref_ci,
+					     context->ref_root_bh,
+					     p_cluster, num_clusters,
+					     &rec, &index, &ref_leaf_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out_commit;
 		}
 
-		ret = ocfs2_replace_clusters(handle, context,
-					     cpos, p_cluster, new_bit,
-					     new_len, e_flags);
+		BUG_ON(!rec.r_refcount);
+		set_len = min((u64)p_cluster + num_clusters,
+			      le64_to_cpu(rec.r_cpos) +
+			      le32_to_cpu(rec.r_clusters)) - p_cluster;
+
+		/*
+		 * There are many different situation here.
+		 * 1. If refcount == 1, remove the flag and don't COW.
+		 * 2. If refcount > 1, allocate clusters.
+		 *    Here we may not allocate r_len once at a time, so continue
+		 *    until we reach num_clusters.
+		 */
+		if (le32_to_cpu(rec.r_refcount) == 1) {
+			delete = 0;
+			ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+						       cpos, p_cluster,
+						       set_len, e_flags,
+						       context->meta_ac,
+						       &context->dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out_commit;
+			}
+		} else {
+			delete = 1;
+
+			ret = __ocfs2_claim_clusters(osb, handle,
+						     context->data_ac,
+						     1, set_len,
+						     &new_bit, &new_len);
+			if (ret) {
+				mlog_errno(ret);
+				goto out_commit;
+			}
+
+			ret = ocfs2_replace_clusters(handle, context,
+						     cpos, p_cluster, new_bit,
+						     new_len, e_flags);
+			if (ret) {
+				mlog_errno(ret);
+				goto out_commit;
+			}
+			set_len = new_len;
+		}
+
+		ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
+						context->ref_root_bh,
+						p_cluster, set_len,
+						context->meta_ac,
+						&context->dealloc, delete);
 		if (ret) {
 			mlog_errno(ret);
 			goto out_commit;
 		}
 
-		cpos += new_len;
-		p_cluster += new_len;
-		num_clusters -= new_len;
-	}
-
-	ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
-					context->ref_root_bh,
-					p_cluster, num_clusters,
-					context->meta_ac,
-					&context->dealloc);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
+		cpos += set_len;
+		p_cluster += set_len;
+		num_clusters -= set_len;
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
 	}
 
 	/*
@@ -3049,6 +3100,7 @@ out:
 		ocfs2_free_alloc_context(context->meta_ac);
 		context->meta_ac = NULL;
 	}
+	brelse(ref_leaf_bh);
 
 	return ret;
 }
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9960878..a8c15b0 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -44,7 +44,8 @@ void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
 int ocfs2_decrease_refcount(struct inode *inode,
 			    handle_t *handle, u32 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc);
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    int delete);
 int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  struct buffer_head *di_bh,
 					  u64 phys_blkno,
-- 
cgit v0.10.2


From 293b2f70b4a16a1ca91efd28ef3d6634262c6887 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 25 Aug 2009 08:02:48 +0800
Subject: ocfs2: Integrate CoW in file write.

When we use mmap, we CoW the refcountd clusters in
ocfs2_write_begin_nolock. While for normal file
io(including directio), we do CoW in
ocfs2_prepare_inode_for_write.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index fdad075..9db9d64 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -44,6 +44,7 @@
 #include "suballoc.h"
 #include "super.h"
 #include "symlink.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -590,6 +591,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
 
+	/* We should already CoW the refcounted extent. */
+	BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
 	/*
 	 * get_more_blocks() expects us to describe a hole by clearing
 	 * the mapped bit on bh_result().
@@ -1449,6 +1452,9 @@ static int ocfs2_populate_write_desc(struct inode *inode,
 				goto out;
 			}
 
+			/* We should already CoW the refcountd extent. */
+			BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
+
 			/*
 			 * Assume worst case - that we're writing in
 			 * the middle of the extent.
@@ -1700,6 +1706,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		goto out;
 	}
 
+	ret = ocfs2_check_range_for_refcount(inode, pos, len);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	} else if (ret == 1) {
+		ret = ocfs2_refcount_cow(inode, di_bh,
+					 wc->w_cpos, wc->w_clen);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
 	ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
 					&extents_to_split);
 	if (ret) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4921b4e..6ee20e8 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -59,6 +59,7 @@
 #include "xattr.h"
 #include "acl.h"
 #include "quota.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -1656,6 +1657,70 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
 					 OCFS2_IOC_RESVSP64, &sr, change_size);
 }
 
+int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
+				   size_t count)
+{
+	int ret = 0;
+	unsigned int extent_flags;
+	u32 cpos, clusters, extent_len, phys_cpos;
+	struct super_block *sb = inode->i_sb;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
+	    !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
+		return 0;
+
+	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
+	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
+
+	while (clusters) {
+		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
+					 &extent_flags);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
+			ret = 1;
+			break;
+		}
+
+		if (extent_len > clusters)
+			extent_len = clusters;
+
+		clusters -= extent_len;
+		cpos += extent_len;
+	}
+out:
+	return ret;
+}
+
+static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
+					    loff_t pos, size_t count,
+					    int *meta_level)
+{
+	int ret;
+	struct buffer_head *di_bh = NULL;
+	u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+	u32 clusters =
+		ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+
+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*meta_level = 1;
+
+	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters);
+	if (ret)
+		mlog_errno(ret);
+out:
+	brelse(di_bh);
+	return ret;
+}
+
 static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 					 loff_t *ppos,
 					 size_t count,
@@ -1712,6 +1777,22 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 
 		end = saved_pos + count;
 
+		ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
+		if (ret == 1) {
+			ocfs2_inode_unlock(inode, meta_level);
+			meta_level = -1;
+
+			ret = ocfs2_prepare_inode_for_refcount(inode,
+							       saved_pos,
+							       count,
+							       &meta_level);
+		}
+
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out_unlock;
+		}
+
 		/*
 		 * Skip the O_DIRECT checks if we don't need
 		 * them.
@@ -1758,7 +1839,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 		*ppos = saved_pos;
 
 out_unlock:
-	ocfs2_inode_unlock(inode, meta_level);
+	if (meta_level >= 0)
+		ocfs2_inode_unlock(inode, meta_level);
 
 out:
 	return ret;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 172f9fb..d66cf4f 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -69,4 +69,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
 int ocfs2_change_file_space(struct file *file, unsigned int cmd,
 			    struct ocfs2_space_resv *sr);
 
+int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
+				   size_t count);
 #endif /* OCFS2_FILE_H */
-- 
cgit v0.10.2


From 37f8a2bfaa8364dd3644cccee8824bb8f5e409a5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 26 Aug 2009 09:47:28 +0800
Subject: ocfs2: CoW a reflinked cluster when it is truncated.

When we truncate a file to a specific size which resides in a reflinked
cluster, we need to CoW it since ocfs2_zero_range_for_truncate will
zero the space after the size(just another type of write).

So we add a "max_cpos" in ocfs2_refcount_cow so that it will stop when
it hit the max cluster offset.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 9db9d64..33e03c5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1712,7 +1712,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		goto out;
 	} else if (ret == 1) {
 		ret = ocfs2_refcount_cow(inode, di_bh,
-					 wc->w_cpos, wc->w_clen);
+					 wc->w_cpos, wc->w_clen, UINT_MAX);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6ee20e8..75f5b81 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -335,6 +335,39 @@ out:
 	return ret;
 }
 
+static int ocfs2_cow_file_pos(struct inode *inode,
+			      struct buffer_head *fe_bh,
+			      u64 offset)
+{
+	int status;
+	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+	unsigned int num_clusters = 0;
+	unsigned int ext_flags = 0;
+
+	/*
+	 * If the new offset is aligned to the range of the cluster, there is
+	 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
+	 * CoW either.
+	 */
+	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
+		return 0;
+
+	status = ocfs2_get_clusters(inode, cpos, &phys,
+				    &num_clusters, &ext_flags);
+	if (status) {
+		mlog_errno(status);
+		goto out;
+	}
+
+	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
+		goto out;
+
+	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
+
+out:
+	return status;
+}
+
 static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 				     struct inode *inode,
 				     struct buffer_head *fe_bh,
@@ -347,6 +380,17 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
+	/*
+	 * We need to CoW the cluster contains the offset if it is reflinked
+	 * since we will call ocfs2_zero_range_for_truncate later which will
+	 * write "0" from offset to the end of the cluster.
+	 */
+	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
+	if (status) {
+		mlog_errno(status);
+		return status;
+	}
+
 	/* TODO: This needs to actually orphan the inode in this
 	 * transaction. */
 
@@ -1713,7 +1757,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
 
 	*meta_level = 1;
 
-	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters);
+	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
 	if (ret)
 		mlog_errno(ret);
 out:
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 0a924365..37aa0c8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2482,6 +2482,7 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
  *
  * cpos is vitual start cluster position we want to do CoW in a
  * file and write_len is the cluster length.
+ * max_cpos is the place where we want to stop CoW intentionally.
  *
  * Normal we will start CoW from the beginning of extent record cotaining cpos.
  * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
@@ -2491,6 +2492,7 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 					   struct buffer_head *di_bh,
 					   u32 cpos,
 					   u32 write_len,
+					   u32 max_cpos,
 					   u32 *cow_start,
 					   u32 *cow_len)
 {
@@ -2505,6 +2507,8 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 	int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
 	int leaf_clusters;
 
+	BUG_ON(cpos + write_len > max_cpos);
+
 	if (tree_height > 0) {
 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
 		if (ret) {
@@ -2549,15 +2553,20 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 		}
 
 		/*
-		 * If we encounter a hole or a non-refcounted record,
-		 * stop the search.
+		 * If we encounter a hole, a non-refcounted record or
+		 * pass the max_cpos, stop the search.
 		 */
 		if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
-		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)))
+		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) ||
+		    (max_cpos <= le32_to_cpu(rec->e_cpos)))
 			break;
 
 		leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
 		rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
+		if (rec_end > max_cpos) {
+			rec_end = max_cpos;
+			leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos);
+		}
 
 		/*
 		 * How many clusters do we actually need from
@@ -3184,12 +3193,13 @@ static int ocfs2_replace_cow(struct inode *inode,
 }
 
 /*
- * Starting at cpos, try to CoW write_len clusters.
- * This will stop when it runs into a hole or an unrefcounted extent.
+ * Starting at cpos, try to CoW write_len clusters.  Don't CoW
+ * past max_cpos.  This will stop when it runs into a hole or an
+ * unrefcounted extent.
  */
 static int ocfs2_refcount_cow_hunk(struct inode *inode,
 				   struct buffer_head *di_bh,
-				   u32 cpos, u32 write_len)
+				   u32 cpos, u32 write_len, u32 max_cpos)
 {
 	int ret;
 	u32 cow_start = 0, cow_len = 0;
@@ -3201,12 +3211,14 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 
 	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
 
-	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh, cpos, write_len,
+	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh,
+					      cpos, write_len, max_cpos,
 					      &cow_start, &cow_len);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
+
 	mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
 	     "cow_len %u\n", inode->i_ino,
 	     cpos, write_len, cow_start, cow_len);
@@ -3233,12 +3245,12 @@ out:
 
 /*
  * CoW any and all clusters between cpos and cpos+write_len.
- * If this returns successfully, all clusters between cpos and
- * cpos+write_len are safe to modify.
+ * Don't CoW past max_cpos.  If this returns successfully, all
+ * clusters between cpos and cpos+write_len are safe to modify.
  */
 int ocfs2_refcount_cow(struct inode *inode,
 		       struct buffer_head *di_bh,
-		       u32 cpos, u32 write_len)
+		       u32 cpos, u32 write_len, u32 max_cpos)
 {
 	int ret = 0;
 	u32 p_cluster, num_clusters;
@@ -3257,7 +3269,7 @@ int ocfs2_refcount_cow(struct inode *inode,
 
 		if (ext_flags & OCFS2_EXT_REFCOUNTED) {
 			ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
-						      num_clusters);
+						      num_clusters, max_cpos);
 			if (ret) {
 				mlog_errno(ret);
 				break;
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index a8c15b0..356f99c 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -53,5 +53,5 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  int *credits,
 					  struct ocfs2_alloc_context **meta_ac);
 int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
-		       u32 cpos, u32 write_len);
+		       u32 cpos, u32 write_len, u32 max_cpos);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v0.10.2


From 110a045aca62f6f564e3b68f89af2a3a5a6ecff2 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sat, 22 Aug 2009 23:54:27 +0800
Subject: ocfs2: Add normal functions for reflink a normal file's extents.

2 major functions are added in this patch.

ocfs2_attach_refcount_tree will create a new refcount tree to the
old file if it doesn't have one and insert all the extent records
to the tree if they are not refcounted.

ocfs2_create_reflink_node will:
1. set the refcount tree to the new file.
2. call ocfs2_duplicate_extent_list which will iterate all the
   extents for the old file, insert it to the new file and increase
   the corresponding referennce count.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 37aa0c8..e3171c4 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3282,3 +3282,289 @@ int ocfs2_refcount_cow(struct inode *inode,
 
 	return ret;
 }
+
+/*
+ * Insert a new extent into refcount tree and mark a extent rec
+ * as refcounted in the dinode tree.
+ */
+int ocfs2_add_refcount_flag(struct inode *inode,
+			    struct ocfs2_extent_tree *data_et,
+			    struct ocfs2_caching_info *ref_ci,
+			    struct buffer_head *ref_root_bh,
+			    u32 cpos, u32 p_cluster, u32 num_clusters,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	handle_t *handle;
+	int credits = 1, ref_blocks = 0;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+
+	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
+					       ref_ci, ref_root_bh,
+					       p_cluster, num_clusters,
+					       &ref_blocks, &credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "reserve new metadata %d, credits = %d\n",
+	     ref_blocks, credits);
+
+	if (ref_blocks) {
+		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
+							ref_blocks, &meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_mark_extent_refcounted(inode, data_et, handle,
+					   cpos, num_clusters, p_cluster,
+					   meta_ac, dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+					p_cluster, num_clusters,
+					meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_attach_refcount_tree(struct inode *inode,
+				      struct buffer_head *di_bh)
+{
+	int ret;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_refcount_tree *ref_tree;
+	unsigned int ext_flags;
+	loff_t size;
+	u32 cpos, num_clusters, clusters, p_cluster;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_extent_tree di_et;
+
+	ocfs2_init_dealloc_ctxt(&dealloc);
+
+	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) {
+		ret = ocfs2_create_refcount_tree(inode, di_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	BUG_ON(!di->i_refcount_loc);
+	ret = ocfs2_lock_refcount_tree(osb,
+				       le64_to_cpu(di->i_refcount_loc), 1,
+				       &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh);
+
+	size = i_size_read(inode);
+	clusters = ocfs2_clusters_for_bytes(inode->i_sb, size);
+
+	cpos = 0;
+	while (cpos < clusters) {
+		ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
+					 &num_clusters, &ext_flags);
+
+		if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) {
+			ret = ocfs2_add_refcount_flag(inode, &di_et,
+						      &ref_tree->rf_ci,
+						      ref_root_bh, cpos,
+						      p_cluster, num_clusters,
+						      &dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				break;
+			}
+		}
+		cpos += num_clusters;
+	}
+
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+
+	if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) {
+		ocfs2_schedule_truncate_log_flush(osb, 1);
+		ocfs2_run_deallocs(osb, &dealloc);
+	}
+out:
+	/*
+	 * Empty the extent map so that we may get the right extent
+	 * record from the disk.
+	 */
+	ocfs2_extent_map_trunc(inode, 0);
+
+	return ret;
+}
+
+static int ocfs2_add_refcounted_extent(struct inode *inode,
+				   struct ocfs2_extent_tree *et,
+				   struct ocfs2_caching_info *ref_ci,
+				   struct buffer_head *ref_root_bh,
+				   u32 cpos, u32 p_cluster, u32 num_clusters,
+				   unsigned int ext_flags,
+				   struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	handle_t *handle;
+	int credits = 0;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+
+	ret = ocfs2_lock_refcount_allocators(inode->i_sb,
+					     p_cluster, num_clusters,
+					     et, ref_ci,
+					     ref_root_bh, &meta_ac,
+					     NULL, &credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_insert_extent(handle, et, cpos,
+			cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
+							     p_cluster)),
+			num_clusters, ext_flags, meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+					p_cluster, num_clusters,
+					meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_duplicate_extent_list(struct inode *s_inode,
+				struct inode *t_inode,
+				struct buffer_head *t_bh,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0;
+	u32 p_cluster, num_clusters, clusters, cpos;
+	loff_t size;
+	unsigned int ext_flags;
+	struct ocfs2_extent_tree et;
+
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh);
+
+	size = i_size_read(s_inode);
+	clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size);
+
+	cpos = 0;
+	while (cpos < clusters) {
+		ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
+					 &num_clusters, &ext_flags);
+
+		if (p_cluster) {
+			ret = ocfs2_add_refcounted_extent(t_inode, &et,
+							  ref_ci, ref_root_bh,
+							  cpos, p_cluster,
+							  num_clusters,
+							  ext_flags,
+							  dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		cpos += num_clusters;
+	}
+
+out:
+	return ret;
+}
+
+static int ocfs2_create_reflink_node(struct inode *s_inode,
+				     struct buffer_head *s_bh,
+				     struct inode *t_inode,
+				     struct buffer_head *t_bh)
+{
+	int ret;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
+	struct ocfs2_refcount_tree *ref_tree;
+
+	ocfs2_init_dealloc_ctxt(&dealloc);
+
+	ret = ocfs2_set_refcount_tree(t_inode, t_bh,
+				      le64_to_cpu(di->i_refcount_loc));
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
+				       1, &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
+					  &ref_tree->rf_ci, ref_root_bh,
+					  &dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+out:
+	if (ocfs2_dealloc_has_cluster(&dealloc)) {
+		ocfs2_schedule_truncate_log_flush(osb, 1);
+		ocfs2_run_deallocs(osb, &dealloc);
+	}
+
+	return ret;
+}
-- 
cgit v0.10.2


From a9063ab9a3827483007124bdb6f9877f0ab4c3f5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:40:59 +0800
Subject: ocfs2: handle file attributes issue for reflink.

A reflink creates a snapshot of a file, that means the attributes
must be identical except for three exceptions - nlink, ino, and ctime.

As for time changes, Here is a brief description:

1. Source file:
   1) atime: Ignore. Let the lazy atime code handle that.
   2) mtime: don't touch.
   3) ctime: If we change the tree (adding REFCOUNTED to at least one
             extent), update it.
2. Destination file:
   1) atime: ignore.
   2) mtime: we want it to appear identical to the source.
   3) ctime: update.

The idea here is that an ls -l will show the same time for the
src and target - it shows mtime.  Backup software like rsync and tar
will treat the new file correctly too.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index e3171c4..62d21c6 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3350,10 +3350,44 @@ out:
 	return ret;
 }
 
+static int ocfs2_change_ctime(struct inode *inode,
+			      struct buffer_head *di_bh)
+{
+	int ret;
+	handle_t *handle;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+
+	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
+				   OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	inode->i_ctime = CURRENT_TIME;
+	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+
+	ocfs2_journal_dirty(handle, di_bh);
+
+out_commit:
+	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+out:
+	return ret;
+}
+
 static int ocfs2_attach_refcount_tree(struct inode *inode,
 				      struct buffer_head *di_bh)
 {
-	int ret;
+	int ret, data_changed = 0;
 	struct buffer_head *ref_root_bh = NULL;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -3402,12 +3436,21 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
 						      &dealloc);
 			if (ret) {
 				mlog_errno(ret);
-				break;
+				goto unlock;
 			}
+
+			data_changed = 1;
 		}
 		cpos += num_clusters;
 	}
 
+	if (data_changed) {
+		ret = ocfs2_change_ctime(inode, di_bh);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+unlock:
 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	brelse(ref_root_bh);
 
@@ -3522,6 +3565,74 @@ out:
 	return ret;
 }
 
+/*
+ * change the new file's attributes to the src.
+ *
+ * reflink creates a snapshot of a file, that means the attributes
+ * must be identical except for three exceptions - nlink, ino, and ctime.
+ */
+static int ocfs2_complete_reflink(struct inode *s_inode,
+				  struct buffer_head *s_bh,
+				  struct inode *t_inode,
+				  struct buffer_head *t_bh)
+{
+	int ret;
+	handle_t *handle;
+	struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data;
+	loff_t size = i_size_read(s_inode);
+
+	handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb),
+				   OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	spin_lock(&OCFS2_I(t_inode)->ip_lock);
+	OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters;
+	OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr;
+	OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
+	spin_unlock(&OCFS2_I(t_inode)->ip_lock);
+	i_size_write(t_inode, size);
+
+	di->i_xattr_inline_size = s_di->i_xattr_inline_size;
+	di->i_clusters = s_di->i_clusters;
+	di->i_size = s_di->i_size;
+	di->i_dyn_features = s_di->i_dyn_features;
+	di->i_attr = s_di->i_attr;
+	di->i_uid = s_di->i_uid;
+	di->i_gid = s_di->i_gid;
+	di->i_mode = s_di->i_mode;
+
+	/*
+	 * update time.
+	 * we want mtime to appear identical to the source and update ctime.
+	 */
+	t_inode->i_ctime = CURRENT_TIME;
+
+	di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+
+	t_inode->i_mtime = s_inode->i_mtime;
+	di->i_mtime = s_di->i_mtime;
+	di->i_mtime_nsec = s_di->i_mtime_nsec;
+
+	ocfs2_journal_dirty(handle, t_bh);
+
+out_commit:
+	ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle);
+	return ret;
+}
+
 static int ocfs2_create_reflink_node(struct inode *s_inode,
 				     struct buffer_head *s_bh,
 				     struct inode *t_inode,
@@ -3555,9 +3666,16 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
 	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
 					  &ref_tree->rf_ci, ref_root_bh,
 					  &dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_unlock_refcount;
+	}
+
+	ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh);
 	if (ret)
 		mlog_errno(ret);
 
+out_unlock_refcount:
 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	brelse(ref_root_bh);
 out:
-- 
cgit v0.10.2


From 1061f9c1c9f81ed88b5d268a95d8e3ace80da63a Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:41:57 +0800
Subject: ocfs2: Return extent flags for xattr value tree.

With the new refcount tree, xattr value can also be refcounted
among multiple files. So return the appropriate extent flags
so that CoW can used it later.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 40b5105..843db64 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -541,7 +541,8 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
 
 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 			     u32 *p_cluster, u32 *num_clusters,
-			     struct ocfs2_extent_list *el)
+			     struct ocfs2_extent_list *el,
+			     unsigned int *extent_flags)
 {
 	int ret = 0, i;
 	struct buffer_head *eb_bh = NULL;
@@ -593,6 +594,9 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 		*p_cluster = *p_cluster + coff;
 		if (num_clusters)
 			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;
+
+		if (extent_flags)
+			*extent_flags = rec->e_flags;
 	}
 out:
 	if (eb_bh)
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 9942f47..e79d41c 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -55,7 +55,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 			     u32 *p_cluster, u32 *num_clusters,
-			     struct ocfs2_extent_list *el);
+			     struct ocfs2_extent_list *el,
+			     unsigned int *extent_flags);
 
 int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 			   struct buffer_head *bhs[], int flags,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 1bf12c4..dda49c0 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -704,7 +704,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	while (trunc_len) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 					       &alloc_size,
-					       &vb->vb_xv->xr_list);
+					       &vb->vb_xv->xr_list, NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -959,7 +959,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
 	cpos = 0;
 	while (cpos < clusters) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
-					       &num_clusters, el);
+					       &num_clusters, el, NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1198,7 +1198,8 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 
 	while (cpos < clusters) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
-					       &num_clusters, &xv->xr_list);
+					       &num_clusters, &xv->xr_list,
+					       NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v0.10.2


From 913580b4cd445c4fb25d7cf167911a8cf6bdb1eb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 24 Aug 2009 14:31:03 +0800
Subject: ocfs2: Abstract duplicate clusters process in CoW.

We currently use pagecache to duplicate clusters in CoW,
but it isn't suitable for xattr case. So abstract it out
so that the caller can decide which method it use.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 62d21c6..40de7bb 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -45,12 +45,20 @@ struct ocfs2_cow_context {
 	struct inode *inode;
 	u32 cow_start;
 	u32 cow_len;
-	struct ocfs2_extent_tree di_et;
-	struct ocfs2_caching_info *ref_ci;
+	struct ocfs2_extent_tree data_et;
+	struct ocfs2_refcount_tree *ref_tree;
 	struct buffer_head *ref_root_bh;
 	struct ocfs2_alloc_context *meta_ac;
 	struct ocfs2_alloc_context *data_ac;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
+	int (*get_clusters)(struct ocfs2_cow_context *context,
+			    u32 v_cluster, u32 *p_cluster,
+			    u32 *num_clusters,
+			    unsigned int *extent_flags);
+	int (*cow_duplicate_clusters)(handle_t *handle,
+				      struct ocfs2_cow_context *context,
+				      u32 cpos, u32 old_cluster,
+				      u32 new_cluster, u32 new_len);
 };
 
 static inline struct ocfs2_refcount_tree *
@@ -2489,7 +2497,7 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
  * get good I/O from the resulting extent tree.
  */
 static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
-					   struct buffer_head *di_bh,
+					   struct ocfs2_extent_list *el,
 					   u32 cpos,
 					   u32 write_len,
 					   u32 max_cpos,
@@ -2497,8 +2505,6 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 					   u32 *cow_len)
 {
 	int ret = 0;
-	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
-	struct ocfs2_extent_list *el = &di->id2.i_list;
 	int tree_height = le16_to_cpu(el->l_tree_depth), i;
 	struct buffer_head *eb_bh = NULL;
 	struct ocfs2_extent_block *eb = NULL;
@@ -2769,13 +2775,13 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
 	return 0;
 }
 
-static int ocfs2_duplicate_clusters(handle_t *handle,
-				    struct ocfs2_cow_context *context,
-				    u32 cpos, u32 old_cluster,
-				    u32 new_cluster, u32 new_len)
+static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
+					    struct ocfs2_cow_context *context,
+					    u32 cpos, u32 old_cluster,
+					    u32 new_cluster, u32 new_len)
 {
 	int ret = 0, partial;
-	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	struct ocfs2_caching_info *ci = context->data_et.et_ci;
 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
 	struct page *page;
@@ -2909,7 +2915,7 @@ static int ocfs2_replace_clusters(handle_t *handle,
 				  unsigned int ext_flags)
 {
 	int ret;
-	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	struct ocfs2_caching_info *ci = context->data_et.et_ci;
 	u64 ino = ocfs2_metadata_cache_owner(ci);
 
 	mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
@@ -2917,15 +2923,15 @@ static int ocfs2_replace_clusters(handle_t *handle,
 
 	/*If the old clusters is unwritten, no need to duplicate. */
 	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
-		ret = ocfs2_duplicate_clusters(handle, context, cpos,
-					       old, new, len);
+		ret = context->cow_duplicate_clusters(handle, context, cpos,
+						      old, new, len);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 	}
 
-	ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+	ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
 				       cpos, new, len, ext_flags,
 				       context->meta_ac, &context->dealloc);
 	if (ret)
@@ -2983,6 +2989,15 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
 	return ret;
 }
 
+static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context,
+				 u32 v_cluster, u32 *p_cluster,
+				 u32 *num_clusters,
+				 unsigned int *extent_flags)
+{
+	return ocfs2_get_clusters(context->inode, v_cluster, p_cluster,
+				  num_clusters, extent_flags);
+}
+
 static int ocfs2_make_clusters_writable(struct super_block *sb,
 					struct ocfs2_cow_context *context,
 					u32 cpos, u32 p_cluster,
@@ -2994,14 +3009,15 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	handle_t *handle;
 	struct buffer_head *ref_leaf_bh = NULL;
+	struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
 	struct ocfs2_refcount_rec rec;
 
 	mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n",
 	     cpos, p_cluster, num_clusters, e_flags);
 
 	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
-					     &context->di_et,
-					     context->ref_ci,
+					     &context->data_et,
+					     ref_ci,
 					     context->ref_root_bh,
 					     &context->meta_ac,
 					     &context->data_ac, &credits);
@@ -3018,8 +3034,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 	}
 
 	while (num_clusters) {
-		ret = ocfs2_get_refcount_rec(context->ref_ci,
-					     context->ref_root_bh,
+		ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
 					     p_cluster, num_clusters,
 					     &rec, &index, &ref_leaf_bh);
 		if (ret) {
@@ -3041,7 +3056,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 		 */
 		if (le32_to_cpu(rec.r_refcount) == 1) {
 			delete = 0;
-			ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+			ret = ocfs2_clear_ext_refcount(handle,
+						       &context->data_et,
 						       cpos, p_cluster,
 						       set_len, e_flags,
 						       context->meta_ac,
@@ -3072,7 +3088,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 			set_len = new_len;
 		}
 
-		ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
+		ret = __ocfs2_decrease_refcount(handle, ref_ci,
 						context->ref_root_bh,
 						p_cluster, set_len,
 						context->meta_ac,
@@ -3114,17 +3130,14 @@ out:
 	return ret;
 }
 
-static int ocfs2_replace_cow(struct inode *inode,
-			     struct buffer_head *di_bh,
-			     struct buffer_head *ref_root_bh,
-			     struct ocfs2_caching_info *ref_ci,
-			     u32 cow_start, u32 cow_len)
+static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
 {
 	int ret = 0;
-	u32 p_cluster, num_clusters, start = cow_start;
+	struct inode *inode = context->inode;
+	u32 cow_start = context->cow_start, cow_len = context->cow_len;
+	u32 p_cluster, num_clusters;
 	unsigned int ext_flags;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_cow_context *context;
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
 		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
@@ -3133,26 +3146,11 @@ static int ocfs2_replace_cow(struct inode *inode,
 		return -EROFS;
 	}
 
-	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
-	if (!context) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		return ret;
-	}
-
-	context->inode = inode;
-	context->cow_start = cow_start;
-	context->cow_len = cow_len;
-	context->ref_ci = ref_ci;
-	context->ref_root_bh = ref_root_bh;
-
 	ocfs2_init_dealloc_ctxt(&context->dealloc);
-	ocfs2_init_dinode_extent_tree(&context->di_et,
-				      INODE_CACHE(inode), di_bh);
 
 	while (cow_len) {
-		ret = ocfs2_get_clusters(inode, cow_start, &p_cluster,
-					 &num_clusters, &ext_flags);
+		ret = context->get_clusters(context, cow_start, &p_cluster,
+					    &num_clusters, &ext_flags);
 		if (ret) {
 			mlog_errno(ret);
 			break;
@@ -3175,20 +3173,11 @@ static int ocfs2_replace_cow(struct inode *inode,
 		cow_start += num_clusters;
 	}
 
-
-	/*
-	 * truncate the extent map here since no matter whether we meet with
-	 * any error during the action, we shouldn't trust cached extent map
-	 * any more.
-	 */
-	ocfs2_extent_map_trunc(inode, start);
-
 	if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
 		ocfs2_schedule_truncate_log_flush(osb, 1);
 		ocfs2_run_deallocs(osb, &context->dealloc);
 	}
 
-	kfree(context);
 	return ret;
 }
 
@@ -3208,10 +3197,11 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 	struct buffer_head *ref_root_bh = NULL;
 	struct ocfs2_refcount_tree *ref_tree;
+	struct ocfs2_cow_context *context = NULL;
 
 	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
 
-	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh,
+	ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
 					      cpos, write_len, max_cpos,
 					      &cow_start, &cow_len);
 	if (ret) {
@@ -3225,6 +3215,13 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 
 	BUG_ON(cow_len == 0);
 
+	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
+	if (!context) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
 	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
 				       1, &ref_tree, &ref_root_bh);
 	if (ret) {
@@ -3232,14 +3229,32 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_replace_cow(inode, di_bh, ref_root_bh, &ref_tree->rf_ci,
-				cow_start, cow_len);
+	context->inode = inode;
+	context->cow_start = cow_start;
+	context->cow_len = cow_len;
+	context->ref_tree = ref_tree;
+	context->ref_root_bh = ref_root_bh;
+	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
+	context->get_clusters = ocfs2_di_get_clusters;
+
+	ocfs2_init_dinode_extent_tree(&context->data_et,
+				      INODE_CACHE(inode), di_bh);
+
+	ret = ocfs2_replace_cow(context);
 	if (ret)
 		mlog_errno(ret);
 
+	/*
+	 * truncate the extent map here since no matter whether we meet with
+	 * any error during the action, we shouldn't trust cached extent map
+	 * any more.
+	 */
+	ocfs2_extent_map_trunc(inode, cow_start);
+
 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	brelse(ref_root_bh);
 out:
+	kfree(context);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 492a8a33e1cb966fa0b5756c5fc11d30c8f8848e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:17 +0800
Subject: ocfs2: Add CoW support for xattr.

In order to make 2 transcation(xattr and cow) independent with each other,
we CoW the whole xattr out in case we are setting them.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 40de7bb..a5b5bef 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -32,6 +32,7 @@
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "aops.h"
+#include "xattr.h"
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
@@ -51,6 +52,9 @@ struct ocfs2_cow_context {
 	struct ocfs2_alloc_context *meta_ac;
 	struct ocfs2_alloc_context *data_ac;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
+	void *cow_object;
+	struct ocfs2_post_refcount *post_refcount;
+	int extra_credits;
 	int (*get_clusters)(struct ocfs2_cow_context *context,
 			    u32 v_cluster, u32 *p_cluster,
 			    u32 *num_clusters,
@@ -2848,6 +2852,65 @@ unlock:
 	return ret;
 }
 
+static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
+					   struct ocfs2_cow_context *context,
+					   u32 cpos, u32 old_cluster,
+					   u32 new_cluster, u32 new_len)
+{
+	int ret = 0;
+	struct super_block *sb = context->inode->i_sb;
+	struct ocfs2_caching_info *ci = context->data_et.et_ci;
+	int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
+	u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
+	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	struct buffer_head *old_bh = NULL;
+	struct buffer_head *new_bh = NULL;
+
+	mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster,
+	     new_cluster, new_len);
+
+	for (i = 0; i < blocks; i++, old_block++, new_block++) {
+		new_bh = sb_getblk(osb->sb, new_block);
+		if (new_bh == NULL) {
+			ret = -EIO;
+			mlog_errno(ret);
+			break;
+		}
+
+		ocfs2_set_new_buffer_uptodate(ci, new_bh);
+
+		ret = ocfs2_read_block(ci, old_block, &old_bh, NULL);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = ocfs2_journal_access(handle, ci, new_bh,
+					   OCFS2_JOURNAL_ACCESS_CREATE);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
+		ret = ocfs2_journal_dirty(handle, new_bh);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		brelse(new_bh);
+		brelse(old_bh);
+		new_bh = NULL;
+		old_bh = NULL;
+	}
+
+	brelse(new_bh);
+	brelse(old_bh);
+	return ret;
+}
+
 static int ocfs2_clear_ext_refcount(handle_t *handle,
 				    struct ocfs2_extent_tree *et,
 				    u32 cpos, u32 p_cluster, u32 len,
@@ -3026,6 +3089,10 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 		return ret;
 	}
 
+	if (context->post_refcount)
+		credits += context->post_refcount->credits;
+
+	credits += context->extra_credits;
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -3105,13 +3172,25 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 		ref_leaf_bh = NULL;
 	}
 
+	/* handle any post_cow action. */
+	if (context->post_refcount && context->post_refcount->func) {
+		ret = context->post_refcount->func(context->inode, handle,
+						context->post_refcount->para);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+	}
+
 	/*
 	 * Here we should write the new page out first if we are
 	 * in write-back mode.
 	 */
-	ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
-	if (ret)
-		mlog_errno(ret);
+	if (context->get_clusters == ocfs2_di_get_clusters) {
+		ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+		if (ret)
+			mlog_errno(ret);
+	}
 
 out_commit:
 	ocfs2_commit_trans(osb, handle);
@@ -3298,6 +3377,167 @@ int ocfs2_refcount_cow(struct inode *inode,
 	return ret;
 }
 
+static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context,
+					  u32 v_cluster, u32 *p_cluster,
+					  u32 *num_clusters,
+					  unsigned int *extent_flags)
+{
+	struct inode *inode = context->inode;
+	struct ocfs2_xattr_value_root *xv = context->cow_object;
+
+	return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster,
+					num_clusters, &xv->xr_list,
+					extent_flags);
+}
+
+/*
+ * Given a xattr value root, calculate the most meta/credits we need for
+ * refcount tree change if we truncate it to 0.
+ */
+int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
+				       struct ocfs2_caching_info *ref_ci,
+				       struct buffer_head *ref_root_bh,
+				       struct ocfs2_xattr_value_root *xv,
+				       int *meta_add, int *credits)
+{
+	int ret = 0, index, ref_blocks = 0;
+	u32 p_cluster, num_clusters;
+	u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters);
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_rec rec;
+	struct buffer_head *ref_leaf_bh = NULL;
+
+	while (cpos < clusters) {
+		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
+					       &num_clusters, &xv->xr_list,
+					       NULL);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		cpos += num_clusters;
+
+		while (num_clusters) {
+			ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh,
+						     p_cluster, num_clusters,
+						     &rec, &index,
+						     &ref_leaf_bh);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			BUG_ON(!rec.r_refcount);
+
+			rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+
+			/*
+			 * We really don't know whether the other clusters is in
+			 * this refcount block or not, so just take the worst
+			 * case that all the clusters are in this block and each
+			 * one will split a refcount rec, so totally we need
+			 * clusters * 2 new refcount rec.
+			 */
+			if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
+			    le16_to_cpu(rb->rf_records.rl_count))
+				ref_blocks++;
+
+			*credits += 1;
+			brelse(ref_leaf_bh);
+			ref_leaf_bh = NULL;
+
+			if (num_clusters <= le32_to_cpu(rec.r_clusters))
+				break;
+			else
+				num_clusters -= le32_to_cpu(rec.r_clusters);
+			p_cluster += num_clusters;
+		}
+	}
+
+	*meta_add += ref_blocks;
+	if (!ref_blocks)
+		goto out;
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
+		*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+	else {
+		struct ocfs2_extent_tree et;
+
+		ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh);
+		*credits += ocfs2_calc_extend_credits(inode->i_sb,
+						      et.et_root_el,
+						      ref_blocks);
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	return ret;
+}
+
+/*
+ * Do CoW for xattr.
+ */
+int ocfs2_refcount_cow_xattr(struct inode *inode,
+			     struct ocfs2_dinode *di,
+			     struct ocfs2_xattr_value_buf *vb,
+			     struct ocfs2_refcount_tree *ref_tree,
+			     struct buffer_head *ref_root_bh,
+			     u32 cpos, u32 write_len,
+			     struct ocfs2_post_refcount *post)
+{
+	int ret;
+	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_cow_context *context = NULL;
+	u32 cow_start, cow_len;
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
+					      cpos, write_len, UINT_MAX,
+					      &cow_start, &cow_len);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	BUG_ON(cow_len == 0);
+
+	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
+	if (!context) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	context->inode = inode;
+	context->cow_start = cow_start;
+	context->cow_len = cow_len;
+	context->ref_tree = ref_tree;
+	context->ref_root_bh = ref_root_bh;;
+	context->cow_object = xv;
+
+	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
+	/* We need the extra credits for duplicate_clusters by jbd. */
+	context->extra_credits =
+		ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len;
+	context->get_clusters = ocfs2_xattr_value_get_clusters;
+	context->post_refcount = post;
+
+	ocfs2_init_xattr_value_extent_tree(&context->data_et,
+					   INODE_CACHE(inode), vb);
+
+	ret = ocfs2_replace_cow(context);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	kfree(context);
+	return ret;
+}
+
 /*
  * Insert a new extent into refcount tree and mark a extent rec
  * as refcounted in the dinode tree.
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 356f99c..d09d64b 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -54,4 +54,33 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  struct ocfs2_alloc_context **meta_ac);
 int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
 		       u32 cpos, u32 write_len, u32 max_cpos);
+
+typedef int (ocfs2_post_refcount_func)(struct inode *inode,
+				       handle_t *handle,
+				       void *para);
+/*
+ * Some refcount caller need to do more work after we modify the data b-tree
+ * during refcount operation(including CoW and add refcount flag), and make the
+ * transaction complete. So it must give us this structure so that we can do it
+ * within our transaction.
+ *
+ */
+struct ocfs2_post_refcount {
+	int credits;			/* credits it need for journal. */
+	ocfs2_post_refcount_func *func;	/* real function. */
+	void *para;
+};
+
+int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
+				       struct ocfs2_caching_info *ref_ci,
+				       struct buffer_head *ref_root_bh,
+				       struct ocfs2_xattr_value_root *xv,
+				       int *meta_add, int *credits);
+int ocfs2_refcount_cow_xattr(struct inode *inode,
+			     struct ocfs2_dinode *di,
+			     struct ocfs2_xattr_value_buf *vb,
+			     struct ocfs2_refcount_tree *ref_tree,
+			     struct buffer_head *ref_root_bh,
+			     u32 cpos, u32 write_len,
+			     struct ocfs2_post_refcount *post);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index dda49c0..a538ceb 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -55,7 +55,7 @@
 #include "buffer_head_io.h"
 #include "super.h"
 #include "xattr.h"
-
+#include "refcounttree.h"
 
 struct ocfs2_xattr_def_value_root {
 	struct ocfs2_xattr_value_root	xv;
@@ -176,6 +176,14 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 				  u64 src_blk, u64 last_blk, u64 to_blk,
 				  unsigned int start_bucket,
 				  u32 *first_hash);
+static int ocfs2_prepare_refcount_xattr(struct inode *inode,
+					struct ocfs2_dinode *di,
+					struct ocfs2_xattr_info *xi,
+					struct ocfs2_xattr_search *xis,
+					struct ocfs2_xattr_search *xbs,
+					struct ocfs2_refcount_tree **ref_tree,
+					int *meta_need,
+					int *credits);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -647,6 +655,7 @@ leave:
 static int __ocfs2_remove_xattr_range(struct inode *inode,
 				      struct ocfs2_xattr_value_buf *vb,
 				      u32 cpos, u32 phys_cpos, u32 len,
+				      unsigned int ext_flags,
 				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret;
@@ -678,7 +687,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
+	if (ext_flags & OCFS2_EXT_REFCOUNTED)
+		ret = ocfs2_decrease_refcount(inode, handle,
+					ocfs2_blocks_to_clusters(inode->i_sb,
+								 phys_blkno),
+					len, ctxt->meta_ac, &ctxt->dealloc, 1);
+	else
+		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
+						  phys_blkno, len);
 	if (ret)
 		mlog_errno(ret);
 
@@ -693,6 +709,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 				   struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret = 0;
+	unsigned int ext_flags;
 	u32 trunc_len, cpos, phys_cpos, alloc_size;
 	u64 block;
 
@@ -704,7 +721,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	while (trunc_len) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 					       &alloc_size,
-					       &vb->vb_xv->xr_list, NULL);
+					       &vb->vb_xv->xr_list, &ext_flags);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -715,7 +732,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 
 		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
 						 phys_cpos, alloc_size,
-						 ctxt);
+						 ext_flags, ctxt);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1182,7 +1199,7 @@ static int ocfs2_xattr_get(struct inode *inode,
 
 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 					   handle_t *handle,
-					   struct ocfs2_xattr_value_root *xv,
+					   struct ocfs2_xattr_value_buf *vb,
 					   const void *value,
 					   int value_len)
 {
@@ -1193,18 +1210,22 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
 	u64 blkno;
 	struct buffer_head *bh = NULL;
+	unsigned int ext_flags;
+	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
 
 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
 
 	while (cpos < clusters) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
 					       &num_clusters, &xv->xr_list,
-					       NULL);
+					       &ext_flags);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
+		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
+
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
@@ -1356,7 +1377,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv,
+	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
 					      xi->value, xi->value_len);
 	if (ret < 0)
 		mlog_errno(ret);
@@ -1595,7 +1616,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 
 				ret = __ocfs2_xattr_set_value_outside(inode,
 								handle,
-								vb.vb_xv,
+								&vb,
 								xi->value,
 								xi->value_len);
 				if (ret < 0)
@@ -2431,6 +2452,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
 				     struct ocfs2_xattr_search *xis,
 				     struct ocfs2_xattr_search *xbs,
 				     struct ocfs2_xattr_set_ctxt *ctxt,
+				     int extra_meta,
 				     int *credits)
 {
 	int clusters_add, meta_add, ret;
@@ -2447,6 +2469,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
 		return ret;
 	}
 
+	meta_add += extra_meta;
 	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
 	     "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
 
@@ -2714,10 +2737,11 @@ int ocfs2_xattr_set(struct inode *inode,
 {
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
-	int ret, credits;
+	int ret, credits, ref_meta = 0, ref_credits = 0;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+	struct ocfs2_refcount_tree *ref_tree = NULL;
 
 	struct ocfs2_xattr_info xi = {
 		.name_index = name_index,
@@ -2782,6 +2806,17 @@ int ocfs2_xattr_set(struct inode *inode,
 			goto cleanup;
 	}
 
+	/* Check whether the value is refcounted and do some prepartion. */
+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
+	    (!xis.not_found || !xbs.not_found)) {
+		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
+						   &xis, &xbs, &ref_tree,
+						   &ref_meta, &ref_credits);
+		if (ret) {
+			mlog_errno(ret);
+			goto cleanup;
+		}
+	}
 
 	mutex_lock(&tl_inode->i_mutex);
 
@@ -2796,7 +2831,7 @@ int ocfs2_xattr_set(struct inode *inode,
 	mutex_unlock(&tl_inode->i_mutex);
 
 	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
-					&xbs, &ctxt, &credits);
+					&xbs, &ctxt, ref_meta, &credits);
 	if (ret) {
 		mlog_errno(ret);
 		goto cleanup;
@@ -2804,7 +2839,7 @@ int ocfs2_xattr_set(struct inode *inode,
 
 	/* we need to update inode's ctime field, so add credit for it. */
 	credits += OCFS2_INODE_UPDATE_CREDITS;
-	ctxt.handle = ocfs2_start_trans(osb, credits);
+	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
 	if (IS_ERR(ctxt.handle)) {
 		ret = PTR_ERR(ctxt.handle);
 		mlog_errno(ret);
@@ -2823,6 +2858,8 @@ int ocfs2_xattr_set(struct inode *inode,
 		ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
 cleanup:
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
 	ocfs2_inode_unlock(inode, 1);
 cleanup_nolock:
@@ -4802,6 +4839,9 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 	struct ocfs2_xattr_entry *xe = xs->here;
 	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
 	void *base;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_access = ocfs2_journal_access,
+	};
 
 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
 
@@ -4818,8 +4858,10 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 	xv = (struct ocfs2_xattr_value_root *)(base + offset +
 		 OCFS2_XATTR_SIZE(xe->xe_name_len));
 
+	vb.vb_xv = xv;
+	vb.vb_bh = xs->bucket->bu_bhs[block_off];
 	ret = __ocfs2_xattr_set_value_outside(inode, handle,
-					      xv, val, value_len);
+					      &vb, val, value_len);
 	if (ret)
 		mlog_errno(ret);
 out:
@@ -5311,6 +5353,174 @@ out:
 }
 
 /*
+ * Whenever we modify a xattr value root in the bucket(e.g, CoW
+ * or change the extent record flag), we need to recalculate
+ * the metaecc for the whole bucket. So it is done here.
+ *
+ * Note:
+ * We have to give the extra credits for the caller.
+ */
+static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
+					    handle_t *handle,
+					    void *para)
+{
+	int ret;
+	struct ocfs2_xattr_bucket *bucket =
+			(struct ocfs2_xattr_bucket *)para;
+
+	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
+
+	return 0;
+}
+
+/*
+ * Special action we need if the xattr value is refcounted.
+ *
+ * 1. If the xattr is refcounted, lock the tree.
+ * 2. CoW the xattr if we are setting the new value and the value
+ *    will be stored outside.
+ * 3. In other case, decrease_refcount will work for us, so just
+ *    lock the refcount tree, calculate the meta and credits is OK.
+ *
+ * We have to do CoW before ocfs2_init_xattr_set_ctxt since
+ * currently CoW is a completed transaction, while this function
+ * will also lock the allocators and let us deadlock. So we will
+ * CoW the whole xattr value.
+ */
+static int ocfs2_prepare_refcount_xattr(struct inode *inode,
+					struct ocfs2_dinode *di,
+					struct ocfs2_xattr_info *xi,
+					struct ocfs2_xattr_search *xis,
+					struct ocfs2_xattr_search *xbs,
+					struct ocfs2_refcount_tree **ref_tree,
+					int *meta_add,
+					int *credits)
+{
+	int ret = 0;
+	struct ocfs2_xattr_block *xb;
+	struct ocfs2_xattr_entry *xe;
+	char *base;
+	u32 p_cluster, num_clusters;
+	unsigned int ext_flags;
+	int name_offset, name_len;
+	struct ocfs2_xattr_value_buf vb;
+	struct ocfs2_xattr_bucket *bucket = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_post_refcount refcount;
+	struct ocfs2_post_refcount *p = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+
+	if (!xis->not_found) {
+		xe = xis->here;
+		name_offset = le16_to_cpu(xe->xe_name_offset);
+		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+		base = xis->base;
+		vb.vb_bh = xis->inode_bh;
+		vb.vb_access = ocfs2_journal_access_di;
+	} else {
+		int i, block_off = 0;
+		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+		xe = xbs->here;
+		name_offset = le16_to_cpu(xe->xe_name_offset);
+		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+		i = xbs->here - xbs->header->xh_entries;
+
+		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+			ret = ocfs2_xattr_bucket_get_name_value(inode,
+							bucket_xh(xbs->bucket),
+							i, &block_off,
+							&name_offset);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+			base = bucket_block(xbs->bucket, block_off);
+			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
+			vb.vb_access = ocfs2_journal_access;
+
+			if (ocfs2_meta_ecc(osb)) {
+				/*create parameters for ocfs2_post_refcount. */
+				bucket = xbs->bucket;
+				refcount.credits = bucket->bu_blocks;
+				refcount.para = bucket;
+				refcount.func =
+					ocfs2_xattr_bucket_post_refcount;
+				p = &refcount;
+			}
+		} else {
+			base = xbs->base;
+			vb.vb_bh = xbs->xattr_bh;
+			vb.vb_access = ocfs2_journal_access_xb;
+		}
+	}
+
+	if (ocfs2_xattr_is_local(xe))
+		goto out;
+
+	vb.vb_xv = (struct ocfs2_xattr_value_root *)
+				(base + name_offset + name_len);
+
+	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
+				       &num_clusters, &vb.vb_xv->xr_list,
+				       &ext_flags);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * We just need to check the 1st extent record, since we always
+	 * CoW the whole xattr. So there shouldn't be a xattr with
+	 * some REFCOUNT extent recs after the 1st one.
+	 */
+	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
+		goto out;
+
+	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
+				       1, ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * If we are deleting the xattr or the new size will be stored inside,
+	 * cool, leave it there, the xattr truncate process will remove them
+	 * for us(it still needs the refcount tree lock and the meta, credits).
+	 * And the worse case is that every cluster truncate will split the
+	 * refcount tree, and make the original extent become 3. So we will need
+	 * 2 * cluster more extent recs at most.
+	 */
+	if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
+
+		ret = ocfs2_refcounted_xattr_delete_need(inode,
+							 &(*ref_tree)->rf_ci,
+							 ref_root_bh, vb.vb_xv,
+							 meta_add, credits);
+		if (ret)
+			mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
+				       *ref_tree, ref_root_bh, 0,
+				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
+
+/*
  * 'security' attributes support
  */
 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
-- 
cgit v0.10.2


From fd68a894fc9641f816d9cffa58e853ba91cbc1a1 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:21 +0800
Subject: ocfs2: Remove inode from ocfs2_xattr_bucket_get_name_value.

In ocfs2_xattr_bucket_get_name_value, actually we only use
super_block. So use it.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index a538ceb..eeb5b7c 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -140,7 +140,7 @@ struct ocfs2_xattr_search {
 	int not_found;
 };
 
-static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
+static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
 					     struct ocfs2_xattr_header *xh,
 					     int index,
 					     int *block_off,
@@ -1101,7 +1101,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 		i = xs->here - xs->header->xh_entries;
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 								bucket_xh(xs->bucket),
 								i,
 								&block_off,
@@ -2297,7 +2297,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 		old_in_xb = 1;
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 							bucket_xh(xbs->bucket),
 							i, &block_off,
 							&name_offset);
@@ -2972,7 +2972,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
 		if (cmp)
 			continue;
 
-		ret = ocfs2_xattr_bucket_get_name_value(inode,
+		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 							xh,
 							i,
 							&block_off,
@@ -3216,7 +3216,7 @@ struct ocfs2_xattr_tree_list {
 	size_t result;
 };
 
-static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
+static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
 					     struct ocfs2_xattr_header *xh,
 					     int index,
 					     int *block_off,
@@ -3229,8 +3229,8 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
 
 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
 
-	*block_off = name_offset >> inode->i_sb->s_blocksize_bits;
-	*new_offset = name_offset % inode->i_sb->s_blocksize;
+	*block_off = name_offset >> sb->s_blocksize_bits;
+	*new_offset = name_offset % sb->s_blocksize;
 
 	return 0;
 }
@@ -3250,7 +3250,7 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
 		prefix = ocfs2_xattr_prefix(type);
 
 		if (prefix) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 								bucket_xh(bucket),
 								i,
 								&block_off,
@@ -4845,7 +4845,7 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 
 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
 
-	ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
+	ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
 						xe - xh->xh_entries,
 						&block_off,
 						&offset);
@@ -5433,7 +5433,7 @@ static int ocfs2_prepare_refcount_xattr(struct inode *inode,
 		i = xbs->here - xbs->header->xh_entries;
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 							bucket_xh(xbs->bucket),
 							i, &block_off,
 							&name_offset);
-- 
cgit v0.10.2


From 5aea1f0ef4024ba28213c10181e1b16ec678c82d Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:24 +0800
Subject: ocfs2: Abstract the creation of xattr block.

In xattr reflink, we also need to create xattr block, so
abstract the process out.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index eeb5b7c..a9339eb 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2105,6 +2105,72 @@ cleanup:
 	return ret;
 }
 
+static int ocfs2_create_xattr_block(handle_t *handle,
+				    struct inode *inode,
+				    struct buffer_head *inode_bh,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct buffer_head **ret_bh)
+{
+	int ret;
+	u16 suballoc_bit_start;
+	u32 num_got;
+	u64 first_blkno;
+	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_xattr_block *xblk;
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+
+	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &first_blkno);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+
+	new_bh = sb_getblk(inode->i_sb, first_blkno);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
+
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
+				      new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+
+	/* Initialize ocfs2_xattr_block */
+	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
+	memset(xblk, 0, inode->i_sb->s_blocksize);
+	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
+	xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
+	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
+	xblk->xb_blkno = cpu_to_le64(first_blkno);
+
+	ret = ocfs2_journal_dirty(handle, new_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+	di->i_xattr_loc = cpu_to_le64(first_blkno);
+	ocfs2_journal_dirty(handle, inode_bh);
+
+	*ret_bh = new_bh;
+	new_bh = NULL;
+
+end:
+	brelse(new_bh);
+	return ret;
+}
+
 /*
  * ocfs2_xattr_block_set()
  *
@@ -2117,65 +2183,24 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 				 struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	struct buffer_head *new_bh = NULL;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_xattr_block *xblk = NULL;
-	u16 suballoc_bit_start;
-	u32 num_got;
-	u64 first_blkno;
 	int ret;
 
 	if (!xs->xattr_bh) {
-		ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
-					      xs->inode_bh,
-					      OCFS2_JOURNAL_ACCESS_CREATE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto end;
-		}
-
-		ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
-					   &suballoc_bit_start, &num_got,
-					   &first_blkno);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto end;
-		}
-
-		new_bh = sb_getblk(inode->i_sb, first_blkno);
-		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
-
-		ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
-					      new_bh,
-					      OCFS2_JOURNAL_ACCESS_CREATE);
-		if (ret < 0) {
+		ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
+					       ctxt->meta_ac, &new_bh);
+		if (ret) {
 			mlog_errno(ret);
 			goto end;
 		}
 
-		/* Initialize ocfs2_xattr_block */
 		xs->xattr_bh = new_bh;
-		xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
-		memset(xblk, 0, inode->i_sb->s_blocksize);
-		strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
-		xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
-		xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
-		xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
-		xblk->xb_blkno = cpu_to_le64(first_blkno);
-
+		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
 		xs->header = &xblk->xb_attrs.xb_header;
 		xs->base = (void *)xs->header;
 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
 		xs->here = xs->header->xh_entries;
-
-		ret = ocfs2_journal_dirty(handle, new_bh);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto end;
-		}
-		di->i_xattr_loc = cpu_to_le64(first_blkno);
-		ocfs2_journal_dirty(handle, xs->inode_bh);
 	} else
 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
 
-- 
cgit v0.10.2


From 47bca4950bc40fb54e9d41cbbc8b06cd653d2ae2 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:42 +0800
Subject: ocfs2: Abstract ocfs2 xattr tree extend rec iteration process.

Currently we have ocfs2_iterate_xattr_buckets which can receive
a para and a callback to iterate a series of bucket. It is good.
But actually the 2 callers ocfs2_xattr_tree_list_index_block and
ocfs2_delete_xattr_index_block are almost the same. The only
difference is that the latter need to handle the extent record
also. So add a new function named ocfs2_iterate_xattr_index_block.
It can be given func callback which are used for exten record.
So now we only have one iteration function for the xattr index
block. Ane what's more, it is useful for our future reflink
operations.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index a9339eb..bfa7ee2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -157,7 +157,7 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
 					struct ocfs2_xattr_search *xs);
 
 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
-					struct ocfs2_xattr_tree_root *xt,
+					struct buffer_head *blk_bh,
 					char *buffer,
 					size_t buffer_size);
 
@@ -170,8 +170,23 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 					     struct ocfs2_xattr_search *xs,
 					     struct ocfs2_xattr_set_ctxt *ctxt);
 
-static int ocfs2_delete_xattr_index_block(struct inode *inode,
-					  struct buffer_head *xb_bh);
+typedef int (xattr_tree_rec_func)(struct inode *inode,
+				  struct buffer_head *root_bh,
+				  u64 blkno, u32 cpos, u32 len, void *para);
+static int ocfs2_iterate_xattr_index_block(struct inode *inode,
+					   struct buffer_head *root_bh,
+					   xattr_tree_rec_func *rec_func,
+					   void *para);
+static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
+					struct ocfs2_xattr_bucket *bucket,
+					void *para);
+static int ocfs2_rm_xattr_cluster(struct inode *inode,
+				  struct buffer_head *root_bh,
+				  u64 blkno,
+				  u32 cpos,
+				  u32 len,
+				  void *para);
+
 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 				  u64 src_blk, u64 last_blk, u64 to_blk,
 				  unsigned int start_bucket,
@@ -870,11 +885,9 @@ static int ocfs2_xattr_block_list(struct inode *inode,
 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
 		ret = ocfs2_xattr_list_entries(inode, header,
 					       buffer, buffer_size);
-	} else {
-		struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
-		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
+	} else
+		ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
 						   buffer, buffer_size);
-	}
 
 	brelse(blk_bh);
 
@@ -1801,7 +1814,10 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
 		ret = ocfs2_remove_value_outside(inode, &vb, header);
 	} else
-		ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
+		ret = ocfs2_iterate_xattr_index_block(inode,
+						blk_bh,
+						ocfs2_rm_xattr_cluster,
+						NULL);
 
 	return ret;
 }
@@ -3298,22 +3314,19 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
 	return ret;
 }
 
-static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
-					     struct ocfs2_xattr_tree_root *xt,
-					     char *buffer,
-					     size_t buffer_size)
+static int ocfs2_iterate_xattr_index_block(struct inode *inode,
+					   struct buffer_head *blk_bh,
+					   xattr_tree_rec_func *rec_func,
+					   void *para)
 {
-	struct ocfs2_extent_list *el = &xt->xt_list;
+	struct ocfs2_xattr_block *xb =
+			(struct ocfs2_xattr_block *)blk_bh->b_data;
+	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
 	int ret = 0;
 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
 	u64 p_blkno = 0;
-	struct ocfs2_xattr_tree_list xl = {
-		.buffer = buffer,
-		.buffer_size = buffer_size,
-		.result = 0,
-	};
 
-	if (le16_to_cpu(el->l_next_free_rec) == 0)
+	if (!el->l_next_free_rec || !rec_func)
 		return 0;
 
 	while (name_hash > 0) {
@@ -3321,16 +3334,15 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 					  &e_cpos, &num_clusters, el);
 		if (ret) {
 			mlog_errno(ret);
-			goto out;
+			break;
 		}
 
-		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
-						  ocfs2_list_xattr_bucket,
-						  &xl);
+		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
+			       num_clusters, para);
 		if (ret) {
 			if (ret != -ERANGE)
 				mlog_errno(ret);
-			goto out;
+			break;
 		}
 
 		if (e_cpos == 0)
@@ -3339,6 +3351,37 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 		name_hash = e_cpos - 1;
 	}
 
+	return ret;
+
+}
+
+static int ocfs2_list_xattr_tree_rec(struct inode *inode,
+				     struct buffer_head *root_bh,
+				     u64 blkno, u32 cpos, u32 len, void *para)
+{
+	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
+					   ocfs2_list_xattr_bucket, para);
+}
+
+static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
+					     struct buffer_head *blk_bh,
+					     char *buffer,
+					     size_t buffer_size)
+{
+	int ret;
+	struct ocfs2_xattr_tree_list xl = {
+		.buffer = buffer,
+		.buffer_size = buffer_size,
+		.result = 0,
+	};
+
+	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
+					      ocfs2_list_xattr_tree_rec, &xl);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
 	ret = xl.result;
 out:
 	return ret;
@@ -4897,7 +4940,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 				  struct buffer_head *root_bh,
 				  u64 blkno,
 				  u32 cpos,
-				  u32 len)
+				  u32 len,
+				  void *para)
 {
 	int ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -4909,6 +4953,13 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_extent_tree et;
 
+	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
+					  ocfs2_delete_xattr_in_bucket, NULL);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
 
 	ocfs2_init_dealloc_ctxt(&dealloc);
@@ -5331,52 +5382,6 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 	return ret;
 }
 
-static int ocfs2_delete_xattr_index_block(struct inode *inode,
-					  struct buffer_head *xb_bh)
-{
-	struct ocfs2_xattr_block *xb =
-			(struct ocfs2_xattr_block *)xb_bh->b_data;
-	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
-	int ret = 0;
-	u32 name_hash = UINT_MAX, e_cpos, num_clusters;
-	u64 p_blkno;
-
-	if (le16_to_cpu(el->l_next_free_rec) == 0)
-		return 0;
-
-	while (name_hash > 0) {
-		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
-					  &e_cpos, &num_clusters, el);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-
-		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
-						  ocfs2_delete_xattr_in_bucket,
-						  NULL);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-
-		ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
-					     p_blkno, e_cpos, num_clusters);
-		if (ret) {
-			mlog_errno(ret);
-			break;
-		}
-
-		if (e_cpos == 0)
-			break;
-
-		name_hash = e_cpos - 1;
-	}
-
-out:
-	return ret;
-}
-
 /*
  * Whenever we modify a xattr value root in the bucket(e.g, CoW
  * or change the extent record flag), we need to recalculate
-- 
cgit v0.10.2


From 0129241e2b3b90ff83a8c774353e5612d84bd493 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 21 Sep 2009 13:04:19 +0800
Subject: ocfs2: Attach xattr clusters to refcount tree.

In ocfs2, when xattr's value is larger than OCFS2_XATTR_INLINE_SIZE,
it will be kept outside of the blocks we store xattr entry. And they
are stored in a b-tree also. So this patch try to attach all these
clusters to refcount tree also.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a5b5bef..a85c01c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3547,7 +3547,8 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 			    struct ocfs2_caching_info *ref_ci,
 			    struct buffer_head *ref_root_bh,
 			    u32 cpos, u32 p_cluster, u32 num_clusters,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    struct ocfs2_post_refcount *post)
 {
 	int ret;
 	handle_t *handle;
@@ -3576,6 +3577,9 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 		}
 	}
 
+	if (post)
+		credits += post->credits;
+
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -3594,8 +3598,16 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
 					p_cluster, num_clusters,
 					meta_ac, dealloc);
-	if (ret)
+	if (ret) {
 		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	if (post && post->func) {
+		ret = post->func(inode, handle, post->para);
+		if (ret)
+			mlog_errno(ret);
+	}
 
 out_commit:
 	ocfs2_commit_trans(osb, handle);
@@ -3688,7 +3700,7 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
 						      &ref_tree->rf_ci,
 						      ref_root_bh, cpos,
 						      p_cluster, num_clusters,
-						      &dealloc);
+						      &dealloc, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto unlock;
@@ -3699,6 +3711,17 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
 		cpos += num_clusters;
 	}
 
+	if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+		ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh,
+						       &ref_tree->rf_ci,
+						       ref_root_bh,
+						       &dealloc);
+		if (ret) {
+			mlog_errno(ret);
+			goto unlock;
+		}
+	}
+
 	if (data_changed) {
 		ret = ocfs2_change_ctime(inode, di_bh);
 		if (ret)
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index d09d64b..7d6900c 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -83,4 +83,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
 			     struct buffer_head *ref_root_bh,
 			     u32 cpos, u32 write_len,
 			     struct ocfs2_post_refcount *post);
+int ocfs2_add_refcount_flag(struct inode *inode,
+			    struct ocfs2_extent_tree *data_et,
+			    struct ocfs2_caching_info *ref_ci,
+			    struct buffer_head *ref_root_bh,
+			    u32 cpos, u32 p_cluster, u32 num_clusters,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    struct ocfs2_post_refcount *post);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index bfa7ee2..501539a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5551,6 +5551,297 @@ out:
 }
 
 /*
+ * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
+ * The physical clusters will be added to refcount tree.
+ */
+static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
+				struct ocfs2_xattr_value_root *xv,
+				struct ocfs2_extent_tree *value_et,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc,
+				struct ocfs2_post_refcount *refcount)
+{
+	int ret = 0;
+	u32 clusters = le32_to_cpu(xv->xr_clusters);
+	u32 cpos, p_cluster, num_clusters;
+	struct ocfs2_extent_list *el = &xv->xr_list;
+	unsigned int ext_flags;
+
+	cpos = 0;
+	while (cpos < clusters) {
+		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
+					       &num_clusters, el, &ext_flags);
+
+		cpos += num_clusters;
+		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
+			continue;
+
+		BUG_ON(!p_cluster);
+
+		ret = ocfs2_add_refcount_flag(inode, value_et,
+					      ref_ci, ref_root_bh,
+					      cpos - num_clusters,
+					      p_cluster, num_clusters,
+					      dealloc, refcount);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Given a normal ocfs2_xattr_header, refcount all the entries which
+ * have value stored outside.
+ * Used for xattrs stored in inode and ocfs2_xattr_block.
+ */
+static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
+				struct ocfs2_xattr_value_buf *vb,
+				struct ocfs2_xattr_header *header,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+
+	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_value_root *xv;
+	struct ocfs2_extent_tree et;
+	int i, ret = 0;
+
+	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
+		xe = &header->xh_entries[i];
+
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		xv = (struct ocfs2_xattr_value_root *)((void *)header +
+			le16_to_cpu(xe->xe_name_offset) +
+			OCFS2_XATTR_SIZE(xe->xe_name_len));
+
+		vb->vb_xv = xv;
+		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
+
+		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
+							ref_ci, ref_root_bh,
+							dealloc, NULL);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
+				struct buffer_head *fe_bh,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
+	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
+				(fe_bh->b_data + inode->i_sb->s_blocksize -
+				le16_to_cpu(di->i_xattr_inline_size));
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = fe_bh,
+		.vb_access = ocfs2_journal_access_di,
+	};
+
+	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
+						  ref_ci, ref_root_bh, dealloc);
+}
+
+struct ocfs2_xattr_tree_value_refcount_para {
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+	struct ocfs2_cached_dealloc_ctxt *dealloc;
+};
+
+static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
+					   struct ocfs2_xattr_bucket *bucket,
+					   int offset,
+					   struct ocfs2_xattr_value_root **xv,
+					   struct buffer_head **bh)
+{
+	int ret, block_off, name_offset;
+	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
+	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
+	void *base;
+
+	ret = ocfs2_xattr_bucket_get_name_value(sb,
+						bucket_xh(bucket),
+						offset,
+						&block_off,
+						&name_offset);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	base = bucket_block(bucket, block_off);
+
+	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
+			 OCFS2_XATTR_SIZE(xe->xe_name_len));
+
+	if (bh)
+		*bh = bucket->bu_bhs[block_off];
+out:
+	return ret;
+}
+
+/*
+ * For a given xattr bucket, refcount all the entries which
+ * have value stored outside.
+ */
+static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
+					     struct ocfs2_xattr_bucket *bucket,
+					     void *para)
+{
+	int i, ret = 0;
+	struct ocfs2_extent_tree et;
+	struct ocfs2_xattr_tree_value_refcount_para *ref =
+			(struct ocfs2_xattr_tree_value_refcount_para *)para;
+	struct ocfs2_xattr_header *xh =
+			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
+	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_access = ocfs2_journal_access,
+	};
+	struct ocfs2_post_refcount refcount = {
+		.credits = bucket->bu_blocks,
+		.para = bucket,
+		.func = ocfs2_xattr_bucket_post_refcount,
+	};
+	struct ocfs2_post_refcount *p = NULL;
+
+	/* We only need post_refcount if we support metaecc. */
+	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
+		p = &refcount;
+
+	mlog(0, "refcount bucket %llu, count = %u\n",
+	     (unsigned long long)bucket_blkno(bucket),
+	     le16_to_cpu(xh->xh_count));
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+		xe = &xh->xh_entries[i];
+
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
+						      &vb.vb_xv, &vb.vb_bh);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ocfs2_init_xattr_value_extent_tree(&et,
+						   INODE_CACHE(inode), &vb);
+
+		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
+							&et, ref->ref_ci,
+							ref->ref_root_bh,
+							ref->dealloc, p);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	return ret;
+
+}
+
+static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
+				     struct buffer_head *root_bh,
+				     u64 blkno, u32 cpos, u32 len, void *para)
+{
+	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
+					   ocfs2_xattr_bucket_value_refcount,
+					   para);
+}
+
+static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
+				struct buffer_head *blk_bh,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0;
+	struct ocfs2_xattr_block *xb =
+				(struct ocfs2_xattr_block *)blk_bh->b_data;
+
+	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
+		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
+		struct ocfs2_xattr_value_buf vb = {
+			.vb_bh = blk_bh,
+			.vb_access = ocfs2_journal_access_xb,
+		};
+
+		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
+							 ref_ci, ref_root_bh,
+							 dealloc);
+	} else {
+		struct ocfs2_xattr_tree_value_refcount_para para = {
+			.ref_ci = ref_ci,
+			.ref_root_bh = ref_root_bh,
+			.dealloc = dealloc,
+		};
+
+		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
+						ocfs2_refcount_xattr_tree_rec,
+						&para);
+	}
+
+	return ret;
+}
+
+int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
+				     struct buffer_head *fe_bh,
+				     struct ocfs2_caching_info *ref_ci,
+				     struct buffer_head *ref_root_bh,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
+	struct buffer_head *blk_bh = NULL;
+
+	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
+		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
+							 ref_ci, ref_root_bh,
+							 dealloc);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	if (!di->i_xattr_loc)
+		goto out;
+
+	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+				     &blk_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
+						ref_root_bh, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+	brelse(blk_bh);
+out:
+
+	return ret;
+}
+
+/*
  * 'security' attributes support
  */
 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1ca7e9a..a3295d7 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -83,5 +83,9 @@ struct ocfs2_xattr_value_buf {
 	struct ocfs2_xattr_value_root	*vb_xv;
 };
 
-
+int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
+				     struct buffer_head *fe_bh,
+				     struct ocfs2_caching_info *ref_ci,
+				     struct buffer_head *ref_root_bh,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc);
 #endif /* OCFS2_XATTR_H */
-- 
cgit v0.10.2


From 8b2c0dba5159570af5721d40490f6c529d721500 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:49 +0800
Subject: ocfs2: Call refcount tree remove process properly.

Now with xattr refcount support, we need to check whether
we have xattr refcounted before we remove the refcount tree.

Now the mechanism is:
1) Check whether i_clusters == 0, if no, exit.
2) check whether we have i_xattr_loc in dinode. if yes, exit.
2) Check whether we have inline xattr stored outside, if yes, exit.
4) Remove the tree.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 75f5b81..2effac5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -531,6 +531,8 @@ bail_unlock_sem:
 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
 
 bail:
+	if (!status && OCFS2_I(inode)->ip_clusters == 0)
+		status = ocfs2_try_remove_refcount_tree(inode, di_bh);
 
 	mlog_exit(status);
 	return status;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index e82ceb3..0297fb8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -53,6 +53,7 @@
 #include "sysfile.h"
 #include "uptodate.h"
 #include "xattr.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -782,6 +783,12 @@ static int ocfs2_wipe_inode(struct inode *inode,
 		goto bail_unlock_dir;
 	}
 
+	status = ocfs2_remove_refcount_tree(inode, di_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail_unlock_dir;
+	}
+
 	status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
 				    orphan_dir_bh);
 	if (status < 0)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a85c01c..5656c68 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -927,6 +927,42 @@ out:
 }
 
 /*
+ * Try to remove refcount tree. The mechanism is:
+ * 1) Check whether i_clusters == 0, if no, exit.
+ * 2) check whether we have i_xattr_loc in dinode. if yes, exit.
+ * 3) Check whether we have inline xattr stored outside, if yes, exit.
+ * 4) Remove the tree.
+ */
+int ocfs2_try_remove_refcount_tree(struct inode *inode,
+				   struct buffer_head *di_bh)
+{
+	int ret;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+
+	down_write(&oi->ip_xattr_sem);
+	down_write(&oi->ip_alloc_sem);
+
+	if (oi->ip_clusters)
+		goto out;
+
+	if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc)
+		goto out;
+
+	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL &&
+	    ocfs2_has_inline_xattr_value_outside(inode, di))
+		goto out;
+
+	ret = ocfs2_remove_refcount_tree(inode, di_bh);
+	if (ret)
+		mlog_errno(ret);
+out:
+	up_write(&oi->ip_alloc_sem);
+	up_write(&oi->ip_xattr_sem);
+	return 0;
+}
+
+/*
  * Given a cpos and len, try to find the refcount record which contains cpos.
  * 1. If cpos can be found in one refcount record, return the record.
  * 2. If cpos can't be found, return a fake record which start from cpos
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 7d6900c..1e3446a 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -90,4 +90,7 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 			    u32 cpos, u32 p_cluster, u32 num_clusters,
 			    struct ocfs2_cached_dealloc_ctxt *dealloc,
 			    struct ocfs2_post_refcount *post);
+int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh);
+int ocfs2_try_remove_refcount_tree(struct inode *inode,
+				   struct buffer_head *di_bh);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 501539a..6660f1c 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -840,6 +840,23 @@ static int ocfs2_xattr_list_entries(struct inode *inode,
 	return result;
 }
 
+int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
+					 struct ocfs2_dinode *di)
+{
+	struct ocfs2_xattr_header *xh;
+	int i;
+
+	xh = (struct ocfs2_xattr_header *)
+		 ((void *)di + inode->i_sb->s_blocksize -
+		 le16_to_cpu(di->i_xattr_inline_size));
+
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
+		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
+			return 1;
+
+	return 0;
+}
+
 static int ocfs2_xattr_ibody_list(struct inode *inode,
 				  struct ocfs2_dinode *di,
 				  char *buffer,
@@ -2898,10 +2915,16 @@ int ocfs2_xattr_set(struct inode *inode,
 	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
 		ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
+
 cleanup:
 	if (ref_tree)
 		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
+	if (!value && !ret) {
+		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
+		if (ret)
+			mlog_errno(ret);
+	}
 	ocfs2_inode_unlock(inode, 1);
 cleanup_nolock:
 	brelse(di_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index a3295d7..e74703f 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -55,6 +55,8 @@ int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
 			   int, const char *, const void *, size_t, int,
 			   struct ocfs2_alloc_context *,
 			   struct ocfs2_alloc_context *);
+int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
+					 struct ocfs2_dinode *di);
 int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
 int ocfs2_init_security_get(struct inode *, struct inode *,
 			    struct ocfs2_security_xattr_info *);
-- 
cgit v0.10.2


From a7fe7a3a1ab5dac8d81e531c060f51e12010133b Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:52 +0800
Subject: ocfs2: Create an xattr indexed block if needed.

With reflink, there is a need that we create a new xattr indexed
block from the very beginning. So add a new parameter for
ocfs2_create_xattr_block.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6660f1c..bb92a6d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2142,7 +2142,8 @@ static int ocfs2_create_xattr_block(handle_t *handle,
 				    struct inode *inode,
 				    struct buffer_head *inode_bh,
 				    struct ocfs2_alloc_context *meta_ac,
-				    struct buffer_head **ret_bh)
+				    struct buffer_head **ret_bh,
+				    int indexed)
 {
 	int ret;
 	u16 suballoc_bit_start;
@@ -2188,6 +2189,17 @@ static int ocfs2_create_xattr_block(handle_t *handle,
 	xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
 	xblk->xb_blkno = cpu_to_le64(first_blkno);
 
+	if (indexed) {
+		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
+		xr->xt_clusters = cpu_to_le32(1);
+		xr->xt_last_eb_blk = 0;
+		xr->xt_list.l_tree_depth = 0;
+		xr->xt_list.l_count = cpu_to_le16(
+					ocfs2_xattr_recs_per_xb(inode->i_sb));
+		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
+		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
+	}
+
 	ret = ocfs2_journal_dirty(handle, new_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2222,7 +2234,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 
 	if (!xs->xattr_bh) {
 		ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
-					       ctxt->meta_ac, &new_bh);
+					       ctxt->meta_ac, &new_bh, 0);
 		if (ret) {
 			mlog_errno(ret);
 			goto end;
-- 
cgit v0.10.2


From 2999d12f4d5529b282ce201b21444590c3f9f723 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:55 +0800
Subject: ocfs2: Add reflink support for xattr.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5656c68..dc57d06 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1894,12 +1894,12 @@ out:
 	return ret;
 }
 
-static int __ocfs2_increase_refcount(handle_t *handle,
-				     struct ocfs2_caching_info *ci,
-				     struct buffer_head *ref_root_bh,
-				     u64 cpos, u32 len,
-				     struct ocfs2_alloc_context *meta_ac,
-				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+int ocfs2_increase_refcount(handle_t *handle,
+			    struct ocfs2_caching_info *ci,
+			    struct buffer_head *ref_root_bh,
+			    u64 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0, index;
 	struct buffer_head *ref_leaf_bh = NULL;
@@ -3631,9 +3631,9 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
-					p_cluster, num_clusters,
-					meta_ac, dealloc);
+	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+				      p_cluster, num_clusters,
+				      meta_ac, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -3822,9 +3822,9 @@ static int ocfs2_add_refcounted_extent(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
-					p_cluster, num_clusters,
-					meta_ac, dealloc);
+	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+				      p_cluster, num_clusters,
+				      meta_ac, dealloc);
 	if (ret)
 		mlog_errno(ret);
 
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 1e3446a..2c238e6 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -93,4 +93,10 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh);
 int ocfs2_try_remove_refcount_tree(struct inode *inode,
 				   struct buffer_head *di_bh);
+int ocfs2_increase_refcount(handle_t *handle,
+			    struct ocfs2_caching_info *ci,
+			    struct buffer_head *ref_root_bh,
+			    u64 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index bb92a6d..661ed9b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5877,6 +5877,929 @@ out:
 }
 
 /*
+ * Store the information we need in xattr reflink.
+ * old_bh and new_bh are inode bh for the old and new inode.
+ */
+struct ocfs2_xattr_reflink {
+	struct inode *old_inode;
+	struct inode *new_inode;
+	struct buffer_head *old_bh;
+	struct buffer_head *new_bh;
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+	struct ocfs2_cached_dealloc_ctxt *dealloc;
+};
+
+/*
+ * Given a xattr header and xe offset,
+ * return the proper xv and the corresponding bh.
+ * xattr in inode, block and xattr tree have different implementaions.
+ */
+typedef int (get_xattr_value_root)(struct super_block *sb,
+				   struct buffer_head *bh,
+				   struct ocfs2_xattr_header *xh,
+				   int offset,
+				   struct ocfs2_xattr_value_root **xv,
+				   struct buffer_head **ret_bh,
+				   void *para);
+
+/*
+ * Calculate all the xattr value root metadata stored in this xattr header and
+ * credits we need if we create them from the scratch.
+ * We use get_xattr_value_root so that all types of xattr container can use it.
+ */
+static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
+					     struct buffer_head *bh,
+					     struct ocfs2_xattr_header *xh,
+					     int *metas, int *credits,
+					     int *num_recs,
+					     get_xattr_value_root *func,
+					     void *para)
+{
+	int i, ret = 0;
+	struct ocfs2_xattr_value_root *xv;
+	struct ocfs2_xattr_entry *xe;
+
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+		xe = &xh->xh_entries[i];
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		ret = func(sb, bh, xh, i, &xv, NULL, para);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
+			  le16_to_cpu(xv->xr_list.l_next_free_rec);
+
+		*credits += ocfs2_calc_extend_credits(sb,
+						&def_xv.xv.xr_list,
+						le32_to_cpu(xv->xr_clusters));
+
+		/*
+		 * If the value is a tree with depth > 1, We don't go deep
+		 * to the extent block, so just calculate a maximum record num.
+		 */
+		if (!xv->xr_list.l_tree_depth)
+			*num_recs += xv->xr_list.l_next_free_rec;
+		else
+			*num_recs += ocfs2_clusters_for_bytes(sb,
+							      XATTR_SIZE_MAX);
+	}
+
+	return ret;
+}
+
+/* Used by xattr inode and block to return the right xv and buffer_head. */
+static int ocfs2_get_xattr_value_root(struct super_block *sb,
+				      struct buffer_head *bh,
+				      struct ocfs2_xattr_header *xh,
+				      int offset,
+				      struct ocfs2_xattr_value_root **xv,
+				      struct buffer_head **ret_bh,
+				      void *para)
+{
+	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
+
+	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
+		le16_to_cpu(xe->xe_name_offset) +
+		OCFS2_XATTR_SIZE(xe->xe_name_len));
+
+	if (ret_bh)
+		*ret_bh = bh;
+
+	return 0;
+}
+
+/*
+ * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
+ * It is only used for inline xattr and xattr block.
+ */
+static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
+					struct ocfs2_xattr_header *xh,
+					struct buffer_head *ref_root_bh,
+					int *credits,
+					struct ocfs2_alloc_context **meta_ac)
+{
+	int ret, meta_add = 0, num_recs = 0;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	*credits = 0;
+
+	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
+						&meta_add, credits, &num_recs,
+						ocfs2_get_xattr_value_root,
+						NULL);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * We need to add/modify num_recs in refcount tree, so just calculate
+	 * an approximate number we need for refcount tree change.
+	 * Sometimes we need to split the tree, and after split,  half recs
+	 * will be moved to the new block, and a new block can only provide
+	 * half number of recs. So we multiple new blocks by 2.
+	 */
+	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
+	meta_add += num_recs;
+	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
+		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
+			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
+	else
+		*credits += 1;
+
+	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
+
+/*
+ * Given a xattr header, reflink all the xattrs in this container.
+ * It can be used for inode, block and bucket.
+ *
+ * NOTE:
+ * Before we call this function, the caller has memcpy the xattr in
+ * old_xh to the new_xh.
+ */
+static int ocfs2_reflink_xattr_header(handle_t *handle,
+				      struct ocfs2_xattr_reflink *args,
+				      struct buffer_head *old_bh,
+				      struct ocfs2_xattr_header *xh,
+				      struct buffer_head *new_bh,
+				      struct ocfs2_xattr_header *new_xh,
+				      struct ocfs2_xattr_value_buf *vb,
+				      struct ocfs2_alloc_context *meta_ac,
+				      get_xattr_value_root *func,
+				      void *para)
+{
+	int ret = 0, i;
+	struct super_block *sb = args->old_inode->i_sb;
+	struct buffer_head *value_bh;
+	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_value_root *xv, *new_xv;
+	struct ocfs2_extent_tree data_et;
+	u32 clusters, cpos, p_cluster, num_clusters;
+	unsigned int ext_flags = 0;
+
+	mlog(0, "reflink xattr in container %llu, count = %u\n",
+	     (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+		xe = &xh->xh_entries[i];
+
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = func(sb, new_bh, new_xh, i, &new_xv, &value_bh, para);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		/*
+		 * For the xattr which has l_tree_depth = 0, all the extent
+		 * recs have already be copied to the new xh with the
+		 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
+		 * increase the refount count int the refcount tree.
+		 *
+		 * For the xattr which has l_tree_depth > 0, we need
+		 * to initialize it to the empty default value root,
+		 * and then insert the extents one by one.
+		 */
+		if (xv->xr_list.l_tree_depth) {
+			memcpy(new_xv, &def_xv, sizeof(def_xv));
+			vb->vb_xv = new_xv;
+			vb->vb_bh = value_bh;
+			ocfs2_init_xattr_value_extent_tree(&data_et,
+					INODE_CACHE(args->new_inode), vb);
+		}
+
+		clusters = le32_to_cpu(xv->xr_clusters);
+		cpos = 0;
+		while (cpos < clusters) {
+			ret = ocfs2_xattr_get_clusters(args->old_inode,
+						       cpos,
+						       &p_cluster,
+						       &num_clusters,
+						       &xv->xr_list,
+						       &ext_flags);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			BUG_ON(!p_cluster);
+
+			if (xv->xr_list.l_tree_depth) {
+				ret = ocfs2_insert_extent(handle,
+						&data_et, cpos,
+						ocfs2_clusters_to_blocks(
+							args->old_inode->i_sb,
+							p_cluster),
+						num_clusters, ext_flags,
+						meta_ac);
+				if (ret) {
+					mlog_errno(ret);
+					goto out;
+				}
+			}
+
+			ret = ocfs2_increase_refcount(handle, args->ref_ci,
+						      args->ref_root_bh,
+						      p_cluster, num_clusters,
+						      meta_ac, args->dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			cpos += num_clusters;
+		}
+	}
+
+out:
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
+{
+	int ret = 0, credits = 0;
+	handle_t *handle;
+	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
+	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
+	int header_off = osb->sb->s_blocksize - inline_size;
+	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
+					(args->old_bh->b_data + header_off);
+	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
+					(args->new_bh->b_data + header_off);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_inode_info *new_oi;
+	struct ocfs2_dinode *new_di;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = args->new_bh,
+		.vb_access = ocfs2_journal_access_di,
+	};
+
+	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
+						  &credits, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
+				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	memcpy(args->new_bh->b_data + header_off,
+	       args->old_bh->b_data + header_off, inline_size);
+
+	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
+	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
+
+	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
+					 args->new_bh, new_xh, &vb, meta_ac,
+					 ocfs2_get_xattr_value_root, NULL);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_oi = OCFS2_I(args->new_inode);
+	spin_lock(&new_oi->ip_lock);
+	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
+	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
+	spin_unlock(&new_oi->ip_lock);
+
+	ocfs2_journal_dirty(handle, args->new_bh);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_create_empty_xattr_block(struct inode *inode,
+					  struct buffer_head *fe_bh,
+					  struct buffer_head **ret_bh,
+					  int indexed)
+{
+	int ret;
+	handle_t *handle;
+	struct ocfs2_alloc_context *meta_ac;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "create new xattr block for inode %llu, index = %d\n",
+	     (unsigned long long)fe_bh->b_blocknr, indexed);
+	ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
+				       meta_ac, ret_bh, indexed);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_commit_trans(osb, handle);
+out:
+	ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
+				     struct buffer_head *blk_bh,
+				     struct buffer_head *new_blk_bh)
+{
+	int ret = 0, credits = 0;
+	handle_t *handle;
+	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
+	struct ocfs2_dinode *new_di;
+	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
+	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
+	struct ocfs2_xattr_block *xb =
+			(struct ocfs2_xattr_block *)blk_bh->b_data;
+	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
+	struct ocfs2_xattr_block *new_xb =
+			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
+	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
+	struct ocfs2_alloc_context *meta_ac;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = new_blk_bh,
+		.vb_access = ocfs2_journal_access_xb,
+	};
+
+	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
+						  &credits, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	/* One more credits in case we need to add xattr flags in new inode. */
+	handle = ocfs2_start_trans(osb, credits + 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
+		ret = ocfs2_journal_access_di(handle,
+					      INODE_CACHE(args->new_inode),
+					      args->new_bh,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+	}
+
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
+				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
+	       osb->sb->s_blocksize - header_off);
+
+	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
+					 new_blk_bh, new_xh, &vb, meta_ac,
+					 ocfs2_get_xattr_value_root, NULL);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ocfs2_journal_dirty(handle, new_blk_bh);
+
+	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
+		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
+		spin_lock(&new_oi->ip_lock);
+		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
+		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
+		spin_unlock(&new_oi->ip_lock);
+
+		ocfs2_journal_dirty(handle, args->new_bh);
+	}
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+struct ocfs2_reflink_xattr_tree_args {
+	struct ocfs2_xattr_reflink *reflink;
+	struct buffer_head *old_blk_bh;
+	struct buffer_head *new_blk_bh;
+	struct ocfs2_xattr_bucket *old_bucket;
+	struct ocfs2_xattr_bucket *new_bucket;
+};
+
+/*
+ * NOTE:
+ * We have to handle the case that both old bucket and new bucket
+ * will call this function to get the right ret_bh.
+ * So The caller must give us the right bh.
+ */
+static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
+					struct buffer_head *bh,
+					struct ocfs2_xattr_header *xh,
+					int offset,
+					struct ocfs2_xattr_value_root **xv,
+					struct buffer_head **ret_bh,
+					void *para)
+{
+	struct ocfs2_reflink_xattr_tree_args *args =
+			(struct ocfs2_reflink_xattr_tree_args *)para;
+	struct ocfs2_xattr_bucket *bucket;
+
+	if (bh == args->old_bucket->bu_bhs[0])
+		bucket = args->old_bucket;
+	else
+		bucket = args->new_bucket;
+
+	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
+					       xv, ret_bh);
+}
+
+struct ocfs2_value_tree_metas {
+	int num_metas;
+	int credits;
+	int num_recs;
+};
+
+static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
+					struct buffer_head *bh,
+					struct ocfs2_xattr_header *xh,
+					int offset,
+					struct ocfs2_xattr_value_root **xv,
+					struct buffer_head **ret_bh,
+					void *para)
+{
+	struct ocfs2_xattr_bucket *bucket =
+				(struct ocfs2_xattr_bucket *)para;
+
+	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
+					       xv, ret_bh);
+}
+
+static int ocfs2_calc_value_tree_metas(struct inode *inode,
+				      struct ocfs2_xattr_bucket *bucket,
+				      void *para)
+{
+	struct ocfs2_value_tree_metas *metas =
+			(struct ocfs2_value_tree_metas *)para;
+	struct ocfs2_xattr_header *xh =
+			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
+
+	/* Add the credits for this bucket first. */
+	metas->credits += bucket->bu_blocks;
+	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
+					xh, &metas->num_metas,
+					&metas->credits, &metas->num_recs,
+					ocfs2_value_tree_metas_in_bucket,
+					bucket);
+}
+
+/*
+ * Given a xattr extent rec starting from blkno and having len clusters,
+ * iterate all the buckets calculate how much metadata we need for reflinking
+ * all the ocfs2_xattr_value_root and lock the allocators accordingly.
+ */
+static int ocfs2_lock_reflink_xattr_rec_allocators(
+				struct ocfs2_reflink_xattr_tree_args *args,
+				struct ocfs2_extent_tree *xt_et,
+				u64 blkno, u32 len, int *credits,
+				struct ocfs2_alloc_context **meta_ac,
+				struct ocfs2_alloc_context **data_ac)
+{
+	int ret, num_free_extents;
+	struct ocfs2_value_tree_metas metas;
+	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
+	struct ocfs2_refcount_block *rb;
+
+	memset(&metas, 0, sizeof(metas));
+
+	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
+					  ocfs2_calc_value_tree_metas, &metas);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*credits = metas.credits;
+
+	/*
+	 * Calculate we need for refcount tree change.
+	 *
+	 * We need to add/modify num_recs in refcount tree, so just calculate
+	 * an approximate number we need for refcount tree change.
+	 * Sometimes we need to split the tree, and after split,  half recs
+	 * will be moved to the new block, and a new block can only provide
+	 * half number of recs. So we multiple new blocks by 2.
+	 * In the end, we have to add credits for modifying the already
+	 * existed refcount block.
+	 */
+	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
+	metas.num_recs =
+		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
+		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
+	metas.num_metas += metas.num_recs;
+	*credits += metas.num_recs +
+		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
+		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
+			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
+	else
+		*credits += 1;
+
+	/* count in the xattr tree change. */
+	num_free_extents = ocfs2_num_free_extents(osb, xt_et);
+	if (num_free_extents < 0) {
+		ret = num_free_extents;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (num_free_extents < len)
+		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
+
+	*credits += ocfs2_calc_extend_credits(osb->sb,
+					      xt_et->et_root_el, len);
+
+	if (metas.num_metas) {
+		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
+							meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	if (len) {
+		ret = ocfs2_reserve_clusters(osb, len, data_ac);
+		if (ret)
+			mlog_errno(ret);
+	}
+out:
+	if (ret) {
+		if (*meta_ac) {
+			ocfs2_free_alloc_context(*meta_ac);
+			meta_ac = NULL;
+		}
+	}
+
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+				u64 blkno, u64 new_blkno, u32 clusters,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_alloc_context *data_ac,
+				struct ocfs2_reflink_xattr_tree_args *args)
+{
+	int i, j, ret = 0;
+	struct super_block *sb = args->reflink->old_inode->i_sb;
+	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
+	u32 num_buckets = clusters * bpc;
+	int bpb = args->old_bucket->bu_blocks;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_access = ocfs2_journal_access,
+	};
+
+	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
+		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		/*
+		 * The real bucket num in this series of blocks is stored
+		 * in the 1st bucket.
+		 */
+		if (i == 0)
+			num_buckets = le16_to_cpu(
+				bucket_xh(args->old_bucket)->xh_num_buckets);
+
+		ret = ocfs2_xattr_bucket_journal_access(handle,
+						args->new_bucket,
+						OCFS2_JOURNAL_ACCESS_CREATE);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		for (j = 0; j < bpb; j++)
+			memcpy(bucket_block(args->new_bucket, j),
+			       bucket_block(args->old_bucket, j),
+			       sb->s_blocksize);
+
+		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+
+		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
+					args->old_bucket->bu_bhs[0],
+					bucket_xh(args->old_bucket),
+					args->new_bucket->bu_bhs[0],
+					bucket_xh(args->new_bucket),
+					&vb, meta_ac,
+					ocfs2_get_reflink_xattr_value_root,
+					args);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		/*
+		 * Re-access and dirty the bucket to calculate metaecc.
+		 * Because we may extend the transaction in reflink_xattr_header
+		 * which will let the already accessed block gone.
+		 */
+		ret = ocfs2_xattr_bucket_journal_access(handle,
+						args->new_bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+		ocfs2_xattr_bucket_relse(args->old_bucket);
+		ocfs2_xattr_bucket_relse(args->new_bucket);
+	}
+
+	ocfs2_xattr_bucket_relse(args->old_bucket);
+	ocfs2_xattr_bucket_relse(args->new_bucket);
+	return ret;
+}
+/*
+ * Create the same xattr extent record in the new inode's xattr tree.
+ */
+static int ocfs2_reflink_xattr_rec(struct inode *inode,
+				   struct buffer_head *root_bh,
+				   u64 blkno,
+				   u32 cpos,
+				   u32 len,
+				   void *para)
+{
+	int ret, credits = 0;
+	u32 p_cluster, num_clusters;
+	u64 new_blkno;
+	handle_t *handle;
+	struct ocfs2_reflink_xattr_tree_args *args =
+			(struct ocfs2_reflink_xattr_tree_args *)para;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_extent_tree et;
+
+	ocfs2_init_xattr_tree_extent_tree(&et,
+					  INODE_CACHE(args->reflink->new_inode),
+					  args->new_blk_bh);
+
+	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
+						      len, &credits,
+						      &meta_ac, &data_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_claim_clusters(osb, handle, data_ac,
+				   len, &p_cluster, &num_clusters);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
+
+	mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
+	     (unsigned long long)blkno, (unsigned long long)new_blkno, len);
+	ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
+					  meta_ac, data_ac, args);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
+	     (unsigned long long)new_blkno, len, cpos);
+	ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
+				  len, 0, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	if (data_ac)
+		ocfs2_free_alloc_context(data_ac);
+	return ret;
+}
+
+/*
+ * Create reflinked xattr buckets.
+ * We will add bucket one by one, and refcount all the xattrs in the bucket
+ * if they are stored outside.
+ */
+static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
+				    struct buffer_head *blk_bh,
+				    struct buffer_head *new_blk_bh)
+{
+	int ret;
+	struct ocfs2_reflink_xattr_tree_args para;
+
+	memset(&para, 0, sizeof(para));
+	para.reflink = args;
+	para.old_blk_bh = blk_bh;
+	para.new_blk_bh = new_blk_bh;
+
+	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
+	if (!para.old_bucket) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
+	if (!para.new_bucket) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
+					      ocfs2_reflink_xattr_rec,
+					      &para);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	ocfs2_xattr_bucket_free(para.old_bucket);
+	ocfs2_xattr_bucket_free(para.new_bucket);
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
+					struct buffer_head *blk_bh)
+{
+	int ret, indexed = 0;
+	struct buffer_head *new_blk_bh = NULL;
+	struct ocfs2_xattr_block *xb =
+			(struct ocfs2_xattr_block *)blk_bh->b_data;
+
+
+	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
+		indexed = 1;
+
+	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
+					     &new_blk_bh, indexed);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
+		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
+	else
+		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(new_blk_bh);
+	return ret;
+}
+
+int ocfs2_reflink_xattrs(struct inode *old_inode,
+			 struct buffer_head *old_bh,
+			 struct inode *new_inode,
+			 struct buffer_head *new_bh)
+{
+	int ret;
+	struct ocfs2_xattr_reflink args;
+	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
+	struct buffer_head *blk_bh = NULL;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_refcount_tree *ref_tree;
+	struct buffer_head *ref_root_bh = NULL;
+
+	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
+				       le64_to_cpu(di->i_refcount_loc),
+				       1, &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_init_dealloc_ctxt(&dealloc);
+
+	args.old_inode = old_inode;
+	args.new_inode = new_inode;
+	args.old_bh = old_bh;
+	args.new_bh = new_bh;
+	args.ref_ci = &ref_tree->rf_ci;
+	args.ref_root_bh = ref_root_bh;
+	args.dealloc = &dealloc;
+
+	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
+		ret = ocfs2_reflink_xattr_inline(&args);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_unlock;
+		}
+	}
+
+	if (!di->i_xattr_loc)
+		goto out_unlock;
+
+	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
+				     &blk_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
+	if (ret)
+		mlog_errno(ret);
+
+	brelse(blk_bh);
+
+out_unlock:
+	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
+				   ref_tree, 1);
+	brelse(ref_root_bh);
+
+	if (ocfs2_dealloc_has_cluster(&dealloc)) {
+		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
+		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
+	}
+
+out:
+	return ret;
+}
+
+/*
  * 'security' attributes support
  */
 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index e74703f..4f91305 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -90,4 +90,8 @@ int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
 				     struct ocfs2_caching_info *ref_ci,
 				     struct buffer_head *ref_root_bh,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_reflink_xattrs(struct inode *old_inode,
+			 struct buffer_head *old_bh,
+			 struct inode *new_inode,
+			 struct buffer_head *new_bh);
 #endif /* OCFS2_XATTR_H */
-- 
cgit v0.10.2


From ce9c5a54c0f06b0efb4db8720a0616cc6aa0e5b2 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:59 +0800
Subject: ocfs2: Modify removing xattr process for refcount.

The old xattr value remove is quite simple, it just erase the
tree and free the clusters. But as we have added refcount support,
The process is a little complicated.

We have to lock the refcount tree at the beginning, what's more,
we may split the refcount tree in some cases, so meta/credits are
needed.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 661ed9b..8d1a0ab 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -199,6 +199,11 @@ static int ocfs2_prepare_refcount_xattr(struct inode *inode,
 					struct ocfs2_refcount_tree **ref_tree,
 					int *meta_need,
 					int *credits);
+static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
+					   struct ocfs2_xattr_bucket *bucket,
+					   int offset,
+					   struct ocfs2_xattr_value_root **xv,
+					   struct buffer_head **bh);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -1752,51 +1757,112 @@ out:
 	return ret;
 }
 
+/*
+ * In xattr remove, if it is stored outside and refcounted, we may have
+ * the chance to split the refcount tree. So need the allocators.
+ */
+static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
+					struct ocfs2_xattr_value_root *xv,
+					struct ocfs2_caching_info *ref_ci,
+					struct buffer_head *ref_root_bh,
+					struct ocfs2_alloc_context **meta_ac,
+					int *ref_credits)
+{
+	int ret, meta_add = 0;
+	u32 p_cluster, num_clusters;
+	unsigned int ext_flags;
+
+	*ref_credits = 0;
+	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
+				       &num_clusters,
+				       &xv->xr_list,
+				       &ext_flags);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
+		goto out;
+
+	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
+						 ref_root_bh, xv,
+						 &meta_add, ref_credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
+						meta_add, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
+
 static int ocfs2_remove_value_outside(struct inode*inode,
 				      struct ocfs2_xattr_value_buf *vb,
-				      struct ocfs2_xattr_header *header)
+				      struct ocfs2_xattr_header *header,
+				      struct ocfs2_caching_info *ref_ci,
+				      struct buffer_head *ref_root_bh)
 {
-	int ret = 0, i;
+	int ret = 0, i, ref_credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+	void *val;
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
-	ctxt.handle = ocfs2_start_trans(osb,
-					ocfs2_remove_extent_credits(osb->sb));
-	if (IS_ERR(ctxt.handle)) {
-		ret = PTR_ERR(ctxt.handle);
-		mlog_errno(ret);
-		goto out;
-	}
-
 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 
-		if (!ocfs2_xattr_is_local(entry)) {
-			void *val;
+		if (ocfs2_xattr_is_local(entry))
+			continue;
 
-			val = (void *)header +
-				le16_to_cpu(entry->xe_name_offset);
-			vb->vb_xv = (struct ocfs2_xattr_value_root *)
-				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
-			ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
-			if (ret < 0) {
-				mlog_errno(ret);
-				break;
-			}
+		val = (void *)header +
+			le16_to_cpu(entry->xe_name_offset);
+		vb->vb_xv = (struct ocfs2_xattr_value_root *)
+			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
+
+		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
+							 ref_ci, ref_root_bh,
+							 &ctxt.meta_ac,
+							 &ref_credits);
+
+		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
+					ocfs2_remove_extent_credits(osb->sb));
+		if (IS_ERR(ctxt.handle)) {
+			ret = PTR_ERR(ctxt.handle);
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
+		if (ret < 0) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ocfs2_commit_trans(osb, ctxt.handle);
+		if (ctxt.meta_ac) {
+			ocfs2_free_alloc_context(ctxt.meta_ac);
+			ctxt.meta_ac = NULL;
 		}
 	}
 
-	ocfs2_commit_trans(osb, ctxt.handle);
+	if (ctxt.meta_ac)
+		ocfs2_free_alloc_context(ctxt.meta_ac);
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
-out:
 	return ret;
 }
 
 static int ocfs2_xattr_ibody_remove(struct inode *inode,
-				    struct buffer_head *di_bh)
+				    struct buffer_head *di_bh,
+				    struct ocfs2_caching_info *ref_ci,
+				    struct buffer_head *ref_root_bh)
 {
 
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1811,13 +1877,21 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode,
 		 ((void *)di + inode->i_sb->s_blocksize -
 		 le16_to_cpu(di->i_xattr_inline_size));
 
-	ret = ocfs2_remove_value_outside(inode, &vb, header);
+	ret = ocfs2_remove_value_outside(inode, &vb, header,
+					 ref_ci, ref_root_bh);
 
 	return ret;
 }
 
+struct ocfs2_rm_xattr_bucket_para {
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+};
+
 static int ocfs2_xattr_block_remove(struct inode *inode,
-				    struct buffer_head *blk_bh)
+				    struct buffer_head *blk_bh,
+				    struct ocfs2_caching_info *ref_ci,
+				    struct buffer_head *ref_root_bh)
 {
 	struct ocfs2_xattr_block *xb;
 	int ret = 0;
@@ -1825,22 +1899,29 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
 		.vb_bh = blk_bh,
 		.vb_access = ocfs2_journal_access_xb,
 	};
+	struct ocfs2_rm_xattr_bucket_para args = {
+		.ref_ci = ref_ci,
+		.ref_root_bh = ref_root_bh,
+	};
 
 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
-		ret = ocfs2_remove_value_outside(inode, &vb, header);
+		ret = ocfs2_remove_value_outside(inode, &vb, header,
+						 ref_ci, ref_root_bh);
 	} else
 		ret = ocfs2_iterate_xattr_index_block(inode,
 						blk_bh,
 						ocfs2_rm_xattr_cluster,
-						NULL);
+						&args);
 
 	return ret;
 }
 
 static int ocfs2_xattr_free_block(struct inode *inode,
-				  u64 block)
+				  u64 block,
+				  struct ocfs2_caching_info *ref_ci,
+				  struct buffer_head *ref_root_bh)
 {
 	struct inode *xb_alloc_inode;
 	struct buffer_head *xb_alloc_bh = NULL;
@@ -1858,7 +1939,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_xattr_block_remove(inode, blk_bh);
+	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -1918,6 +1999,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_refcount_tree *ref_tree = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_caching_info *ref_ci = NULL;
 	handle_t *handle;
 	int ret;
 
@@ -1927,8 +2011,21 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
 		return 0;
 
+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
+		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
+					       le64_to_cpu(di->i_refcount_loc),
+					       1, &ref_tree, &ref_root_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+		ref_ci = &ref_tree->rf_ci;
+
+	}
+
 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
-		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
+		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
+					       ref_ci, ref_root_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -1937,7 +2034,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 
 	if (di->i_xattr_loc) {
 		ret = ocfs2_xattr_free_block(inode,
-					     le64_to_cpu(di->i_xattr_loc));
+					     le64_to_cpu(di->i_xattr_loc),
+					     ref_ci, ref_root_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -1971,6 +2069,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 out_commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
+	brelse(ref_root_bh);
 	return ret;
 }
 
@@ -4989,7 +5090,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	struct ocfs2_extent_tree et;
 
 	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
-					  ocfs2_delete_xattr_in_bucket, NULL);
+					  ocfs2_delete_xattr_in_bucket, para);
 	if (ret) {
 		mlog_errno(ret);
 		return ret;
@@ -5378,7 +5479,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 					struct ocfs2_xattr_bucket *bucket,
 					void *para)
 {
-	int ret = 0;
+	int ret = 0, ref_credits;
 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	u16 i;
 	struct ocfs2_xattr_entry *xe;
@@ -5386,7 +5487,9 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
 	int credits = ocfs2_remove_extent_credits(osb->sb) +
 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
+	struct ocfs2_xattr_value_root *xv;
+	struct ocfs2_rm_xattr_bucket_para *args =
+			(struct ocfs2_rm_xattr_bucket_para *)para;
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
@@ -5395,7 +5498,16 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 		if (ocfs2_xattr_is_local(xe))
 			continue;
 
-		ctxt.handle = ocfs2_start_trans(osb, credits);
+		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
+						      i, &xv, NULL);
+
+		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
+							 args->ref_ci,
+							 args->ref_root_bh,
+							 &ctxt.meta_ac,
+							 &ref_credits);
+
+		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
 		if (IS_ERR(ctxt.handle)) {
 			ret = PTR_ERR(ctxt.handle);
 			mlog_errno(ret);
@@ -5406,12 +5518,18 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 							i, 0, &ctxt);
 
 		ocfs2_commit_trans(osb, ctxt.handle);
+		if (ctxt.meta_ac) {
+			ocfs2_free_alloc_context(ctxt.meta_ac);
+			ctxt.meta_ac = NULL;
+		}
 		if (ret) {
 			mlog_errno(ret);
 			break;
 		}
 	}
 
+	if (ctxt.meta_ac)
+		ocfs2_free_alloc_context(ctxt.meta_ac);
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
 	return ret;
-- 
cgit v0.10.2


From 7540c1a77b26bc2f9d86a0bfbe6597b05ec5f93d Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:03 +0800
Subject: ocfs2: Don't merge in 1st refcount ops of reflink.

Actually the whole reflink will touch refcount tree 2 times:
1. It will add the clusters in the extent record to the tree if it
   isn't refcounted before.
2. It will add 1 refcount to these clusters when it add these
   extent records to the tree.

So actually we shouldn't do merge in the 1st operation since the 2nd
one will soon be called and we may have to split it again. Do a merge
first and split soon is a waste of time. So we only merge in the 2nd
round. This is done by adding a new internal __ocfs2_increase_refcount
and call it with "not-merge" for 1st refcount operation in reflink.

This also has a side-effect that we don't need to worry too much about
the metadata allocation in the 2nd round since it will only merge and
no split will happen for those records.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index dc57d06..47df8c5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1147,7 +1147,7 @@ static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
 static int ocfs2_change_refcount_rec(handle_t *handle,
 				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *ref_leaf_bh,
-				     int index, int change)
+				     int index, int merge, int change)
 {
 	int ret;
 	struct ocfs2_refcount_block *rb =
@@ -1176,7 +1176,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 		}
 
 		le16_add_cpu(&rl->rl_used, -1);
-	} else
+	} else if (merge)
 		ocfs2_refcount_rec_merge(rb, index);
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
@@ -1652,7 +1652,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
 				     struct buffer_head *ref_root_bh,
 				     struct buffer_head *ref_leaf_bh,
 				     struct ocfs2_refcount_rec *rec,
-				     int index,
+				     int index, int merge,
 				     struct ocfs2_alloc_context *meta_ac)
 {
 	int ret;
@@ -1710,7 +1710,8 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
 
 	le16_add_cpu(&rf_list->rl_used, 1);
 
-	ocfs2_refcount_rec_merge(rb, index);
+	if (merge)
+		ocfs2_refcount_rec_merge(rb, index);
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
 	if (ret) {
@@ -1744,7 +1745,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
 				    struct buffer_head *ref_root_bh,
 				    struct buffer_head *ref_leaf_bh,
 				    struct ocfs2_refcount_rec *split_rec,
-				    int index,
+				    int index, int merge,
 				    struct ocfs2_alloc_context *meta_ac,
 				    struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
@@ -1882,7 +1883,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
 		     le32_to_cpu(split_rec->r_refcount),
 		     (unsigned long long)ref_leaf_bh->b_blocknr, index);
 
-		ocfs2_refcount_rec_merge(rb, index);
+		if (merge)
+			ocfs2_refcount_rec_merge(rb, index);
 	}
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
@@ -1894,12 +1896,12 @@ out:
 	return ret;
 }
 
-int ocfs2_increase_refcount(handle_t *handle,
-			    struct ocfs2_caching_info *ci,
-			    struct buffer_head *ref_root_bh,
-			    u64 cpos, u32 len,
-			    struct ocfs2_alloc_context *meta_ac,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+static int __ocfs2_increase_refcount(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     u64 cpos, u32 len, int merge,
+				     struct ocfs2_alloc_context *meta_ac,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0, index;
 	struct buffer_head *ref_leaf_bh = NULL;
@@ -1937,7 +1939,8 @@ int ocfs2_increase_refcount(handle_t *handle,
 			     "count %u\n", (unsigned long long)cpos, set_len,
 			     le32_to_cpu(rec.r_refcount));
 			ret = ocfs2_change_refcount_rec(handle, ci,
-							ref_leaf_bh, index, 1);
+							ref_leaf_bh, index,
+							merge, 1);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -1950,7 +1953,8 @@ int ocfs2_increase_refcount(handle_t *handle,
 			     set_len);
 			ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
 							ref_leaf_bh,
-							&rec, index, meta_ac);
+							&rec, index,
+							merge, meta_ac);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -1968,7 +1972,7 @@ int ocfs2_increase_refcount(handle_t *handle,
 			     set_len, le32_to_cpu(rec.r_refcount));
 			ret = ocfs2_split_refcount_rec(handle, ci,
 						       ref_root_bh, ref_leaf_bh,
-						       &rec, index,
+						       &rec, index, merge,
 						       meta_ac, dealloc);
 			if (ret) {
 				mlog_errno(ret);
@@ -2061,6 +2065,18 @@ out:
 	return ret;
 }
 
+int ocfs2_increase_refcount(handle_t *handle,
+			    struct ocfs2_caching_info *ci,
+			    struct buffer_head *ref_root_bh,
+			    u64 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	return __ocfs2_increase_refcount(handle, ci, ref_root_bh,
+					 cpos, len, 1,
+					 meta_ac, dealloc);
+}
+
 static int ocfs2_decrease_refcount_rec(handle_t *handle,
 				struct ocfs2_caching_info *ci,
 				struct buffer_head *ref_root_bh,
@@ -2081,7 +2097,7 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
 	if (cpos == le64_to_cpu(rec->r_cpos) &&
 	    len == le32_to_cpu(rec->r_clusters))
 		ret = ocfs2_change_refcount_rec(handle, ci,
-						ref_leaf_bh, index, -1);
+						ref_leaf_bh, index, 1, -1);
 	else {
 		struct ocfs2_refcount_rec split = *rec;
 		split.r_cpos = cpu_to_le64(cpos);
@@ -2097,7 +2113,7 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
 		     le32_to_cpu(rec->r_clusters));
 		ret = ocfs2_split_refcount_rec(handle, ci,
 					       ref_root_bh, ref_leaf_bh,
-					       &split, index,
+					       &split, index, 1,
 					       meta_ac, dealloc);
 	}
 
@@ -3631,9 +3647,9 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
-				      p_cluster, num_clusters,
-				      meta_ac, dealloc);
+	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+					p_cluster, num_clusters, 0,
+					meta_ac, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
-- 
cgit v0.10.2


From c18b812d127a971901180188b918a7cd98ccd4d6 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:07 +0800
Subject: ocfs2: Make transaction extend more efficient.

In ocfs2_extend_rotate_transaction, op_credits is the orignal
credits in the handle and we only want to extend the credits
for the rotation, but the old solution always double it. It
is harmless for some minor operations, but for actions like
reflink we may rotate tree many times and cause the credits
increase dramatically. So this patch try to only increase
the desired credits.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7c879fc..38a42f5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2326,10 +2326,18 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
 					   int op_credits,
 					   struct ocfs2_path *path)
 {
+	int ret;
 	int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
 
-	if (handle->h_buffer_credits < credits)
-		return ocfs2_extend_trans(handle, credits);
+	if (handle->h_buffer_credits < credits) {
+		ret = ocfs2_extend_trans(handle,
+					 credits - handle->h_buffer_credits);
+		if (ret)
+			return ret;
+
+		if (unlikely(handle->h_buffer_credits < credits))
+			return ocfs2_extend_trans(handle, credits);
+	}
 
 	return 0;
 }
-- 
cgit v0.10.2


From 19bd341f6a6c6b314bcac55bbd906bfd3603fe9e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:10 +0800
Subject: ocfs2: Use proper parameter for some inode operation.

In order to make the original function more suitable for reflink,
we modify the following inode operations. Both are tiny.

1. ocfs2_mknod_locked only use dentry for mlog, so move it to
   the caller so that reflink can use it without dentry.
2. ocfs2_prepare_orphan_dir only want inode to get its ip_blkno.
   So use ip_blkno instead.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index c07217a..818df58 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -69,7 +69,6 @@
 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
 			      struct inode *inode,
-			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
@@ -78,7 +77,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct inode **ret_orphan_dir,
-				    struct inode *inode,
+				    u64 blkno,
 				    char *name,
 				    struct ocfs2_dir_lookup_result *lookup);
 
@@ -358,8 +357,12 @@ static int ocfs2_mknod(struct inode *dir,
 	}
 	did_quota_inode = 1;
 
+	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
+		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
+		   dentry->d_name.name);
+
 	/* do the real work now. */
-	status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
+	status = ocfs2_mknod_locked(osb, dir, inode, dev,
 				    &new_fe_bh, parent_fe_bh, handle,
 				    inode_ac);
 	if (status < 0) {
@@ -466,7 +469,6 @@ leave:
 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
 			      struct inode *inode,
-			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
@@ -480,10 +482,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	u16 suballoc_bit;
 	u16 feat;
 
-	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
-		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
-		   dentry->d_name.name);
-
 	*new_fe_bh = NULL;
 
 	status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
@@ -852,7 +850,8 @@ static int ocfs2_unlink(struct inode *dir,
 	}
 
 	if (inode_is_unlinkable(inode)) {
-		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
+		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
+						  OCFS2_I(inode)->ip_blkno,
 						  orphan_name, &orphan_insert);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1243,9 +1242,8 @@ static int ocfs2_rename(struct inode *old_dir,
 
 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
-							  new_inode,
-							  orphan_name,
-							  &orphan_insert);
+						OCFS2_I(new_inode)->ip_blkno,
+						orphan_name, &orphan_insert);
 			if (status < 0) {
 				mlog_errno(status);
 				goto bail;
@@ -1699,7 +1697,11 @@ static int ocfs2_symlink(struct inode *dir,
 	}
 	did_quota_inode = 1;
 
-	status = ocfs2_mknod_locked(osb, dir, inode, dentry,
+	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry,
+		   inode->i_mode, dentry->d_name.len,
+		   dentry->d_name.name);
+
+	status = ocfs2_mknod_locked(osb, dir, inode,
 				    0, &new_fe_bh, parent_fe_bh, handle,
 				    inode_ac);
 	if (status < 0) {
@@ -1849,7 +1851,7 @@ bail:
 
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct inode **ret_orphan_dir,
-				    struct inode *inode,
+				    u64 blkno,
 				    char *name,
 				    struct ocfs2_dir_lookup_result *lookup)
 {
@@ -1857,7 +1859,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 	struct buffer_head *orphan_dir_bh = NULL;
 	int status = 0;
 
-	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
+	status = ocfs2_blkno_stringify(blkno, name);
 	if (status < 0) {
 		mlog_errno(status);
 		return status;
-- 
cgit v0.10.2


From bc13d347574fc0a8a666bc0f4cc2b635d202e372 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:14 +0800
Subject: ocfs2: Create reflinked file in orphan dir.

reflink is a very complicated process, so it can't be integrated
into one transaction. So if the system panic in the operation, we
may leave a unfinished inode in the destication directory.

So we will try to create an inode in orphan_dir first, reflink it
to the src file and then move it to the destication file in the end.
In that way we won't be afraid of any corruption during the reflink.

This patch adds 2 functions for orphan_dir operation:
1. Create a new inode in orphand dir.
2. Move an inode to a target dir.

Note:
fsck.ocfs2 should work for us to remove the unfinished file in the
orphan_dir.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 818df58..f010b22 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2041,6 +2041,274 @@ leave:
 	return status;
 }
 
+int ocfs2_create_inode_in_orphan(struct inode *dir,
+				 int mode,
+				 struct inode **new_inode)
+{
+	int status, did_quota_inode = 0;
+	struct inode *inode = NULL;
+	struct inode *orphan_dir = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	struct ocfs2_dinode *di = NULL;
+	handle_t *handle = NULL;
+	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
+	struct buffer_head *parent_di_bh = NULL;
+	struct buffer_head *new_di_bh = NULL;
+	struct ocfs2_alloc_context *inode_ac = NULL;
+	struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
+
+	status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
+	if (status < 0) {
+		if (status != -ENOENT)
+			mlog_errno(status);
+		return status;
+	}
+
+	/*
+	 * We give the orphan dir the root blkno to fake an orphan name,
+	 * and allocate enough space for our insertion.
+	 */
+	status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
+					  osb->root_blkno,
+					  orphan_name, &orphan_insert);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* reserve an inode spot */
+	status = ocfs2_reserve_new_inode(osb, &inode_ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			mlog_errno(status);
+		goto leave;
+	}
+
+	inode = ocfs2_get_init_inode(dir, mode);
+	if (!inode) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto leave;
+	}
+
+	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 0, 0));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		handle = NULL;
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
+	 * to be called. */
+	if (sb_any_quota_active(osb->sb) &&
+	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+		status = -EDQUOT;
+		goto leave;
+	}
+	did_quota_inode = 1;
+
+	/* do the real work now. */
+	status = ocfs2_mknod_locked(osb, dir, inode,
+				    0, &new_di_bh, parent_di_bh, handle,
+				    inode_ac);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	di = (struct ocfs2_dinode *)new_di_bh->b_data;
+	status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name,
+				  &orphan_insert, orphan_dir);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* get open lock so that only nodes can't remove it from orphan dir. */
+	status = ocfs2_open_lock(inode);
+	if (status < 0)
+		mlog_errno(status);
+
+leave:
+	if (status < 0 && did_quota_inode)
+		vfs_dq_free_inode(inode);
+	if (handle)
+		ocfs2_commit_trans(osb, handle);
+
+	if (orphan_dir) {
+		/* This was locked for us in ocfs2_prepare_orphan_dir() */
+		ocfs2_inode_unlock(orphan_dir, 1);
+		mutex_unlock(&orphan_dir->i_mutex);
+		iput(orphan_dir);
+	}
+
+	if (status == -ENOSPC)
+		mlog(0, "Disk is full\n");
+
+	if ((status < 0) && inode) {
+		clear_nlink(inode);
+		iput(inode);
+	}
+
+	if (inode_ac)
+		ocfs2_free_alloc_context(inode_ac);
+
+	brelse(new_di_bh);
+
+	if (!status)
+		*new_inode = inode;
+
+	ocfs2_free_dir_lookup_result(&orphan_insert);
+
+	ocfs2_inode_unlock(dir, 1);
+	brelse(parent_di_bh);
+	return status;
+}
+
+int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
+				   struct inode *inode,
+				   struct dentry *dentry)
+{
+	int status = 0;
+	struct buffer_head *parent_di_bh = NULL;
+	handle_t *handle = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	struct ocfs2_dinode *dir_di, *di;
+	struct inode *orphan_dir_inode = NULL;
+	struct buffer_head *orphan_dir_bh = NULL;
+	struct buffer_head *di_bh = NULL;
+	struct ocfs2_dir_lookup_result lookup = { NULL, };
+
+	mlog_entry("(0x%p, 0x%p, %.*s')\n", dir, dentry,
+		   dentry->d_name.len, dentry->d_name.name);
+
+	status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
+	if (status < 0) {
+		if (status != -ENOENT)
+			mlog_errno(status);
+		return status;
+	}
+
+	dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data;
+	if (!dir_di->i_links_count) {
+		/* can't make a file in a deleted directory. */
+		status = -ENOENT;
+		goto leave;
+	}
+
+	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
+					   dentry->d_name.len);
+	if (status)
+		goto leave;
+
+	/* get a spot inside the dir. */
+	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh,
+					      dentry->d_name.name,
+					      dentry->d_name.len, &lookup);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+						       ORPHAN_DIR_SYSTEM_INODE,
+						       osb->slot_num);
+	if (!orphan_dir_inode) {
+		status = -EEXIST;
+		mlog_errno(status);
+		goto leave;
+	}
+
+	mutex_lock(&orphan_dir_inode->i_mutex);
+
+	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		mutex_unlock(&orphan_dir_inode->i_mutex);
+		iput(orphan_dir_inode);
+		goto leave;
+	}
+
+	status = ocfs2_read_inode_block(inode, &di_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto orphan_unlock;
+	}
+
+	handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		handle = NULL;
+		mlog_errno(status);
+		goto orphan_unlock;
+	}
+
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+					 di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
+				  orphan_dir_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	di = (struct ocfs2_dinode *)di_bh->b_data;
+	le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
+	di->i_orphaned_slot = 0;
+	ocfs2_journal_dirty(handle, di_bh);
+
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 OCFS2_I(inode)->ip_blkno, parent_di_bh,
+				 &lookup);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	status = ocfs2_dentry_attach_lock(dentry, inode,
+					  OCFS2_I(dir)->ip_blkno);
+	if (status) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	insert_inode_hash(inode);
+	dentry->d_op = &ocfs2_dentry_ops;
+	d_instantiate(dentry, inode);
+	status = 0;
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+orphan_unlock:
+	ocfs2_inode_unlock(orphan_dir_inode, 1);
+	mutex_unlock(&orphan_dir_inode->i_mutex);
+	iput(orphan_dir_inode);
+leave:
+
+	ocfs2_inode_unlock(dir, 1);
+
+	brelse(di_bh);
+	brelse(parent_di_bh);
+	brelse(orphan_dir_bh);
+
+	ocfs2_free_dir_lookup_result(&lookup);
+
+	mlog_exit(status);
+
+	return status;
+}
+
 const struct inode_operations ocfs2_dir_iops = {
 	.create		= ocfs2_create,
 	.lookup		= ocfs2_lookup,
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 688aef6..e5d059d 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -35,5 +35,11 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		     struct inode *orphan_dir_inode,
 		     struct inode *inode,
 		     struct buffer_head *orphan_dir_bh);
+int ocfs2_create_inode_in_orphan(struct inode *dir,
+				 int mode,
+				 struct inode **new_inode);
+int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
+				   struct inode *new_inode,
+				   struct dentry *new_dentry);
 
 #endif /* OCFS2_NAMEI_H */
-- 
cgit v0.10.2


From 0fe9b66c65f3ff227da45381afe7612f91e32740 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:47:56 +0800
Subject: ocfs2: Add preserve to reflink.

reflink has 2 options for the destination file:
1. snapshot: reflink will attempt to preserve ownership, permissions,
   and all other security state in order to create a full snapshot.
2. new file: it will acquire the data extent sharing but will see the
   file's security state and attributes initialized as a new file.

So add the option to ocfs2.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 47df8c5..5d88e76 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3904,7 +3904,8 @@ out:
 static int ocfs2_complete_reflink(struct inode *s_inode,
 				  struct buffer_head *s_bh,
 				  struct inode *t_inode,
-				  struct buffer_head *t_bh)
+				  struct buffer_head *t_bh,
+				  bool preserve)
 {
 	int ret;
 	handle_t *handle;
@@ -3939,22 +3940,26 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
 	di->i_size = s_di->i_size;
 	di->i_dyn_features = s_di->i_dyn_features;
 	di->i_attr = s_di->i_attr;
-	di->i_uid = s_di->i_uid;
-	di->i_gid = s_di->i_gid;
-	di->i_mode = s_di->i_mode;
 
-	/*
-	 * update time.
-	 * we want mtime to appear identical to the source and update ctime.
-	 */
-	t_inode->i_ctime = CURRENT_TIME;
+	if (preserve) {
+		di->i_uid = s_di->i_uid;
+		di->i_gid = s_di->i_gid;
+		di->i_mode = s_di->i_mode;
+
+		/*
+		 * update time.
+		 * we want mtime to appear identical to the source and
+		 * update ctime.
+		 */
+		t_inode->i_ctime = CURRENT_TIME;
 
-	di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+		di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
+		di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
 
-	t_inode->i_mtime = s_inode->i_mtime;
-	di->i_mtime = s_di->i_mtime;
-	di->i_mtime_nsec = s_di->i_mtime_nsec;
+		t_inode->i_mtime = s_inode->i_mtime;
+		di->i_mtime = s_di->i_mtime;
+		di->i_mtime_nsec = s_di->i_mtime_nsec;
+	}
 
 	ocfs2_journal_dirty(handle, t_bh);
 
@@ -3966,7 +3971,8 @@ out_commit:
 static int ocfs2_create_reflink_node(struct inode *s_inode,
 				     struct buffer_head *s_bh,
 				     struct inode *t_inode,
-				     struct buffer_head *t_bh)
+				     struct buffer_head *t_bh,
+				     bool preserve)
 {
 	int ret;
 	struct buffer_head *ref_root_bh = NULL;
@@ -4001,7 +4007,7 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
 		goto out_unlock_refcount;
 	}
 
-	ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh);
+	ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh, preserve);
 	if (ret)
 		mlog_errno(ret);
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 8d1a0ab..fe34190 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -56,6 +56,7 @@
 #include "super.h"
 #include "xattr.h"
 #include "refcounttree.h"
+#include "acl.h"
 
 struct ocfs2_xattr_def_value_root {
 	struct ocfs2_xattr_value_root	xv;
@@ -204,6 +205,8 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
 					   int offset,
 					   struct ocfs2_xattr_value_root **xv,
 					   struct buffer_head **bh);
+static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
+				    const void *value, size_t size, int flags);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -5994,6 +5997,7 @@ out:
 	return ret;
 }
 
+typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
 /*
  * Store the information we need in xattr reflink.
  * old_bh and new_bh are inode bh for the old and new inode.
@@ -6006,6 +6010,7 @@ struct ocfs2_xattr_reflink {
 	struct ocfs2_caching_info *ref_ci;
 	struct buffer_head *ref_root_bh;
 	struct ocfs2_cached_dealloc_ctxt *dealloc;
+	should_xattr_reflinked *xattr_reflinked;
 };
 
 /*
@@ -6147,6 +6152,9 @@ out:
  * NOTE:
  * Before we call this function, the caller has memcpy the xattr in
  * old_xh to the new_xh.
+ *
+ * If args.xattr_reflinked is set, call it to decide whether the xe should
+ * be reflinked or not. If not, remove it from the new xattr header.
  */
 static int ocfs2_reflink_xattr_header(handle_t *handle,
 				      struct ocfs2_xattr_reflink *args,
@@ -6159,10 +6167,10 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
 				      get_xattr_value_root *func,
 				      void *para)
 {
-	int ret = 0, i;
+	int ret = 0, i, j;
 	struct super_block *sb = args->old_inode->i_sb;
 	struct buffer_head *value_bh;
-	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_entry *xe, *last;
 	struct ocfs2_xattr_value_root *xv, *new_xv;
 	struct ocfs2_extent_tree data_et;
 	u32 clusters, cpos, p_cluster, num_clusters;
@@ -6170,9 +6178,30 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
 
 	mlog(0, "reflink xattr in container %llu, count = %u\n",
 	     (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
-	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+
+	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
+	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
 		xe = &xh->xh_entries[i];
 
+		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
+			xe = &new_xh->xh_entries[j];
+
+			le16_add_cpu(&new_xh->xh_count, -1);
+			if (new_xh->xh_count) {
+				memmove(xe, xe + 1,
+					(void *)last - (void *)xe);
+				memset(last, 0,
+				       sizeof(struct ocfs2_xattr_entry));
+			}
+
+			/*
+			 * We don't want j to increase in the next round since
+			 * it is already moved ahead.
+			 */
+			j--;
+			continue;
+		}
+
 		if (ocfs2_xattr_is_local(xe))
 			continue;
 
@@ -6182,7 +6211,7 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
 			break;
 		}
 
-		ret = func(sb, new_bh, new_xh, i, &new_xv, &value_bh, para);
+		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
 		if (ret) {
 			mlog_errno(ret);
 			break;
@@ -6847,10 +6876,20 @@ out:
 	return ret;
 }
 
+static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
+{
+	int type = ocfs2_xattr_get_type(xe);
+
+	return type != OCFS2_XATTR_INDEX_SECURITY &&
+	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
+	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+}
+
 int ocfs2_reflink_xattrs(struct inode *old_inode,
 			 struct buffer_head *old_bh,
 			 struct inode *new_inode,
-			 struct buffer_head *new_bh)
+			 struct buffer_head *new_bh,
+			 bool preserve_security)
 {
 	int ret;
 	struct ocfs2_xattr_reflink args;
@@ -6878,6 +6917,10 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
 	args.ref_ci = &ref_tree->rf_ci;
 	args.ref_root_bh = ref_root_bh;
 	args.dealloc = &dealloc;
+	if (preserve_security)
+		args.xattr_reflinked = NULL;
+	else
+		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
 
 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
 		ret = ocfs2_reflink_xattr_inline(&args);
@@ -6918,6 +6961,51 @@ out:
 }
 
 /*
+ * Initialize security and acl for a already created inode.
+ * Used for reflink a non-preserve-security file.
+ *
+ * It uses common api like ocfs2_xattr_set, so the caller
+ * must not hold any lock expect i_mutex.
+ */
+int ocfs2_init_security_and_acl(struct inode *dir,
+				struct inode *inode)
+{
+	int ret = 0;
+	struct buffer_head *dir_bh = NULL;
+	struct ocfs2_security_xattr_info si = {
+		.enable = 1,
+	};
+
+	ret = ocfs2_init_security_get(inode, dir, &si);
+	if (!ret) {
+		ret = ocfs2_xattr_security_set(inode, si.name,
+					       si.value, si.value_len,
+					       XATTR_CREATE);
+		if (ret) {
+			mlog_errno(ret);
+			goto leave;
+		}
+	} else if (ret != -EOPNOTSUPP) {
+		mlog_errno(ret);
+		goto leave;
+	}
+
+	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto leave;
+	}
+
+	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_inode_unlock(dir, 0);
+	brelse(dir_bh);
+leave:
+	return ret;
+}
+/*
  * 'security' attributes support
  */
 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 4f91305..08e3638 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -93,5 +93,8 @@ int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
 int ocfs2_reflink_xattrs(struct inode *old_inode,
 			 struct buffer_head *old_bh,
 			 struct inode *new_inode,
-			 struct buffer_head *new_bh);
+			 struct buffer_head *new_bh,
+			 bool preserve_security);
+int ocfs2_init_security_and_acl(struct inode *dir,
+				struct inode *inode);
 #endif /* OCFS2_XATTR_H */
-- 
cgit v0.10.2


From 09bf27a000209e9e8c9c048b4c50f6bb0dd857bb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 21 Sep 2009 10:38:17 +0800
Subject: ocfs2: Implement ocfs2_reflink.

Implement ocfs2_reflink.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d88e76..7a8a384 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -33,6 +33,7 @@
 #include "extent_map.h"
 #include "aops.h"
 #include "xattr.h"
+#include "namei.h"
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
@@ -4022,3 +4023,125 @@ out:
 
 	return ret;
 }
+
+static int __ocfs2_reflink(struct dentry *old_dentry,
+			   struct buffer_head *old_bh,
+			   struct inode *new_inode,
+			   bool preserve)
+{
+	int ret;
+	struct inode *inode = old_dentry->d_inode;
+	struct buffer_head *new_bh = NULL;
+
+	ret = filemap_fdatawrite(inode->i_mapping);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_attach_refcount_tree(inode, old_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mutex_lock(&new_inode->i_mutex);
+	ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	ret = ocfs2_create_reflink_node(inode, old_bh,
+					new_inode, new_bh, preserve);
+	if (ret) {
+		mlog_errno(ret);
+		goto inode_unlock;
+	}
+
+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+		ret = ocfs2_reflink_xattrs(inode, old_bh,
+					   new_inode, new_bh,
+					   preserve);
+		if (ret)
+			mlog_errno(ret);
+	}
+inode_unlock:
+	ocfs2_inode_unlock(new_inode, 1);
+	brelse(new_bh);
+out_unlock:
+	mutex_unlock(&new_inode->i_mutex);
+out:
+	if (!ret) {
+		ret = filemap_fdatawait(inode->i_mapping);
+		if (ret)
+			mlog_errno(ret);
+	}
+	return ret;
+}
+
+static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
+			 struct dentry *new_dentry, bool preserve)
+{
+	int error;
+	struct inode *inode = old_dentry->d_inode;
+	struct buffer_head *old_bh = NULL;
+	struct inode *new_orphan_inode = NULL;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
+		return -EOPNOTSUPP;
+
+	error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
+					     &new_orphan_inode);
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	error = ocfs2_inode_lock(inode, &old_bh, 1);
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	down_write(&OCFS2_I(inode)->ip_xattr_sem);
+	down_write(&OCFS2_I(inode)->ip_alloc_sem);
+	error = __ocfs2_reflink(old_dentry, old_bh,
+				new_orphan_inode, preserve);
+	up_write(&OCFS2_I(inode)->ip_alloc_sem);
+	up_write(&OCFS2_I(inode)->ip_xattr_sem);
+
+	ocfs2_inode_unlock(inode, 1);
+	brelse(old_bh);
+
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	/* If the security isn't preserved, we need to re-initialize them. */
+	if (!preserve) {
+		error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
+		if (error)
+			mlog_errno(error);
+	}
+out:
+	if (!error) {
+		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
+						       new_dentry);
+		if (error)
+			mlog_errno(error);
+	}
+
+	if (new_orphan_inode) {
+		/*
+		 * We need to open_unlock the inode no matter whether we
+		 * succeed or not, so that other nodes can delete it later.
+		 */
+		ocfs2_open_unlock(new_orphan_inode);
+		if (error)
+			iput(new_orphan_inode);
+	}
+
+	return error;
+}
-- 
cgit v0.10.2


From 64871b8d62570fabec3b0959d494f8e0b87f5c4b Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:48:02 +0800
Subject: ocfs2: Enable refcount tree support.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 40072cd..4a4565b 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -99,7 +99,8 @@
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
 					 | OCFS2_FEATURE_INCOMPAT_XATTR \
 					 | OCFS2_FEATURE_INCOMPAT_META_ECC \
-					 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
+					 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
+					 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
 					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
-- 
cgit v0.10.2


From bd50873dc725a9fa72592ecc986c58805e823051 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 21 Sep 2009 11:25:14 +0800
Subject: ocfs2: Add ioctl for reflink.

The ioctl will take 3 parameters: old_path, new_path and
preserve and call vfs_reflink. It is useful when we backport
reflink features to old kernels.

Signed-off-by: Tao Ma <tao.ma@oracle.com>

diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 9fcd36d..a68d0e4 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -22,6 +22,7 @@
 #include "ocfs2_fs.h"
 #include "ioctl.h"
 #include "resize.h"
+#include "refcounttree.h"
 
 #include <linux/ext2_fs.h>
 
@@ -116,6 +117,9 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	int status;
 	struct ocfs2_space_resv sr;
 	struct ocfs2_new_group_input input;
+	struct reflink_arguments args;
+	const char *old_path, *new_path;
+	bool preserve;
 
 	switch (cmd) {
 	case OCFS2_IOC_GETFLAGS:
@@ -161,6 +165,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			return -EFAULT;
 
 		return ocfs2_group_add(inode, &input);
+	case OCFS2_IOC_REFLINK:
+		if (copy_from_user(&args, (struct reflink_arguments *)arg,
+				   sizeof(args)))
+			return -EFAULT;
+		old_path = (const char *)(unsigned long)args.old_path;
+		new_path = (const char *)(unsigned long)args.new_path;
+		preserve = (args.preserve != 0);
+
+		return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
 	default:
 		return -ENOTTY;
 	}
@@ -183,6 +196,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case OCFS2_IOC_GROUP_EXTEND:
 	case OCFS2_IOC_GROUP_ADD:
 	case OCFS2_IOC_GROUP_ADD64:
+	case OCFS2_IOC_REFLINK:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 4a4565b..e9431e4 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -301,6 +301,15 @@ struct ocfs2_new_group_input {
 #define OCFS2_IOC_GROUP_ADD	_IOW('o', 2,struct ocfs2_new_group_input)
 #define OCFS2_IOC_GROUP_ADD64	_IOW('o', 3,struct ocfs2_new_group_input)
 
+/* Used to pass 2 file names to reflink. */
+struct reflink_arguments {
+	__u64 old_path;
+	__u64 new_path;
+	__u64 preserve;
+};
+#define OCFS2_IOC_REFLINK	_IOW('o', 4, struct reflink_arguments)
+
+
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a8a384..60287fc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -42,6 +42,11 @@
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/swap.h>
+#include <linux/security.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
 
 struct ocfs2_cow_context {
 	struct inode *inode;
@@ -4145,3 +4150,164 @@ out:
 
 	return error;
 }
+
+/*
+ * Below here are the bits used by OCFS2_IOC_REFLINK() to fake
+ * sys_reflink().  This will go away when vfs_reflink() exists in
+ * fs/namei.c.
+ */
+
+/* copied from may_create in VFS. */
+static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
+{
+	if (child->d_inode)
+		return -EEXIST;
+	if (IS_DEADDIR(dir))
+		return -ENOENT;
+	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+}
+
+/* copied from user_path_parent. */
+static int ocfs2_user_path_parent(const char __user *path,
+				  struct nameidata *nd, char **name)
+{
+	char *s = getname(path);
+	int error;
+
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	error = path_lookup(s, LOOKUP_PARENT, nd);
+	if (error)
+		putname(s);
+	else
+		*name = s;
+
+	return error;
+}
+
+/**
+ * ocfs2_vfs_reflink - Create a reference-counted link
+ *
+ * @old_dentry:        source dentry + inode
+ * @dir:       directory to create the target
+ * @new_dentry:        target dentry
+ * @preserve:  if true, preserve all file attributes
+ */
+int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
+		      struct dentry *new_dentry, bool preserve)
+{
+	struct inode *inode = old_dentry->d_inode;
+	int error;
+
+	if (!inode)
+		return -ENOENT;
+
+	error = ocfs2_may_create(dir, new_dentry);
+	if (error)
+		return error;
+
+	if (dir->i_sb != inode->i_sb)
+		return -EXDEV;
+
+	/*
+	 * A reflink to an append-only or immutable file cannot be created.
+	 */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return -EPERM;
+
+	/* Only regular files can be reflinked. */
+	if (!S_ISREG(inode->i_mode))
+		return -EPERM;
+
+	/*
+	 * If the caller wants to preserve ownership, they require the
+	 * rights to do so.
+	 */
+	if (preserve) {
+		if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN))
+			return -EPERM;
+		if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
+			return -EPERM;
+	}
+
+	/*
+	 * If the caller is modifying any aspect of the attributes, they
+	 * are not creating a snapshot.  They need read permission on the
+	 * file.
+	 */
+	if (!preserve) {
+		error = inode_permission(inode, MAY_READ);
+		if (error)
+			return error;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	vfs_dq_init(dir);
+	error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
+	mutex_unlock(&inode->i_mutex);
+	if (!error)
+		fsnotify_create(dir, new_dentry);
+	return error;
+}
+/*
+ * Most codes are copied from sys_linkat.
+ */
+int ocfs2_reflink_ioctl(struct inode *inode,
+			const char __user *oldname,
+			const char __user *newname,
+			bool preserve)
+{
+	struct dentry *new_dentry;
+	struct nameidata nd;
+	struct path old_path;
+	int error;
+	char *to = NULL;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
+		return -EOPNOTSUPP;
+
+	error = user_path_at(AT_FDCWD, oldname, 0, &old_path);
+	if (error) {
+		mlog_errno(error);
+		return error;
+	}
+
+	error = ocfs2_user_path_parent(newname, &nd, &to);
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	error = -EXDEV;
+	if (old_path.mnt != nd.path.mnt)
+		goto out_release;
+	new_dentry = lookup_create(&nd, 0);
+	error = PTR_ERR(new_dentry);
+	if (IS_ERR(new_dentry)) {
+		mlog_errno(error);
+		goto out_unlock;
+	}
+
+	error = mnt_want_write(nd.path.mnt);
+	if (error) {
+		mlog_errno(error);
+		goto out_dput;
+	}
+
+	error = ocfs2_vfs_reflink(old_path.dentry,
+				  nd.path.dentry->d_inode,
+				  new_dentry, preserve);
+	mnt_drop_write(nd.path.mnt);
+out_dput:
+	dput(new_dentry);
+out_unlock:
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+out_release:
+	path_put(&nd.path);
+	putname(to);
+out:
+	path_put(&old_path);
+
+	return error;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 2c238e6..c1d19b1 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -99,4 +99,8 @@ int ocfs2_increase_refcount(handle_t *handle,
 			    u64 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
 			    struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_reflink_ioctl(struct inode *inode,
+			const char __user *oldname,
+			const char __user *newname,
+			bool preserve);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v0.10.2


From b60e714dc3ef2a2541e3b262b8b742ba08a07fc0 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 22 Sep 2009 16:46:34 -0700
Subject: x86: ptrace: sysret path should reach syscall_trace_leave

If TIF_SYSCALL_TRACE or TIF_SINGLESTEP is set while inside a syscall,
the path back to user mode should get to syscall_trace_leave.

This does happen in most circumstances.  The exception to this is on
the 64-bit syscall fastpath, when no such flag was set on syscall
entry and nothing else has punted it off the fastpath for exit.  That
one exit fastpath fails to check for _TIF_WORK_SYSCALL_EXIT flags.
This makes the behavior inconsistent with what 32-bit tasks see and
what the native 32-bit kernel always does, and what 64-bit tasks see
in all cases where the iret path is taken anyhow.

Perhaps the only example that is affected is a ptrace stop inside
do_fork (for PTRACE_O_TRACE{CLONE,FORK,VFORK,VFORKDONE}).  Other
syscalls with internal ptrace stop points (execve) already take the
iret exit path for unrelated reasons.

Test cases for both PTRACE_SYSCALL and PTRACE_SINGLESTEP variants are at:
http://sources.redhat.com/cgi-bin/cvsweb.cgi/~checkout~/tests/ptrace-tests/tests/syscall-from-clone.c?cvsroot=systemtap
http://sources.redhat.com/cgi-bin/cvsweb.cgi/~checkout~/tests/ptrace-tests/tests/step-from-clone.c?cvsroot=systemtap

There was no special benefit to the sysret path's special path to call
do_notify_resume, because it always takes the iret exit path at the end.
So this change just makes the sysret exit path join the iret exit path
for all the signals and ptrace cases.  The fastpath still applies to
the plain syscall-audit and resched cases.

Signed-off-by: Roland McGrath <roland@redhat.com>
CC: Oleg Nesterov <oleg@redhat.com>

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 681c3fd..b5c061f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -536,20 +536,13 @@ sysret_signal:
 	bt $TIF_SYSCALL_AUDIT,%edx
 	jc sysret_audit
 #endif
-	/* edx:	work flags (arg3) */
-	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
-	xorl %esi,%esi # oldset -> arg2
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call do_notify_resume
-	RESTORE_TOP_OF_STACK %r11
-	RESTORE_REST
-	movl $_TIF_WORK_MASK,%edi
-	/* Use IRET because user could have changed frame. This
-	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp int_with_check
+	/*
+	 * We have a signal, or exit tracing or single-step.
+	 * These all wind up with the iret return path anyway,
+	 * so just join that path right now.
+	 */
+	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+	jmp int_check_syscall_exit_work
 
 badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -654,6 +647,7 @@ int_careful:
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+int_check_syscall_exit_work:
 	SAVE_REST
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
-- 
cgit v0.10.2


From aa296ddf32da207b430f61b77a8e81c0663f07cf Mon Sep 17 00:00:00 2001
From: Tim Abbott <tabbott@ksplice.com>
Date: Sun, 20 Sep 2009 13:32:57 -0400
Subject: score: Make PAGE_SIZE available to assembly.

Signed-off-by: Tim Abbott <tabbott@ksplice.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/arch/score/include/asm/page.h b/arch/score/include/asm/page.h
index ee58210..d92a5a2 100644
--- a/arch/score/include/asm/page.h
+++ b/arch/score/include/asm/page.h
@@ -2,10 +2,11 @@
 #define _ASM_SCORE_PAGE_H
 
 #include <linux/pfn.h>
+#include <linux/const.h>
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT	(12)
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
 #ifdef __KERNEL__
-- 
cgit v0.10.2


From 0dab1006896ef43f55b82b83ec2316f0179f681b Mon Sep 17 00:00:00 2001
From: Tim Abbott <tabbott@ksplice.com>
Date: Sun, 20 Sep 2009 13:32:58 -0400
Subject: score: Make THREAD_SIZE available to assembly files.

Signed-off-by: Tim Abbott <tabbott@ksplice.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h
index 3a11228..5593999 100644
--- a/arch/score/include/asm/thread_info.h
+++ b/arch/score/include/asm/thread_info.h
@@ -7,6 +7,15 @@
 #define KU_USER	0x08
 #define KU_KERN	0x00
 
+#include <asm/page.h>
+#include <linux/const.h>
+
+/* thread information allocation */
+#define THREAD_SIZE_ORDER 	(1)
+#define THREAD_SIZE 		(PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_MASK 		(THREAD_SIZE - _AC(1,UL))
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifndef __ASSEMBLY__
 
 #include <asm/processor.h>
@@ -62,12 +71,6 @@ struct thread_info {
 register struct thread_info *__current_thread_info __asm__("r28");
 #define current_thread_info()	__current_thread_info
 
-/* thread information allocation */
-#define THREAD_SIZE_ORDER 	(1)
-#define THREAD_SIZE 		(PAGE_SIZE << THREAD_SIZE_ORDER)
-#define THREAD_MASK 		(THREAD_SIZE - 1UL)
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-
 #define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
 #define free_thread_info(info) kfree(info)
 
-- 
cgit v0.10.2


From eccfbf98f4d77bb5849dfb96f829f14d2b292551 Mon Sep 17 00:00:00 2001
From: Tim Abbott <tabbott@ksplice.com>
Date: Wed, 23 Sep 2009 11:38:42 +0800
Subject: score: Cleanup linker script using new macros.

Signed-off-by: Tim Abbott <tabbott@ksplice.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Chen Liqin <liqin.chen@sunplusct.com>

diff --git a/arch/score/kernel/vmlinux.lds.S b/arch/score/kernel/vmlinux.lds.S
index f855698..eebcbaa 100644
--- a/arch/score/kernel/vmlinux.lds.S
+++ b/arch/score/kernel/vmlinux.lds.S
@@ -24,6 +24,8 @@
  */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
 
 OUTPUT_ARCH(score)
 ENTRY(_stext)
@@ -49,21 +51,9 @@ SECTIONS
 	. = ALIGN(16);
 	RODATA
 
-	/* Exception table */
-	. = ALIGN(16);
-	__ex_table : {
-		__start___ex_table = .;
-		*(__ex_table)
-		__stop___ex_table = .;
-	}
+	EXCEPTION_TABLE(16)
 
-	/* writeable */
-	.data ALIGN (4096): {
-		*(.data.init_task)
-
-		DATA_DATA
-		CONSTRUCTORS
-	}
+	RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
 
 	/* We want the small data sections together, so single-instruction offsets
 	   can access them all, and initialized data all before uninitialized, so
@@ -72,45 +62,14 @@ SECTIONS
 	.sdata : {
 		*(.sdata)
 	}
-
-	. = ALIGN(32);
-	.data.cacheline_aligned : {
-		*(.data.cacheline_aligned)
-	}
 	_edata =  .;			/* End of data section */
 
 	/* will be freed after init */
-	. = ALIGN(4096);		/* Init code and data */
+	. = ALIGN(PAGE_SIZE);		/* Init code and data */
 	__init_begin = .;
 
-	. = ALIGN(4096);
-	.init.text : {
-		_sinittext = .;
-		INIT_TEXT
-		_einittext = .;
-	}
-	.init.data : {
-		INIT_DATA
-	}
-	. = ALIGN(16);
-	.init.setup : {
-		__setup_start = .;
-		*(.init.setup)
-		__setup_end = .;
-	}
-
-	.initcall.init : {
-		__initcall_start = .;
-		INITCALLS
-		__initcall_end = .;
-	}
-
-	.con_initcall.init : {
-		__con_initcall_start = .;
-		*(.con_initcall.init)
-		__con_initcall_end = .;
-	}
-	SECURITY_INIT
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	INIT_DATA_SECTION(16)
 
 	/* .exit.text is discarded at runtime, not link time, to deal with
 	 * references from .rodata
@@ -121,28 +80,10 @@ SECTIONS
 	.exit.data : {
 		EXIT_DATA
 	}
-#if defined(CONFIG_BLK_DEV_INITRD)
-	.init.ramfs ALIGN(4096): {
-		__initramfs_start = .;
-		*(.init.ramfs)
-		__initramfs_end = .;
-		. = ALIGN(4);
-		LONG(0);
-	}
-#endif
-	. = ALIGN(4096);
+	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
 	/* freed after init ends here */
 
-	__bss_start = .;	/* BSS */
-	.sbss  : {
-		*(.sbss)
-		*(.scommon)
-	}
-	.bss : {
-		*(.bss)
-		*(COMMON)
-	}
-	__bss_stop = .;
+	BSS_SECTION(0, 0, 0)
 	_end = .;
 }
-- 
cgit v0.10.2


From 7e483865612568268773317219d07b38a8c4d62d Mon Sep 17 00:00:00 2001
From: Chen Liqin <liqin.chen@sunplusct.com>
Date: Wed, 23 Sep 2009 11:40:15 +0800
Subject: score: update email address in MAINTAINERS.

Signed-off-by: Chen Liqin <liqin.chen@sunplusct.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index d24c882..4467da2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4460,7 +4460,7 @@ SCORE ARCHITECTURE
 P:	Chen Liqin
 M:	liqin.chen@sunplusct.com
 P:	Lennox Wu
-M:	lennox.wu@sunplusct.com
+M:	lennox.wu@gmail.com
 W:	http://www.sunplusct.com
 S:	Supported
 
-- 
cgit v0.10.2


From 08ff18e299b1a1c91f4911fe9f35c4550218c73f Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 22 Sep 2009 19:58:58 -0700
Subject: x86: ptrace: do not sign-extend orig_ax on write

The high 32 bits of orig_ax will be ignored when it matters,
so don't fiddle them when setting it.

Signed-off-by: Roland McGrath <roland@redhat.com>

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8d7d5c9..52222fa 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -325,16 +325,6 @@ static int putreg(struct task_struct *child,
 		return set_flags(child, value);
 
 #ifdef CONFIG_X86_64
-	/*
-	 * Orig_ax is really just a flag with small positive and
-	 * negative values, so make sure to always sign-extend it
-	 * from 32 bits so that it works correctly regardless of
-	 * whether we come from a 32-bit environment or not.
-	 */
-	case offsetof(struct user_regs_struct, orig_ax):
-		value = (long) (s32) value;
-		break;
-
 	case offsetof(struct user_regs_struct,fs_base):
 		if (value >= TASK_SIZE_OF(child))
 			return -EIO;
@@ -1121,17 +1111,10 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 	R32(esi, si);
 	R32(ebp, bp);
 	R32(eax, ax);
+	R32(orig_eax, orig_ax);
 	R32(eip, ip);
 	R32(esp, sp);
 
-	case offsetof(struct user32, regs.orig_eax):
-		/*
-		 * Sign-extend the value so that orig_eax = -1
-		 * causes (long)orig_ax < 0 tests to fire correctly.
-		 */
-		regs->orig_ax = (long) (s32) value;
-		break;
-
 	case offsetof(struct user32, regs.eflags):
 		return set_flags(child, value);
 
-- 
cgit v0.10.2


From 8cb3ed13935b9b523c2de7afc8f68473fe1d4531 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 22 Sep 2009 20:12:07 -0700
Subject: x86: ptrace: set TS_COMPAT when 32-bit ptrace sets orig_eax>=0

The 32-bit ptrace syscall on a 64-bit kernel (32-bit debugger on
32-bit task) behaves differently than a native 32-bit kernel.  When
setting a register state of orig_eax>=0 and eax=-ERESTART* when the
debugged task is NOT on its way out of a 32-bit syscall, the task will
fail to do the syscall restart logic that it should do.

Test case available at http://sources.redhat.com/cgi-bin/cvsweb.cgi/~checkout~/tests/ptrace-tests/tests/erestartsys-trap.c?cvsroot=systemtap

This happens because the 32-bit ptrace syscall sets eax=0xffffffff
when it sets orig_eax>=0.  The resuming task will not sign-extend this
for the -ERESTART* check because TS_COMPAT is not set.  (So the task
thinks it is restarting after a 64-bit syscall, not a 32-bit one.)

The fix is to have 32-bit ptrace calls set TS_COMPAT when setting
orig_eax>=0.  This ensures that the 32-bit syscall restart logic
will apply when the child resumes.

Signed-off-by: Roland McGrath <roland@redhat.com>

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 52222fa..7b058a2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1111,10 +1111,22 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 	R32(esi, si);
 	R32(ebp, bp);
 	R32(eax, ax);
-	R32(orig_eax, orig_ax);
 	R32(eip, ip);
 	R32(esp, sp);
 
+	case offsetof(struct user32, regs.orig_eax):
+		/*
+		 * A 32-bit debugger setting orig_eax means to restore
+		 * the state of the task restarting a 32-bit syscall.
+		 * Make sure we interpret the -ERESTART* codes correctly
+		 * in case the task is not actually still sitting at the
+		 * exit from a 32-bit syscall with TS_COMPAT still set.
+		 */
+		regs->orig_ax = value;
+		if (syscall_get_nr(child, regs) >= 0)
+			task_thread_info(child)->status |= TS_COMPAT;
+		break;
+
 	case offsetof(struct user32, regs.eflags):
 		return set_flags(child, value);
 
-- 
cgit v0.10.2


From e1070211f7327a1f197d535aa886f721a241c32f Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 23 Sep 2009 00:49:39 -0400
Subject: mtd: jedec_probe: add PSD4256G6V id

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index 16bc2fa..736a3be 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -155,6 +155,7 @@
 #define M50LPW080       0x002F
 #define M50FLW080A	0x0080
 #define M50FLW080B	0x0081
+#define PSD4256G6V	0x00e9
 
 /* SST */
 #define SST29EE020	0x0010
@@ -206,6 +207,7 @@ enum uaddr {
 	MTD_UADDR_0x0555_0x02AA,
 	MTD_UADDR_0x0555_0x0AAA,
 	MTD_UADDR_0x5555_0x2AAA,
+	MTD_UADDR_0x0AAA_0x0554,
 	MTD_UADDR_0x0AAA_0x0555,
 	MTD_UADDR_0xAAAA_0x5555,
 	MTD_UADDR_DONT_CARE,		/* Requires an arbitrary address */
@@ -250,6 +252,11 @@ static const struct unlock_addr  unlock_addrs[] = {
 		.addr2 = 0x2aaa
 	},
 
+	[MTD_UADDR_0x0AAA_0x0554] = {
+		.addr1 = 0x0AAA,
+		.addr2 = 0x0554
+	},
+
 	[MTD_UADDR_0x0AAA_0x0555] = {
 		.addr1 = 0x0AAA,
 		.addr2 = 0x0555
@@ -1744,6 +1751,18 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x1000,16),
 		}
 	}, {
+		.mfr_id		= 0xff00 | MANUFACTURER_ST,
+		.dev_id		= 0xff00 | PSD4256G6V,
+		.name		= "ST PSD4256G6V",
+		.devtypes	= CFI_DEVICETYPE_X16,
+		.uaddr		= MTD_UADDR_0x0AAA_0x0554,
+		.dev_size	= SIZE_1MiB,
+		.cmd_set	= P_ID_AMD_STD,
+		.nr_regions	= 1,
+		.regions	= {
+			ERASEINFO(0x10000,16),
+		}
+	}, {
 		.mfr_id		= MANUFACTURER_TOSHIBA,
 		.dev_id		= TC58FVT160,
 		.name		= "Toshiba TC58FVT160",
-- 
cgit v0.10.2


From a2f2ddbf2bafdbc7e4f3bbf09439b42c8fee2747 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 19 Aug 2009 15:16:01 -0700
Subject: ocfs2: __ocfs2_abort() should not enable panic for local mounts

In a clustered setup, we have to panic the box on journal abort. This is
because we don't have the facility to go hard readonly. With hard ro, another
node would detect node failure and initiate recovery.

Having said that, we shouldn't force panic if the volume is mounted locally.
This patch defers the handling to the mount option, errors.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8b60621..154e625 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2494,7 +2494,8 @@ void __ocfs2_abort(struct super_block* sb,
 	/* Force a panic(). This stinks, but it's better than letting
 	 * things continue without having a proper hard readonly
 	 * here. */
-	OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
+	if (!ocfs2_mount_local(OCFS2_SB(sb)))
+		OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
 	ocfs2_handle_error(sb);
 }
 
-- 
cgit v0.10.2


From d92bc5127b27f315ef0ef2c1e1829fd6a5cba54a Mon Sep 17 00:00:00 2001
From: Coly Li <coly.li@suse.de>
Date: Fri, 28 Aug 2009 19:03:18 +0800
Subject: dlmglue.c: add missed mlog lines

This patch adds the missed mlog_exit() and mlog_exit_void() lines when routines
return.

Signed-off-by: Coly Li <coly.li@suse.de>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index bb2fc69..0d38d67 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1577,8 +1577,10 @@ int ocfs2_rw_lock(struct inode *inode, int write)
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
-	if (ocfs2_mount_local(osb))
+	if (ocfs2_mount_local(osb)) {
+		mlog_exit(0);
 		return 0;
+	}
 
 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
 
@@ -3038,6 +3040,7 @@ static void ocfs2_unlock_ast(void *opaque, int error)
 		     "unlock_action %d\n", error, lockres->l_name,
 		     lockres->l_unlock_action);
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
+		mlog_exit_void();
 		return;
 	}
 
-- 
cgit v0.10.2


From 83e32d9044a4510fffdf65c2691a25c0ba84e259 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Thu, 3 Sep 2009 15:56:33 +0800
Subject: ocfs2: add spinlock protection when dealing with lockres->purge.

when we check/modify lockres->purge, we should with the protection of lockres->spinlock.
in dlm_purge_lockres(), the checking/modifying is not with the protectin.
this patch fixes it.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d490b66..98569e8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -212,14 +212,18 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		spin_lock(&dlm->spinlock);
 	}
 
+	spin_lock(&res->spinlock);
 	if (!list_empty(&res->purge)) {
 		mlog(0, "removing lockres %.*s:%p from purgelist, "
 		     "master = %d\n", res->lockname.len, res->lockname.name,
 		     res, master);
 		list_del_init(&res->purge);
+		spin_unlock(&res->spinlock);
 		dlm_lockres_put(res);
 		dlm->purge_count--;
-	}
+	} else
+		spin_unlock(&res->spinlock);
+
 	__dlm_unhash_lockres(res);
 
 	/* lockres is not in the hash now.  drop the flag and wake up
-- 
cgit v0.10.2


From b80474b432913f73cce8db001e9fa3104f9b79ee Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 10 Sep 2009 15:28:47 +0800
Subject: ocfs2: Use buffer IO if we are appending a file.

In ocfs2_file_aio_write, we will prevent direct io if
we find that we are appending(changing i_size) and call
generic_file_aio_write_nolock. But actually O_DIRECT flag
is there and this function will call generic_file_direct_write
eventually which will update i_size and leave di->i_size
alone. The bug is
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1173.

So this patch let ocfs2_direct_IO returns 0 directly if we
are appending so that buffered write will be called and
di->i_size get updated successfully. And this is also
what we want in ocfs2_file_aio_write.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 33e03c5..72e7606 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -690,6 +690,10 @@ static ssize_t ocfs2_direct_IO(int rw,
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
 		return 0;
 
+	/* Fallback to buffered I/O if we are appending. */
+	if (i_size_read(inode) <= offset)
+		return 0;
+
 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
 					    inode->i_sb->s_bdev, iov, offset,
 					    nr_segs, 
-- 
cgit v0.10.2


From f68d24082e22ccee3077d11aeb6dc5354f0ca7f1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:29 -0600
Subject: virtio_pci: minor MSI-X cleanups

1) Rename vp_request_vectors to vp_request_msix_vectors, and take
   non-MSI-X case out to caller.
2) Comment weird pci_enable_msix API
3) Rename vp_find_vq to setup_vq.
4) Fix spaces to tabs
5) Make nvectors calc internal to vp_try_to_find_vqs()
6) Rename vector to msix_vector for more clarity.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 248e00e..4a1f1eb 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -84,7 +84,7 @@ struct virtio_pci_vq_info
 	struct list_head node;
 
 	/* MSI-X vector (or none) */
-	unsigned vector;
+	unsigned msix_vector;
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -280,25 +280,14 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	vp_dev->msix_entries = NULL;
 }
 
-static int vp_request_vectors(struct virtio_device *vdev, int nvectors,
-			      bool per_vq_vectors)
+static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
+				   bool per_vq_vectors)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	const char *name = dev_name(&vp_dev->vdev.dev);
 	unsigned i, v;
 	int err = -ENOMEM;
 
-	if (!nvectors) {
-		/* Can't allocate MSI-X vectors, use regular interrupt */
-		vp_dev->msix_vectors = 0;
-		err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
-				  IRQF_SHARED, name, vp_dev);
-		if (err)
-			return err;
-		vp_dev->intx_enabled = 1;
-		return 0;
-	}
-
 	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
 				       GFP_KERNEL);
 	if (!vp_dev->msix_entries)
@@ -311,6 +300,7 @@ static int vp_request_vectors(struct virtio_device *vdev, int nvectors,
 	for (i = 0; i < nvectors; ++i)
 		vp_dev->msix_entries[i].entry = i;
 
+	/* pci_enable_msix returns positive if we can't get this many. */
 	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
 	if (err > 0)
 		err = -ENOSPC;
@@ -356,10 +346,22 @@ error:
 	return err;
 }
 
-static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
-				    void (*callback)(struct virtqueue *vq),
-				    const char *name,
-				    u16 vector)
+static int vp_request_intx(struct virtio_device *vdev)
+{
+	int err;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
+	if (!err)
+		vp_dev->intx_enabled = 1;
+	return err;
+}
+
+static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
+				  void (*callback)(struct virtqueue *vq),
+				  const char *name,
+				  u16 msix_vec)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
@@ -384,7 +386,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	info->queue_index = index;
 	info->num = num;
-	info->vector = vector;
+	info->msix_vector = msix_vec;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
@@ -408,10 +410,10 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	vq->priv = info;
 	info->vq = vq;
 
-	 if (vector != VIRTIO_MSI_NO_VECTOR) {
-		iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		if (vector == VIRTIO_MSI_NO_VECTOR) {
+	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
+		iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
 			err = -EBUSY;
 			goto out_assign;
 		}
@@ -472,7 +474,8 @@ static void vp_del_vqs(struct virtio_device *vdev)
 	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
 		info = vq->priv;
 		if (vp_dev->per_vq_vectors)
-			free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
+				 vq);
 		vp_del_vq(vq);
 	}
 	vp_dev->per_vq_vectors = false;
@@ -484,38 +487,58 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			      struct virtqueue *vqs[],
 			      vq_callback_t *callbacks[],
 			      const char *names[],
-			      int nvectors,
+			      bool use_msix,
 			      bool per_vq_vectors)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	u16 vector;
-	int i, err, allocated_vectors;
+	u16 msix_vec;
+	int i, err, nvectors, allocated_vectors;
 
-	err = vp_request_vectors(vdev, nvectors, per_vq_vectors);
-	if (err)
-		goto error_request;
+	if (!use_msix) {
+		/* Old style: one normal interrupt for change and all vqs. */
+		err = vp_request_intx(vdev);
+		if (err)
+			goto error_request;
+	} else {
+		if (per_vq_vectors) {
+			/* Best option: one for change interrupt, one per vq. */
+			nvectors = 1;
+			for (i = 0; i < nvqs; ++i)
+				if (callbacks[i])
+					++nvectors;
+		} else {
+			/* Second best: one for change, shared for all vqs. */
+			nvectors = 2;
+		}
+
+		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
+		if (err)
+			goto error_request;
+	}
 
 	vp_dev->per_vq_vectors = per_vq_vectors;
 	allocated_vectors = vp_dev->msix_used_vectors;
 	for (i = 0; i < nvqs; ++i) {
 		if (!callbacks[i] || !vp_dev->msix_enabled)
-			vector = VIRTIO_MSI_NO_VECTOR;
+			msix_vec = VIRTIO_MSI_NO_VECTOR;
 		else if (vp_dev->per_vq_vectors)
-			vector = allocated_vectors++;
+			msix_vec = allocated_vectors++;
 		else
-			vector = VP_MSIX_VQ_VECTOR;
-		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i], vector);
+			msix_vec = VP_MSIX_VQ_VECTOR;
+		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto error_find;
 		}
 		/* allocate per-vq irq if available and necessary */
-		if (vp_dev->per_vq_vectors && vector != VIRTIO_MSI_NO_VECTOR) {
-			snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names,
-				 "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]);
-			err = request_irq(vp_dev->msix_entries[vector].vector,
-					  vring_interrupt, 0,
-					  vp_dev->msix_names[vector], vqs[i]);
+		if (vp_dev->per_vq_vectors) {
+			snprintf(vp_dev->msix_names[msix_vec],
+				 sizeof *vp_dev->msix_names,
+				 "%s-%s",
+				 dev_name(&vp_dev->vdev.dev), names[i]);
+			err = request_irq(msix_vec, vring_interrupt, 0,
+					  vp_dev->msix_names[msix_vec],
+					  vqs[i]);
 			if (err) {
 				vp_del_vq(vqs[i]);
 				goto error_find;
@@ -537,28 +560,20 @@ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       vq_callback_t *callbacks[],
 		       const char *names[])
 {
-	int vectors = 0;
-	int i, uninitialized_var(err);
-
-	/* How many vectors would we like? */
-	for (i = 0; i < nvqs; ++i)
-		if (callbacks[i])
-			++vectors;
+	int err;
 
-	/* We want at most one vector per queue and one for config changes. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 vectors + 1, true);
+	/* Try MSI-X with one vector per queue. */
+	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
 	if (!err)
 		return 0;
-	/* Fallback to separate vectors for config and a shared for queues. */
+	/* Fallback: MSI-X with one vector for config, one shared for queues. */
 	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 2, false);
+				 true, false);
 	if (!err)
 		return 0;
 	/* Finally fall back to regular interrupts. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 0, false);
-	return err;
+	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
+				  false, false);
 }
 
 static struct virtio_config_ops virtio_pci_config_ops = {
-- 
cgit v0.10.2


From 3c1b27d5043086a485f8526353ae9fe37bfa1065 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:31 -0600
Subject: virtio: make add_buf return capacity remaining

This API change means that virtio_net can tell how much capacity
remains for buffers.  It's necessarily fuzzy, since
VIRTIO_RING_F_INDIRECT_DESC means we can fit any number of descriptors
in one, *if* we can kmalloc.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dinesh Subhraveti <dineshs@us.ibm.com>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index aa1a3d5..d739ee4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -139,7 +139,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
+	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
 		mempool_free(vbr, vblk->pool);
 		return false;
 	}
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 32216b6..b6c24dc 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -51,7 +51,7 @@ static void register_buffer(void)
 
 	sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left);
 	/* There should always be room for one buffer. */
-	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
+	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) < 0)
 		BUG();
 	vq->vq_ops->kick(vq);
 }
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index c74dacf..a035ae3 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -65,7 +65,7 @@ static int put_chars(u32 vtermno, const char *buf, int count)
 
 	/* add_buf wants a token to identify this buffer: we hand it any
 	 * non-NULL pointer, since there's only ever one buffer. */
-	if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) {
+	if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) {
 		/* Tell Host to go! */
 		out_vq->vq_ops->kick(out_vq);
 		/* Chill out until it's done with the buffer. */
@@ -85,7 +85,7 @@ static void add_inbuf(void)
 	sg_init_one(sg, inbuf, PAGE_SIZE);
 
 	/* We should always be able to add one buffer to an empty queue. */
-	if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0)
+	if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) < 0)
 		BUG();
 	in_vq->vq_ops->kick(in_vq);
 }
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 32266fb..fbf04a5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -320,7 +320,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
 		skb_queue_head(&vi->recv, skb);
 
 		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
-		if (err) {
+		if (err < 0) {
 			skb_unlink(skb, &vi->recv);
 			trim_pages(vi, skb);
 			kfree_skb(skb);
@@ -373,7 +373,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
 		skb_queue_head(&vi->recv, skb);
 
 		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
-		if (err) {
+		if (err < 0) {
 			skb_unlink(skb, &vi->recv);
 			kfree_skb(skb);
 			break;
@@ -527,7 +527,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 
 	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
-	if (!err && !vi->free_in_tasklet)
+	if (err >= 0 && !vi->free_in_tasklet)
 		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
 
 	return err;
@@ -538,7 +538,7 @@ static void xmit_tasklet(unsigned long data)
 	struct virtnet_info *vi = (void *)data;
 
 	netif_tx_lock_bh(vi->dev);
-	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
+	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) >= 0) {
 		vi->svq->vq_ops->kick(vi->svq);
 		vi->last_xmit_skb = NULL;
 	}
@@ -557,7 +557,7 @@ again:
 
 	/* If we has a buffer left over from last time, send it now. */
 	if (unlikely(vi->last_xmit_skb) &&
-	    xmit_skb(vi, vi->last_xmit_skb) != 0)
+	    xmit_skb(vi, vi->last_xmit_skb) < 0)
 		goto stop_queue;
 
 	vi->last_xmit_skb = NULL;
@@ -565,7 +565,7 @@ again:
 	/* Put new one in send queue and do transmit */
 	if (likely(skb)) {
 		__skb_queue_head(&vi->send, skb);
-		if (xmit_skb(vi, skb) != 0) {
+		if (xmit_skb(vi, skb) < 0) {
 			vi->last_xmit_skb = skb;
 			skb = NULL;
 			goto stop_queue;
@@ -668,7 +668,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
 	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
 
-	BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi));
+	BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) < 0);
 
 	vi->cvq->vq_ops->kick(vi->cvq);
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 26b2782..3978923 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -84,7 +84,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 	init_completion(&vb->acked);
 
 	/* We should always be able to add one buffer to an empty queue. */
-	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) != 0)
+	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
 		BUG();
 	vq->vq_ops->kick(vq);
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a882f26..f536005 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -208,7 +208,11 @@ add_head:
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
-	return 0;
+
+	/* If we're indirect, we can fit many (assuming not OOM). */
+	if (vq->indirect)
+		return vq->num_free ? vq->vring.num : 0;
+	return vq->num_free;
 }
 
 static void vring_kick(struct virtqueue *_vq)
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4fca4f5..057a2e0 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -34,7 +34,7 @@ struct virtqueue {
  *	out_num: the number of sg readable by other side
  *	in_num: the number of sg which are writable (after readable ones)
  *	data: the token identifying the buffer.
- *      Returns 0 or an error.
+ *      Returns remaining capacity of queue (sg segments) or a negative error.
  * @kick: update after add_buf
  *	vq: the struct virtqueue
  *	After one or more add_buf calls, invoke this to kick the other side.
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 9bf0b73..53c139d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -200,7 +200,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 
 	req->status = REQ_STATUS_SENT;
 
-	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) {
+	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
 		P9_DPRINTK(P9_DEBUG_TRANS,
 			"9p debug: virtio rpc add_buf returned failure");
 		return -EIO;
-- 
cgit v0.10.2


From 3a20210dc26bbfff3bbb48bb22d2846240b71d8f Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Fri, 31 Jul 2009 12:09:05 +0900
Subject: virtio: get rid of redundant VIRTIO_ID_9P definition

VIRTIO_ID_9P is already defined in include/linux/virtio_9p.h
so use that definition instead.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Eric Van Hensbergen <ericvh@gmail.com>

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 53c139d..ea1e3da 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -334,8 +334,6 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	}
 }
 
-#define VIRTIO_ID_9P 9
-
 static struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID },
 	{ 0 },
-- 
cgit v0.10.2


From 3ca4f5ca73057a617f9444a91022d7127041970a Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Fri, 31 Jul 2009 15:25:56 +0900
Subject: virtio: add virtio IDs file

Virtio IDs are spread all over the tree which makes assigning new IDs
bothersome. Putting them together should make the process less error-prone.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 950cde6..84bbb19 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -42,6 +42,7 @@
 #include <signal.h>
 #include "linux/lguest_launcher.h"
 #include "linux/virtio_config.h"
+#include <linux/virtio_ids.h>
 #include "linux/virtio_net.h"
 #include "linux/virtio_blk.h"
 #include "linux/virtio_console.h"
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index d739ee4..73de753 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -3,6 +3,7 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_blk.h>
 #include <linux/scatterlist.h>
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index b6c24dc..962968f 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -21,6 +21,7 @@
 #include <linux/scatterlist.h>
 #include <linux/spinlock.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_rng.h>
 
 /* The host will fill any buffer we give it with sweet, sweet randomness.  We
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index a035ae3..0d328b5 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_console.h>
 #include "hvc_console.h"
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fbf04a5..5c498d2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
 #include <linux/ethtool.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 3978923..200c22f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -19,6 +19,7 @@
  */
 //#define DEBUG
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_balloon.h>
 #include <linux/swap.h>
 #include <linux/kthread.h>
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index b3c4a60..ea7226a 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -4,8 +4,6 @@
  * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
-/* The ID for virtio console */
-#define VIRTIO_ID_9P	9
 /* Maximum number of virtio channels per partition (1 for now) */
 #define MAX_9P_CHAN	1
 
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 8726ff7..09d7300 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -4,9 +4,6 @@
  * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
-/* The ID for virtio_balloon */
-#define VIRTIO_ID_BALLOON	5
-
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 8dab9f2..25fbabf 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -5,9 +5,6 @@
 #include <linux/types.h>
 #include <linux/virtio_config.h>
 
-/* The ID for virtio_block */
-#define VIRTIO_ID_BLOCK	2
-
 /* Feature bits */
 #define VIRTIO_BLK_F_BARRIER	0	/* Does host support barriers? */
 #define VIRTIO_BLK_F_SIZE_MAX	1	/* Indicates maximum segment size */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index dc16111..b5f5198 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -5,9 +5,6 @@
 /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
  * anyone can use the definitions to implement compatible drivers/servers. */
 
-/* The ID for virtio console */
-#define VIRTIO_ID_CONSOLE	3
-
 /* Feature bits */
 #define VIRTIO_CONSOLE_F_SIZE	0	/* Does host provide console size? */
 
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
new file mode 100644
index 0000000..06660c0
--- /dev/null
+++ b/include/linux/virtio_ids.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_VIRTIO_IDS_H
+#define _LINUX_VIRTIO_IDS_H
+/*
+ * Virtio IDs
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ */
+
+#define VIRTIO_ID_NET		1 /* virtio net */
+#define VIRTIO_ID_BLOCK		2 /* virtio block */
+#define VIRTIO_ID_CONSOLE	3 /* virtio console */
+#define VIRTIO_ID_RNG		4 /* virtio ring */
+#define VIRTIO_ID_BALLOON	5 /* virtio balloon */
+#define VIRTIO_ID_9P		9 /* 9p virtio console */
+
+#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index d8dd539..1f41734 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -6,9 +6,6 @@
 #include <linux/virtio_config.h>
 #include <linux/if_ether.h>
 
-/* The ID for virtio_net */
-#define VIRTIO_ID_NET	1
-
 /* The feature bitmap for virtio net */
 #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
 #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
index 1a85dab..48121c3 100644
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -4,7 +4,4 @@
  * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
-/* The ID for virtio_rng */
-#define VIRTIO_ID_RNG	4
-
 #endif /* _LINUX_VIRTIO_RNG_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index ea1e3da..b2e07f0 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,6 +43,7 @@
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_9p.h>
 
 #define VIRTQUEUE_NUM	128
-- 
cgit v0.10.2


From f1b0ef062602713c2c7cfa12362d5d90ed01c5f6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Sep 2009 19:57:42 +0200
Subject: virtio_blk: add support for cache flush

Recent qemu has added a VIRTIO_BLK_F_FLUSH flag to advertise that the
virtual disk has a volatile write cache that needs to be flushed.  In case
we see this feature implement tell the Linux block layer about the fact
and use the new VIRTIO_BLK_T_FLUSH to flush the cache when required.  This
allows for an correct and simple implementation of write barriers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 73de753..3d5fe97 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -92,15 +92,26 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		return false;
 
 	vbr->req = req;
-	if (blk_fs_request(vbr->req)) {
+	switch (req->cmd_type) {
+	case REQ_TYPE_FS:
 		vbr->out_hdr.type = 0;
 		vbr->out_hdr.sector = blk_rq_pos(vbr->req);
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-	} else if (blk_pc_request(vbr->req)) {
+		break;
+	case REQ_TYPE_BLOCK_PC:
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
 		vbr->out_hdr.sector = 0;
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-	} else {
+		break;
+	case REQ_TYPE_LINUX_BLOCK:
+		if (req->cmd[0] == REQ_LB_OP_FLUSH) {
+			vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+			vbr->out_hdr.sector = 0;
+			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			break;
+		}
+		/*FALLTHRU*/
+	default:
 		/* We don't put anything else in the queue. */
 		BUG();
 	}
@@ -200,6 +211,12 @@ out:
 	return err;
 }
 
+static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
+{
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_FLUSH;
+}
+
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
@@ -338,7 +355,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	index++;
 
 	/* If barriers are supported, tell block layer that queue is ordered */
-	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
+		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+				  virtblk_prepare_flush);
+	else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
 		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
 
 	/* If disk is read-only in the host, the guest should obey */
@@ -425,7 +445,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY, VIRTIO_BLK_F_FLUSH
 };
 
 /*
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 25fbabf..15cb666 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -14,6 +14,7 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
 #define VIRTIO_BLK_F_IDENTIFY	8	/* ATA IDENTIFY supported */
+#define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
 
 #define VIRTIO_BLK_ID_BYTES	(sizeof(__u16[256]))	/* IDENTIFY DATA */
 
@@ -35,6 +36,17 @@ struct virtio_blk_config {
 	__u8 identify[VIRTIO_BLK_ID_BYTES];
 } __attribute__((packed));
 
+/*
+ * Command types
+ *
+ * Usage is a bit tricky as some bits are used as flags and some are not.
+ *
+ * Rules:
+ *   VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or
+ *   VIRTIO_BLK_T_BARRIER.  VIRTIO_BLK_T_FLUSH is a command of its own
+ *   and may not be combined with any of the other flags.
+ */
+
 /* These two define direction. */
 #define VIRTIO_BLK_T_IN		0
 #define VIRTIO_BLK_T_OUT	1
@@ -42,6 +54,9 @@ struct virtio_blk_config {
 /* This bit says it's a scsi command, not an actual read or write. */
 #define VIRTIO_BLK_T_SCSI_CMD	2
 
+/* Cache flush command */
+#define VIRTIO_BLK_T_FLUSH	4
+
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
 
-- 
cgit v0.10.2


From cdae0ad5e8ea630017d4cad3923f763c4e6c3127 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:42 -0600
Subject: lguest: move panic notifier registration to its expected place.

We used to defer it, so lockdep was happy.  We now init lockdep early
anyway, so just do it after that.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4cb7d5d..7e59dc1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1135,11 +1135,6 @@ static struct notifier_block paniced = {
 /* Setting up memory is fairly easy. */
 static __init char *lguest_memory_setup(void)
 {
-	/* We do this here and not earlier because lockcheck used to barf if we
-	 * did it before start_kernel().  I think we fixed that, so it'd be
-	 * nice to move it back to lguest_init.  Patch welcome... */
-	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
-
 	/*
 	 *The Linux bootloader header contains an "e820" memory map: the
 	 * Launcher populated the first entry with our memory limit.
@@ -1364,10 +1359,13 @@ __init void lguest_init(void)
 
 	/*
 	 * If we don't initialize the lock dependency checker now, it crashes
-	 * paravirt_disable_iospace.
+	 * atomic_notifier_chain_register, then paravirt_disable_iospace.
 	 */
 	lockdep_init();
 
+	/* Hook in our special panic hypercall code. */
+	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
+
 	/*
 	 * The IDE code spends about 3 seconds probing for disks: if we reserve
 	 * all the I/O ports up front it can't get them and so doesn't probe.
-- 
cgit v0.10.2


From 4c1ea3dd718a1d93a726cb3e66665ac4170dcccd Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:45 -0600
Subject: lguest: use set_pte/set_pmd uniformly for real page table entries

If we're building a pte, we can use simple assigment; only use set_pte
etc. when we're actually going to use that destination as a PTE.  I
don't know that we'll ever run under Xen, but it's neater.

And use set_pte/set_pmd rather than assuming native_ versions, even
though that's probably true for most people.

(Includes compile fix by Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Matias Zabaljauregui <zabaljauregui@gmail.com>
Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>

diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a8d0aee..232fba6 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -380,7 +380,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		 * And we copy the flags to the shadow PMD entry.  The page
 		 * number in the shadow PMD is the page we just allocated.
 		 */
-		native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
+		set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
 	}
 
 	/*
@@ -447,7 +447,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		 * we will come back here when a write does actually occur, so
 		 * we can update the Guest's _PAGE_DIRTY flag.
 		 */
-		native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
+		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
 
 	/*
 	 * Finally, we write the Guest PTE entry back: we've set the
@@ -528,7 +528,7 @@ static void release_pmd(pmd_t *spmd)
 		/* Now we can free the page of PTEs */
 		free_page((long)ptepage);
 		/* And zero out the PMD entry so we never release it twice. */
-		native_set_pmd(spmd, __pmd(0));
+		set_pmd(spmd, __pmd(0));
 	}
 }
 
@@ -833,15 +833,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
 			 */
 			if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
 				check_gpte(cpu, gpte);
-				native_set_pte(spte,
-						gpte_to_spte(cpu, gpte,
+				set_pte(spte,
+					gpte_to_spte(cpu, gpte,
 						pte_flags(gpte) & _PAGE_DIRTY));
 			} else {
 				/*
 				 * Otherwise kill it and we can demand_page()
 				 * it in later.
 				 */
-				native_set_pte(spte, __pte(0));
+				set_pte(spte, __pte(0));
 			}
 #ifdef CONFIG_X86_PAE
 		}
@@ -983,16 +983,15 @@ static unsigned long setup_pagetables(struct lguest *lg,
 	 */
 	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
 	     i += PTRS_PER_PTE, j++) {
-		/* FIXME: native_set_pmd is overkill here. */
-		native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
-		- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
+		pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
+			      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
 
 		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
 			return -EFAULT;
 	}
 
 	/* One PGD entry, pointing to that PMD page. */
-	set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
+	pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
 	/* Copy it in as the first PGD entry (ie. addresses 0-1G). */
 	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
 		return -EFAULT;
@@ -1141,15 +1140,13 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
 	pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
 	pte_t regs_pte;
-	unsigned long pfn;
 
 #ifdef CONFIG_X86_PAE
 	pmd_t switcher_pmd;
 	pmd_t *pmd_table;
 
-	/* FIXME: native_set_pmd is overkill here. */
-	native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
-		       PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT,
+			       PAGE_KERNEL_EXEC);
 
 	/* Figure out where the pmd page is, by reading the PGD, and converting
 	 * it to a virtual address. */
@@ -1157,7 +1154,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
 								<< PAGE_SHIFT);
 	/* Now write it into the shadow page table. */
-	native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
+	set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
 #else
 	pgd_t switcher_pgd;
 
@@ -1179,10 +1176,8 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * page is already mapped there, we don't have to copy them out
 	 * again.
 	 */
-	pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
-	native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
-	native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
-			regs_pte);
+	regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL);
+	set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte);
 }
 /*:*/
 
@@ -1209,7 +1204,7 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
 
 	/* The first entries are easy: they map the Switcher code. */
 	for (i = 0; i < pages; i++) {
-		native_set_pte(&pte[i], mk_pte(switcher_page[i],
+		set_pte(&pte[i], mk_pte(switcher_page[i],
 				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 	}
 
@@ -1217,14 +1212,14 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
 	i = pages + cpu*2;
 
 	/* First page (Guest registers) is writable from the Guest */
-	native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
+	set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
 			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
 
 	/*
 	 * The second page contains the "struct lguest_ro_state", and is
 	 * read-only.
 	 */
-	native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
+	set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
 			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 }
 
-- 
cgit v0.10.2


From fb100d78c04ff6053047625d0368d0d4b1d9912a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:46 -0600
Subject: lguest: use PGDIR_SHIFT for PAE code to allow different PAGE_OFFSET

We still assume the Guest and Host have the same PAGE_OFFSET settings,
but now we don't assume 0xC0000000.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Matias Zabaljauregui <zabaljauregui@gmail.com>

diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 232fba6..bf37a31 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -996,11 +996,9 @@ static unsigned long setup_pagetables(struct lguest *lg,
 	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
 		return -EFAULT;
 	/*
-	 * And the third PGD entry (ie. addresses 3G-4G).
-	 *
-	 * FIXME: This assumes that PAGE_OFFSET for the Guest is 0xC0000000.
+	 * And the other PGD entry to make the linear mapping at PAGE_OFFSET
 	 */
-	if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
+	if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
 		return -EFAULT;
 #else
 	/*
-- 
cgit v0.10.2


From 6c189d8312246af776c2587c233d6afcf3714438 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Wed, 5 Aug 2009 17:42:37 +0800
Subject: lguest: cleanup for map_switcher()

We can use alloc_page() instead of get_zeroed_page() and virt_to_page()

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 1e2cb84..8744d24 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -67,12 +67,11 @@ static __init int map_switcher(void)
 	 * so we make sure they're zeroed.
 	 */
 	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
-		unsigned long addr = get_zeroed_page(GFP_KERNEL);
-		if (!addr) {
+		switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!switcher_page[i]) {
 			err = -ENOMEM;
 			goto free_some_pages;
 		}
-		switcher_page[i] = virt_to_page(addr);
 	}
 
 	/*
-- 
cgit v0.10.2


From ca60a42c9be41c07ebcc2ec8c43dd1be53f147bf Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:47 -0600
Subject: lguest: don't force VIRTIO_F_NOTIFY_ON_EMPTY

VIRTIO_F_NOTIFY_ON_EMPTY indicates to the Guest that we will hit them with
an interrupt every time the xmit queue is emptied.

Because it results in lots of tx interrupts, modern Guests probably don't
want it, so let's only force it when they accept the option.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 84bbb19..ba9373f 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -134,6 +134,9 @@ struct device {
 	/* Is it operational */
 	bool running;
 
+	/* Does Guest want an intrrupt on empty? */
+	bool irq_on_empty;
+
 	/* Device-specific data. */
 	void *priv;
 };
@@ -624,10 +627,13 @@ static void trigger_irq(struct virtqueue *vq)
 		return;
 	vq->pending_used = 0;
 
-	/* If they don't want an interrupt, don't send one, unless empty. */
-	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-	    && lg_last_avail(vq) != vq->vring.avail->idx)
-		return;
+	/* If they don't want an interrupt, don't send one... */
+	if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
+		/* ... unless they've asked us to force one on empty. */
+		if (!vq->dev->irq_on_empty
+		    || lg_last_avail(vq) != vq->vring.avail->idx)
+			return;
+	}
 
 	/* Send the Guest an interrupt tell them we used something up. */
 	if (write(lguest_fd, buf, sizeof(buf)) != 0)
@@ -1043,6 +1049,15 @@ static void create_thread(struct virtqueue *vq)
 	close(vq->eventfd);
 }
 
+static bool accepted_feature(struct device *dev, unsigned int bit)
+{
+	const u8 *features = get_feature_bits(dev) + dev->feature_len;
+
+	if (dev->feature_len < bit / CHAR_BIT)
+		return false;
+	return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
+}
+
 static void start_device(struct device *dev)
 {
 	unsigned int i;
@@ -1056,6 +1071,8 @@ static void start_device(struct device *dev)
 		verbose(" %02x", get_feature_bits(dev)
 			[dev->feature_len+i]);
 
+	dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
+
 	for (vq = dev->vq; vq; vq = vq->next) {
 		if (vq->service)
 			create_thread(vq);
-- 
cgit v0.10.2


From e5dc8ae121592239c2a2521d383bcb789849b2a3 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Wed, 26 Aug 2009 19:56:12 +0200
Subject: USB: storage: fix a resume path GFP_NOIO must be used

In the resume path of a block driver GFP_NOIO must be used to
avoid a possible deadlock. The onetouch subdriver of storage violates
the requirement.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/storage/onetouch.c b/drivers/usb/storage/onetouch.c
index 380233b..80e65f2 100644
--- a/drivers/usb/storage/onetouch.c
+++ b/drivers/usb/storage/onetouch.c
@@ -163,7 +163,7 @@ static void usb_onetouch_pm_hook(struct us_data *us, int action)
 			usb_kill_urb(onetouch->irq);
 			break;
 		case US_RESUME:
-			if (usb_submit_urb(onetouch->irq, GFP_KERNEL) != 0)
+			if (usb_submit_urb(onetouch->irq, GFP_NOIO) != 0)
 				dev_err(&onetouch->irq->dev->dev,
 					"usb_submit_urb failed\n");
 			break;
-- 
cgit v0.10.2


From e7d7fcc09ebde1ea1773521ecab5a3f0ad6bef6e Mon Sep 17 00:00:00 2001
From: Pawel Ludwikow <pludwiko@rab.ict.pwr.wroc.pl>
Date: Thu, 27 Aug 2009 14:15:50 +0200
Subject: USB: serial: ftdi_sio: new hardware support - hameg power supply

I'd like to present my small patch enabling to use Hameg HM8143 programmable
power supply with linux.

Signed-off-by: Pawel Ludwikow <pludwiko@rab.ict.pwr.wroc.pl>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 76a17f9..f0cb7d6 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -702,6 +702,8 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) },
 	{ USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
+	{ USB_DEVICE(FTDI_VID, HAMEG_HO820_PID) },
+	{ USB_DEVICE(FTDI_VID, HAMEG_HO870_PID) },
 	{ },					/* Optional parameter entry */
 	{ }					/* Terminating entry */
 };
diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h
index 8c92b88..3c85bfd 100644
--- a/drivers/usb/serial/ftdi_sio.h
+++ b/drivers/usb/serial/ftdi_sio.h
@@ -968,6 +968,12 @@
 #define MARVELL_OPENRD_PID	0x9e90
 
 /*
+ * Hameg HO820 and HO870 interface (using VID 0x0403)
+ */
+#define        HAMEG_HO820_PID         0xed74
+#define        HAMEG_HO870_PID         0xed71
+
+/*
  *   BmRequestType:  1100 0000b
  *   bRequest:       FTDI_E2_READ
  *   wValue:         0
-- 
cgit v0.10.2


From 35904e6b5106fda51b04c8b8080c04466865415f Mon Sep 17 00:00:00 2001
From: Pawel Ludwikow <pludwiko@rab.ict.pwr.wroc.pl>
Date: Thu, 27 Aug 2009 14:15:50 +0200
Subject: USB: serial: pl2303: new hardware support - sanwa multimeter

I'd like to present my small patch enabling to use Sanwa PC5000
mulitimeter with linux.

Signed-off-by: Pawel Ludwikow <pludwiko@rab.ict.pwr.wroc.pl>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index a63ea99..4cbe59c 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -96,6 +96,7 @@ static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) },
 	{ USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) },
 	{ USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) },
+	{ USB_DEVICE(SANWA_VENDOR_ID, SANWA_PRODUCT_ID) },
 	{ }					/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index ee9505e..d640dc9 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -130,3 +130,7 @@
 /* Sony, USB data cable for CMD-Jxx mobile phones */
 #define SONY_VENDOR_ID		0x054c
 #define SONY_QN3USB_PRODUCT_ID	0x0437
+
+/* Sanwa KB-USB2 multimeter cable (ID: 11ad:0001) */
+#define SANWA_VENDOR_ID		0x11ad
+#define SANWA_PRODUCT_ID	0x0001
-- 
cgit v0.10.2


From a67d8e6c1e49dc919c9d5480583fad8a46fc00aa Mon Sep 17 00:00:00 2001
From: Huzaifa Sidhpurwala <sidhpurwala.huzaifa@gmail.com>
Date: Tue, 1 Sep 2009 10:07:41 +0530
Subject: USB: option.c Add support for ZTE AC2726 EVDO modem

A few days ago i got the latest ZTE EVDO modem shown at:
http://img.alibaba.com/photo/240150115/ZTE_AC2726_EVDO_USB_Data_Modem.jpg

It seems that the latest kernel does not have support for it.
I wrote a small patch for the options.c module to add the relevant usb
ids to it.

From: Huzaifa Sidhpurwala <sidhpurwala.huzaifa@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index fe47051..ee20f92 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -299,6 +299,7 @@ static int  option_resume(struct usb_serial *serial);
 #define ZTE_PRODUCT_MF626			0x0031
 #define ZTE_PRODUCT_CDMA_TECH			0xfffe
 #define ZTE_PRODUCT_AC8710			0xfff1
+#define ZTE_PRODUCT_AC2726			0xfff5
 
 #define BENQ_VENDOR_ID				0x04a5
 #define BENQ_PRODUCT_H10			0x4068
@@ -571,6 +572,7 @@ static struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0073, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_CDMA_TECH, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC8710, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_H10) },
 	{ USB_DEVICE(DLINK_VENDOR_ID, DLINK_PRODUCT_DWM_652) },
 	{ USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H21_4512) },
-- 
cgit v0.10.2


From ce60c48871d2b3a15ab3fa2450e783bebb4ae407 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Tue, 1 Sep 2009 09:04:35 +0200
Subject: USB: option: TELIT UC864G support

Add ID for Telit UC-864G GPS/UMTS/WCDMA modem and GPS receiver
to the option driver.

Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index ee20f92..67862d7 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -291,6 +291,7 @@ static int  option_resume(struct usb_serial *serial);
 
 #define TELIT_VENDOR_ID				0x1bc7
 #define TELIT_PRODUCT_UC864E			0x1003
+#define TELIT_PRODUCT_UC864G			0x1004
 
 /* ZTE PRODUCTS */
 #define ZTE_VENDOR_ID				0x19d2
@@ -503,6 +504,7 @@ static struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
 	{ USB_DEVICE(MAXON_VENDOR_ID, 0x6280) }, /* BP3-USB & BP3-EXT HSDPA */
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864E) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864G) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0003, 0xff, 0xff, 0xff) },
-- 
cgit v0.10.2


From 7f1dc313d01f5f0f84c06051343a3b8623932d3c Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Wed, 9 Sep 2009 10:12:48 +0200
Subject: USB: CDC WDM driver doesn't support non-blocking reads

support for O_NONBLOCK in read and write path
by simply not waiting for data in read or availability
of the write urb in write but returning -EAGAIN

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 8c64c01..3e564bf 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -313,8 +313,13 @@ static ssize_t wdm_write
 	r = usb_autopm_get_interface(desc->intf);
 	if (r < 0)
 		goto outnp;
-	r = wait_event_interruptible(desc->wait, !test_bit(WDM_IN_USE,
-							   &desc->flags));
+
+	if (!file->f_flags && O_NONBLOCK)
+		r = wait_event_interruptible(desc->wait, !test_bit(WDM_IN_USE,
+								&desc->flags));
+	else
+		if (test_bit(WDM_IN_USE, &desc->flags))
+			r = -EAGAIN;
 	if (r < 0)
 		goto out;
 
@@ -377,7 +382,7 @@ outnl:
 static ssize_t wdm_read
 (struct file *file, char __user *buffer, size_t count, loff_t *ppos)
 {
-	int rv, cntr;
+	int rv, cntr = 0;
 	int i = 0;
 	struct wdm_device *desc = file->private_data;
 
@@ -389,10 +394,23 @@ static ssize_t wdm_read
 	if (desc->length == 0) {
 		desc->read = 0;
 retry:
+		if (test_bit(WDM_DISCONNECTING, &desc->flags)) {
+			rv = -ENODEV;
+			goto err;
+		}
 		i++;
-		rv = wait_event_interruptible(desc->wait,
-					      test_bit(WDM_READ, &desc->flags));
+		if (file->f_flags & O_NONBLOCK) {
+			if (!test_bit(WDM_READ, &desc->flags)) {
+				rv = cntr ? cntr : -EAGAIN;
+				goto err;
+			}
+			rv = 0;
+		} else {
+			rv = wait_event_interruptible(desc->wait,
+				test_bit(WDM_READ, &desc->flags));
+		}
 
+		/* may have happened while we slept */
 		if (test_bit(WDM_DISCONNECTING, &desc->flags)) {
 			rv = -ENODEV;
 			goto err;
@@ -448,7 +466,7 @@ retry:
 
 err:
 	mutex_unlock(&desc->rlock);
-	if (rv < 0)
+	if (rv < 0 && rv != -EAGAIN)
 		dev_err(&desc->intf->dev, "wdm_read: exit error\n");
 	return rv;
 }
-- 
cgit v0.10.2


From 7af25b4b34a2439020d78da765a3bed0ff73f25c Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 8 Sep 2009 23:51:28 +0200
Subject: USB: fix cdc-acm regression in open

cdc-acm needs to set a flag during open to tell the
tty layer that the device is initialized

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Paul Martin <pm@debian.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 85a1a55..e3861b2 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -59,6 +59,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/tty.h>
+#include <linux/serial.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
 #include <linux/module.h>
@@ -609,6 +610,7 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
 	acm->throttle = 0;
 
 	tasklet_schedule(&acm->urb_task);
+	set_bit(ASYNCB_INITIALIZED, &acm->port.flags);
 	rv = tty_port_block_til_ready(&acm->port, tty, filp);
 done:
 	mutex_unlock(&acm->mutex);
-- 
cgit v0.10.2


From ec3815c3e14dc68d49428e6505ae99e86e5dd067 Mon Sep 17 00:00:00 2001
From: "mail@rainerkeller.de" <mail@rainerkeller.de>
Date: Thu, 3 Sep 2009 11:55:17 +0200
Subject: USB: add PIDs for FTDI based OpenDCC hardware

Some devices from the OpenDCC project are missing in the list
of the FTDI PIDs. These PIDs are listed at
http://www.opendcc.de/elektronik/usb/opendcc_usb.html
(Sorry for the german only page.)
This patch adds the three missing devices.

Signed-off-by: Rainer Keller <mail@rainerkeller.de>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index f0cb7d6..7c8eb53 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -176,6 +176,9 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_RELAIS_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_OPENDCC_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_OPENDCC_SNIFFER_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_OPENDCC_THROTTLE_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GATEWAY_PID) },
 	{ USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_IOBOARD_PID) },
 	{ USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_MINI_IOBOARD_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SPROG_II) },
diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h
index 3c85bfd..35c2c48 100644
--- a/drivers/usb/serial/ftdi_sio.h
+++ b/drivers/usb/serial/ftdi_sio.h
@@ -81,6 +81,9 @@
 
 /* OpenDCC (www.opendcc.de) product id */
 #define FTDI_OPENDCC_PID	0xBFD8
+#define FTDI_OPENDCC_SNIFFER_PID	0xBFD9
+#define FTDI_OPENDCC_THROTTLE_PID	0xBFDA
+#define FTDI_OPENDCC_GATEWAY_PID	0xBFDB
 
 /* Sprog II (Andrew Crosland's SprogII DCC interface) */
 #define FTDI_SPROG_II		0xF0C8
-- 
cgit v0.10.2


From eb661bc88252e5c6dc69df732e77e42981dd4d8b Mon Sep 17 00:00:00 2001
From: "Hennerich, Michael" <Michael.Hennerich@analog.com>
Date: Wed, 2 Sep 2009 09:26:21 +0100
Subject: USB: sl811-hcd: Fix device disconnect:

SL811 Device detected after removal used to be working in linux-2.6.22
but then broke somewhere between 2.6.22 and 2.6.28. Current
hub_port_connect_change() in drivers/usb/core/hub.c won't call
usb_disconnect() in case the SL811 driver sets portstatus
USB_PORT_FEAT_CONNECTION upon removal.
AFAIK the SL811 has only a combined Device Insert/Remove
detection bit, therefore use a count to distinguish insert or remove.


Signed-Off-By: Michael Hennerich <hennerich@blackfin.uclinux.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index a949259..5b22a4d 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -719,8 +719,12 @@ retry:
 		/* port status seems weird until after reset, so
 		 * force the reset and make khubd clean up later.
 		 */
-		sl811->port1 |= (1 << USB_PORT_FEAT_C_CONNECTION)
-				| (1 << USB_PORT_FEAT_CONNECTION);
+		if (sl811->stat_insrmv & 1)
+			sl811->port1 |= 1 << USB_PORT_FEAT_CONNECTION;
+		else
+			sl811->port1 &= ~(1 << USB_PORT_FEAT_CONNECTION);
+
+		sl811->port1 |= 1 << USB_PORT_FEAT_C_CONNECTION;
 
 	} else if (irqstat & SL11H_INTMASK_RD) {
 		if (sl811->port1 & (1 << USB_PORT_FEAT_SUSPEND)) {
-- 
cgit v0.10.2


From 11eaf170363d264ff587f300e41c927ba5a33967 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 14 Sep 2009 10:39:53 -0400
Subject: USB: serial: ftdi: handle gnICE+ JTAG adaptors

Detect the UART on interface1 and blacklist interface0 (as that is the
JTAG port).

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 7c8eb53..4f883b1 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -697,6 +697,8 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(DE_VID, WHT_PID) },
 	{ USB_DEVICE(ADI_VID, ADI_GNICE_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
+	{ USB_DEVICE(ADI_VID, ADI_GNICEPLUS_PID),
+		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(JETI_VID, JETI_SPC1201_PID) },
 	{ USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h
index 35c2c48..6f31e0d 100644
--- a/drivers/usb/serial/ftdi_sio.h
+++ b/drivers/usb/serial/ftdi_sio.h
@@ -933,6 +933,7 @@
  */
 #define ADI_VID 		0x0456
 #define ADI_GNICE_PID 		0xF000
+#define ADI_GNICEPLUS_PID 	0xF001
 
 /*
  * JETI SPECTROMETER SPECBOS 1201
-- 
cgit v0.10.2


From b0567b3f635db72c881a0d561cebb544ec085073 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 7 Aug 2009 14:04:36 -0700
Subject: USB: xhci: Work around for chain bit in link TRBs.

Different sections of the xHCI 0.95 specification had opposing
requirements for the chain bit in a link transaction request buffer (TRB).
The chain bit is used to designate that adjacent TRBs are all part of the
same scatter gather list that should be sent to the device.  Link TRBs can
be in the middle, or at the beginning or end of these chained TRBs.

Sections 4.11.5.1 and 6.4.4.1 both stated the link TRB "shall have the
chain bit set to 1", meaning it is always chained to the next TRB.
However, section 4.6.9 on the stop endpoint command has specific cases for
what the hardware must do for a link TRB with the chain bit set to 0.  The
0.96 specification errata later cleared up this issue by fixing the
4.11.5.1 and 6.4.4.1 sections to state that a link TRB can have the chain
bit set to 1 or 0.

The problem is that the xHCI cancellation code depends on the chain bit of
the link TRB being cleared when it's at the end of a TD, and some 0.95
xHCI hardware simply stops processing the ring when it encounters a link
TRB with the chain bit cleared.

Allow users who are testing 0.95 xHCI prototypes to set a module parameter
(link_quirk) to turn on this link TRB work around.  Cancellation may not
work if the ring is stopped exactly on a link TRB with chain bit set, but
cancellation should be a relatively uncommon case.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 816c39c..994f4c0 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -22,12 +22,18 @@
 
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 
 #include "xhci.h"
 
 #define DRIVER_AUTHOR "Sarah Sharp"
 #define DRIVER_DESC "'eXtensible' Host Controller (xHC) Driver"
 
+/* Some 0.95 hardware can't handle the chain bit on a Link TRB being cleared */
+static int link_quirk;
+module_param(link_quirk, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(link_quirk, "Don't clear the chain bit on a link TRB");
+
 /* TODO: copied from ehci-hcd.c - can this be refactored? */
 /*
  * handshake - spin reading hc until handshake completes or fails
@@ -214,6 +220,12 @@ int xhci_init(struct usb_hcd *hcd)
 
 	xhci_dbg(xhci, "xhci_init\n");
 	spin_lock_init(&xhci->lock);
+	if (link_quirk) {
+		xhci_dbg(xhci, "QUIRK: Not clearing Link TRB chain bits.\n");
+		xhci->quirks |= XHCI_LINK_TRB_QUIRK;
+	} else {
+		xhci_dbg(xhci, "xHCI has no QUIRKS\n");
+	}
 	retval = xhci_mem_init(xhci, GFP_KERNEL);
 	xhci_dbg(xhci, "Finished xhci_init\n");
 
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index e6b9a1c..cb20338 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -94,6 +94,9 @@ static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
 		val = prev->trbs[TRBS_PER_SEGMENT-1].link.control;
 		val &= ~TRB_TYPE_BITMASK;
 		val |= TRB_TYPE(TRB_LINK);
+		/* Always set the chain bit with 0.95 hardware */
+		if (xhci_link_trb_quirk(xhci))
+			val |= TRB_CHAIN;
 		prev->trbs[TRBS_PER_SEGMENT-1].link.control = val;
 	}
 	xhci_dbg(xhci, "Linking segment 0x%llx to segment 0x%llx (DMA)\n",
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index aa88a06..011458f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -172,8 +172,9 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer
  * have their chain bit cleared (so that each Link TRB is a separate TD).
  *
  * Section 6.4.4.1 of the 0.95 spec says link TRBs cannot have the chain bit
- * set, but other sections talk about dealing with the chain bit set.
- * Assume section 6.4.4.1 is wrong, and the chain bit can be set in a Link TRB.
+ * set, but other sections talk about dealing with the chain bit set.  This was
+ * fixed in the 0.96 specification errata, but we have to assume that all 0.95
+ * xHCI hardware can't handle the chain bit being cleared on a link TRB.
  */
 static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer)
 {
@@ -191,8 +192,14 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer
 	while (last_trb(xhci, ring, ring->enq_seg, next)) {
 		if (!consumer) {
 			if (ring != xhci->event_ring) {
-				next->link.control &= ~TRB_CHAIN;
-				next->link.control |= chain;
+				/* If we're not dealing with 0.95 hardware,
+				 * carry over the chain bit of the previous TRB
+				 * (which may mean the chain bit is cleared).
+				 */
+				if (!xhci_link_trb_quirk(xhci)) {
+					next->link.control &= ~TRB_CHAIN;
+					next->link.control |= chain;
+				}
 				/* Give this link TRB to the hardware */
 				wmb();
 				if (next->link.control & TRB_CYCLE)
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index ffe1625..79ea627 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1058,6 +1058,8 @@ struct xhci_hcd {
 	int			noops_submitted;
 	int			noops_handled;
 	int			error_bitmask;
+	unsigned int		quirks;
+#define	XHCI_LINK_TRB_QUIRK	(1 << 0)
 };
 
 /* For testing purposes */
@@ -1136,6 +1138,13 @@ static inline void xhci_write_64(struct xhci_hcd *xhci,
 	writel(val_hi, ptr + 1);
 }
 
+static inline int xhci_link_trb_quirk(struct xhci_hcd *xhci)
+{
+	u32 temp = xhci_readl(xhci, &xhci->cap_regs->hc_capbase);
+	return ((HC_VERSION(temp) == 0x95) &&
+			(xhci->quirks & XHCI_LINK_TRB_QUIRK));
+}
+
 /* xHCI debugging */
 void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num);
 void xhci_print_registers(struct xhci_hcd *xhci);
-- 
cgit v0.10.2


From 018218d1d9eb06116d24a02dd5e7a390f0353d0f Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 7 Aug 2009 14:04:40 -0700
Subject: USB: xhci: Fix slot and endpoint context debugging.

Use the virtual address of the memory hardware uses, not the address for
the container of that memory.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c
index 705e343..33128d5 100644
--- a/drivers/usb/host/xhci-dbg.c
+++ b/drivers/usb/host/xhci-dbg.c
@@ -413,7 +413,8 @@ void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
 	int i;
 
 	struct xhci_slot_ctx *slot_ctx = xhci_get_slot_ctx(xhci, ctx);
-	dma_addr_t dma = ctx->dma + ((unsigned long)slot_ctx - (unsigned long)ctx);
+	dma_addr_t dma = ctx->dma +
+		((unsigned long)slot_ctx - (unsigned long)ctx->bytes);
 	int csz = HCC_64BYTE_CONTEXT(xhci->hcc_params);
 
 	xhci_dbg(xhci, "Slot Context:\n");
@@ -459,7 +460,7 @@ void xhci_dbg_ep_ctx(struct xhci_hcd *xhci,
 	for (i = 0; i < last_ep_ctx; ++i) {
 		struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, ctx, i);
 		dma_addr_t dma = ctx->dma +
-			((unsigned long)ep_ctx - (unsigned long)ctx);
+			((unsigned long)ep_ctx - (unsigned long)ctx->bytes);
 
 		xhci_dbg(xhci, "Endpoint %02d Context:\n", i);
 		xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - ep_info\n",
-- 
cgit v0.10.2


From f2217e8edd95b0428d8123d426e0097a5e955f9f Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 7 Aug 2009 14:04:43 -0700
Subject: USB: xhci: Configure endpoint code refactoring.

Refactor out the code issue, wait for, and parse the event completion code
for a configure endpoint command.  Modify it to support the evaluate
context command, which has a very similar submission process.  Add
functions to copy parts of the output context into the input context
(which will be used in the evaluate context command).

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 994f4c0..ddb1a6a 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -942,6 +942,122 @@ static void xhci_zero_in_ctx(struct xhci_hcd *xhci, struct xhci_virt_device *vir
 	}
 }
 
+static int xhci_configure_endpoint_result(struct xhci_hcd *xhci,
+		struct usb_device *udev, struct xhci_virt_device *virt_dev)
+{
+	int ret;
+
+	switch (virt_dev->cmd_status) {
+	case COMP_ENOMEM:
+		dev_warn(&udev->dev, "Not enough host controller resources "
+				"for new device state.\n");
+		ret = -ENOMEM;
+		/* FIXME: can we allocate more resources for the HC? */
+		break;
+	case COMP_BW_ERR:
+		dev_warn(&udev->dev, "Not enough bandwidth "
+				"for new device state.\n");
+		ret = -ENOSPC;
+		/* FIXME: can we go back to the old state? */
+		break;
+	case COMP_TRB_ERR:
+		/* the HCD set up something wrong */
+		dev_warn(&udev->dev, "ERROR: Endpoint drop flag = 0, "
+				"add flag = 1, "
+				"and endpoint is not disabled.\n");
+		ret = -EINVAL;
+		break;
+	case COMP_SUCCESS:
+		dev_dbg(&udev->dev, "Successful Endpoint Configure command\n");
+		ret = 0;
+		break;
+	default:
+		xhci_err(xhci, "ERROR: unexpected command completion "
+				"code 0x%x.\n", virt_dev->cmd_status);
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static int xhci_evaluate_context_result(struct xhci_hcd *xhci,
+		struct usb_device *udev, struct xhci_virt_device *virt_dev)
+{
+	int ret;
+
+	switch (virt_dev->cmd_status) {
+	case COMP_EINVAL:
+		dev_warn(&udev->dev, "WARN: xHCI driver setup invalid evaluate "
+				"context command.\n");
+		ret = -EINVAL;
+		break;
+	case COMP_EBADSLT:
+		dev_warn(&udev->dev, "WARN: slot not enabled for"
+				"evaluate context command.\n");
+	case COMP_CTX_STATE:
+		dev_warn(&udev->dev, "WARN: invalid context state for "
+				"evaluate context command.\n");
+		xhci_dbg_ctx(xhci, virt_dev->out_ctx, 1);
+		ret = -EINVAL;
+		break;
+	case COMP_SUCCESS:
+		dev_dbg(&udev->dev, "Successful evaluate context command\n");
+		ret = 0;
+		break;
+	default:
+		xhci_err(xhci, "ERROR: unexpected command completion "
+				"code 0x%x.\n", virt_dev->cmd_status);
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+/* Issue a configure endpoint command or evaluate context command
+ * and wait for it to finish.
+ */
+static int xhci_configure_endpoint(struct xhci_hcd *xhci,
+		struct usb_device *udev, struct xhci_virt_device *virt_dev,
+		bool ctx_change)
+{
+	int ret;
+	int timeleft;
+	unsigned long flags;
+
+	spin_lock_irqsave(&xhci->lock, flags);
+	if (!ctx_change)
+		ret = xhci_queue_configure_endpoint(xhci, virt_dev->in_ctx->dma,
+				udev->slot_id);
+	else
+		ret = xhci_queue_evaluate_context(xhci, virt_dev->in_ctx->dma,
+				udev->slot_id);
+	if (ret < 0) {
+		spin_unlock_irqrestore(&xhci->lock, flags);
+		xhci_dbg(xhci, "FIXME allocate a new ring segment\n");
+		return -ENOMEM;
+	}
+	xhci_ring_cmd_db(xhci);
+	spin_unlock_irqrestore(&xhci->lock, flags);
+
+	/* Wait for the configure endpoint command to complete */
+	timeleft = wait_for_completion_interruptible_timeout(
+			&virt_dev->cmd_completion,
+			USB_CTRL_SET_TIMEOUT);
+	if (timeleft <= 0) {
+		xhci_warn(xhci, "%s while waiting for %s command\n",
+				timeleft == 0 ? "Timeout" : "Signal",
+				ctx_change == 0 ?
+					"configure endpoint" :
+					"evaluate context");
+		/* FIXME cancel the configure endpoint command */
+		return -ETIME;
+	}
+
+	if (!ctx_change)
+		return xhci_configure_endpoint_result(xhci, udev, virt_dev);
+	return xhci_evaluate_context_result(xhci, udev, virt_dev);
+}
+
 /* Called after one or more calls to xhci_add_endpoint() or
  * xhci_drop_endpoint().  If this call fails, the USB core is expected
  * to call xhci_reset_bandwidth().
@@ -956,8 +1072,6 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 {
 	int i;
 	int ret = 0;
-	int timeleft;
-	unsigned long flags;
 	struct xhci_hcd *xhci;
 	struct xhci_virt_device	*virt_dev;
 	struct xhci_input_control_ctx *ctrl_ctx;
@@ -987,56 +1101,7 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	xhci_dbg_ctx(xhci, virt_dev->in_ctx,
 			LAST_CTX_TO_EP_NUM(slot_ctx->dev_info));
 
-	spin_lock_irqsave(&xhci->lock, flags);
-	ret = xhci_queue_configure_endpoint(xhci, virt_dev->in_ctx->dma,
-			udev->slot_id);
-	if (ret < 0) {
-		spin_unlock_irqrestore(&xhci->lock, flags);
-		xhci_dbg(xhci, "FIXME allocate a new ring segment\n");
-		return -ENOMEM;
-	}
-	xhci_ring_cmd_db(xhci);
-	spin_unlock_irqrestore(&xhci->lock, flags);
-
-	/* Wait for the configure endpoint command to complete */
-	timeleft = wait_for_completion_interruptible_timeout(
-			&virt_dev->cmd_completion,
-			USB_CTRL_SET_TIMEOUT);
-	if (timeleft <= 0) {
-		xhci_warn(xhci, "%s while waiting for configure endpoint command\n",
-				timeleft == 0 ? "Timeout" : "Signal");
-		/* FIXME cancel the configure endpoint command */
-		return -ETIME;
-	}
-
-	switch (virt_dev->cmd_status) {
-	case COMP_ENOMEM:
-		dev_warn(&udev->dev, "Not enough host controller resources "
-				"for new device state.\n");
-		ret = -ENOMEM;
-		/* FIXME: can we allocate more resources for the HC? */
-		break;
-	case COMP_BW_ERR:
-		dev_warn(&udev->dev, "Not enough bandwidth "
-				"for new device state.\n");
-		ret = -ENOSPC;
-		/* FIXME: can we go back to the old state? */
-		break;
-	case COMP_TRB_ERR:
-		/* the HCD set up something wrong */
-		dev_warn(&udev->dev, "ERROR: Endpoint drop flag = 0, add flag = 1, "
-				"and endpoint is not disabled.\n");
-		ret = -EINVAL;
-		break;
-	case COMP_SUCCESS:
-		dev_dbg(&udev->dev, "Successful Endpoint Configure command\n");
-		break;
-	default:
-		xhci_err(xhci, "ERROR: unexpected command completion "
-				"code 0x%x.\n", virt_dev->cmd_status);
-		ret = -EINVAL;
-		break;
-	}
+	ret = xhci_configure_endpoint(xhci, udev, virt_dev, false);
 	if (ret) {
 		/* Callee should call reset_bandwidth() */
 		return ret;
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index cb20338..c5313c8 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -601,6 +601,44 @@ void xhci_endpoint_zero(struct xhci_hcd *xhci,
 	 */
 }
 
+/* Copy output xhci_ep_ctx to the input xhci_ep_ctx copy.
+ * Useful when you want to change one particular aspect of the endpoint and then
+ * issue a configure endpoint command.
+ */
+void xhci_endpoint_copy(struct xhci_hcd *xhci,
+		struct xhci_virt_device *vdev, unsigned int ep_index)
+{
+	struct xhci_ep_ctx *out_ep_ctx;
+	struct xhci_ep_ctx *in_ep_ctx;
+
+	out_ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index);
+	in_ep_ctx = xhci_get_ep_ctx(xhci, vdev->in_ctx, ep_index);
+
+	in_ep_ctx->ep_info = out_ep_ctx->ep_info;
+	in_ep_ctx->ep_info2 = out_ep_ctx->ep_info2;
+	in_ep_ctx->deq = out_ep_ctx->deq;
+	in_ep_ctx->tx_info = out_ep_ctx->tx_info;
+}
+
+/* Copy output xhci_slot_ctx to the input xhci_slot_ctx.
+ * Useful when you want to change one particular aspect of the endpoint and then
+ * issue a configure endpoint command.  Only the context entries field matters,
+ * but we'll copy the whole thing anyway.
+ */
+void xhci_slot_copy(struct xhci_hcd *xhci, struct xhci_virt_device *vdev)
+{
+	struct xhci_slot_ctx *in_slot_ctx;
+	struct xhci_slot_ctx *out_slot_ctx;
+
+	in_slot_ctx = xhci_get_slot_ctx(xhci, vdev->in_ctx);
+	out_slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx);
+
+	in_slot_ctx->dev_info = out_slot_ctx->dev_info;
+	in_slot_ctx->dev_info2 = out_slot_ctx->dev_info2;
+	in_slot_ctx->tt_info = out_slot_ctx->tt_info;
+	in_slot_ctx->dev_state = out_slot_ctx->dev_state;
+}
+
 /* Set up the scratchpad buffer array and scratchpad buffers, if needed. */
 static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags)
 {
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 011458f..ac5c662 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1740,6 +1740,15 @@ int xhci_queue_configure_endpoint(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
 			TRB_TYPE(TRB_CONFIG_EP) | SLOT_ID_FOR_TRB(slot_id));
 }
 
+/* Queue an evaluate context command TRB */
+int xhci_queue_evaluate_context(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
+		u32 slot_id)
+{
+	return queue_command(xhci, lower_32_bits(in_ctx_ptr),
+			upper_32_bits(in_ctx_ptr), 0,
+			TRB_TYPE(TRB_EVAL_CONTEXT) | SLOT_ID_FOR_TRB(slot_id));
+}
+
 int xhci_queue_stop_endpoint(struct xhci_hcd *xhci, int slot_id,
 		unsigned int ep_index)
 {
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 79ea627..a8fe217 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1168,6 +1168,9 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 unsigned int xhci_get_endpoint_index(struct usb_endpoint_descriptor *desc);
 unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc);
 void xhci_endpoint_zero(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_host_endpoint *ep);
+void xhci_endpoint_copy(struct xhci_hcd *xhci,
+		struct xhci_virt_device *vdev, unsigned int ep_index);
+void xhci_slot_copy(struct xhci_hcd *xhci, struct xhci_virt_device *vdev);
 int xhci_endpoint_init(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev,
 		struct usb_device *udev, struct usb_host_endpoint *ep,
 		gfp_t mem_flags);
@@ -1216,6 +1219,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb,
 		int slot_id, unsigned int ep_index);
 int xhci_queue_configure_endpoint(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
 		u32 slot_id);
+int xhci_queue_evaluate_context(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
+		u32 slot_id);
 int xhci_queue_reset_ep(struct xhci_hcd *xhci, int slot_id,
 		unsigned int ep_index);
 void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
-- 
cgit v0.10.2


From 47aded8ade9fee6779b121b2b156235f261239d7 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 7 Aug 2009 14:04:46 -0700
Subject: USB: xhci: Set correct max packet size for HS/FS control endpoints.

Set the max packet size for the default control endpoint on high speed
devices to be 64 bytes.  High speed devices always have a max packet size
of 64 bytes.  There's no use setting it to eight for the initial 8 byte
descriptor fetch and then issuing (and waiting for) an evaluate context
command to update it to 64 bytes for the subsequent control transfers.

The USB core guesses that the max packet size on a full speed control
endpoint is 64 bytes, and then updates it after the first 8-byte
descriptor fetch.  Change the initial setup for the xHCI internal
representation of the full speed device to have a 64 byte max packet size.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index c5313c8..55920b3 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -401,15 +401,28 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 	/* Step 5 */
 	ep0_ctx->ep_info2 = EP_TYPE(CTRL_EP);
 	/*
-	 * See section 4.3 bullet 6:
-	 * The default Max Packet size for ep0 is "8 bytes for a USB2
-	 * LS/FS/HS device or 512 bytes for a USB3 SS device"
 	 * XXX: Not sure about wireless USB devices.
 	 */
-	if (udev->speed == USB_SPEED_SUPER)
+	switch (udev->speed) {
+	case USB_SPEED_SUPER:
 		ep0_ctx->ep_info2 |= MAX_PACKET(512);
-	else
+		break;
+	case USB_SPEED_HIGH:
+	/* USB core guesses at a 64-byte max packet first for FS devices */
+	case USB_SPEED_FULL:
+		ep0_ctx->ep_info2 |= MAX_PACKET(64);
+		break;
+	case USB_SPEED_LOW:
 		ep0_ctx->ep_info2 |= MAX_PACKET(8);
+		break;
+	case USB_SPEED_VARIABLE:
+		xhci_dbg(xhci, "FIXME xHCI doesn't support wireless speeds\n");
+		return -EINVAL;
+		break;
+	default:
+		/* New speed? */
+		BUG();
+	}
 	/* EP 0 can handle "burst" sizes of 1, so Max Burst Size field is 0 */
 	ep0_ctx->ep_info2 |= MAX_BURST(0);
 	ep0_ctx->ep_info2 |= ERROR_COUNT(3);
-- 
cgit v0.10.2


From 2d3f1fac7ee8bb4c6fad40f838488edbeabb0c50 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 7 Aug 2009 14:04:49 -0700
Subject: USB: xhci: Support full speed devices.

Full speed devices have varying max packet sizes (8, 16, 32, or 64) for
endpoint 0.  The xHCI hardware needs to know the real max packet size
that the USB core discovers after it fetches the first 8 bytes of the
device descriptor.

In order to fix this without adding a new hook to host controller drivers,
the xHCI driver looks for an updated max packet size for control
endpoints.  If it finds an updated size, it issues an evaluate context
command and waits for that command to finish.  This should only happen in
the initialization and device descriptor fetching steps in the khubd
thread, so blocking should be fine.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index ddb1a6a..4e18f4e 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -601,6 +601,70 @@ int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
 	return 1;
 }
 
+static int xhci_configure_endpoint(struct xhci_hcd *xhci,
+		struct usb_device *udev, struct xhci_virt_device *virt_dev,
+		bool ctx_change);
+
+/*
+ * Full speed devices may have a max packet size greater than 8 bytes, but the
+ * USB core doesn't know that until it reads the first 8 bytes of the
+ * descriptor.  If the usb_device's max packet size changes after that point,
+ * we need to issue an evaluate context command and wait on it.
+ */
+static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
+		unsigned int ep_index, struct urb *urb)
+{
+	struct xhci_container_ctx *in_ctx;
+	struct xhci_container_ctx *out_ctx;
+	struct xhci_input_control_ctx *ctrl_ctx;
+	struct xhci_ep_ctx *ep_ctx;
+	int max_packet_size;
+	int hw_max_packet_size;
+	int ret = 0;
+
+	out_ctx = xhci->devs[slot_id]->out_ctx;
+	ep_ctx = xhci_get_ep_ctx(xhci, out_ctx, ep_index);
+	hw_max_packet_size = MAX_PACKET_DECODED(ep_ctx->ep_info2);
+	max_packet_size = urb->dev->ep0.desc.wMaxPacketSize;
+	if (hw_max_packet_size != max_packet_size) {
+		xhci_dbg(xhci, "Max Packet Size for ep 0 changed.\n");
+		xhci_dbg(xhci, "Max packet size in usb_device = %d\n",
+				max_packet_size);
+		xhci_dbg(xhci, "Max packet size in xHCI HW = %d\n",
+				hw_max_packet_size);
+		xhci_dbg(xhci, "Issuing evaluate context command.\n");
+
+		/* Set up the modified control endpoint 0 */
+		xhci_endpoint_copy(xhci, xhci->devs[slot_id], ep_index);
+		in_ctx = xhci->devs[slot_id]->in_ctx;
+		ep_ctx = xhci_get_ep_ctx(xhci, in_ctx, ep_index);
+		ep_ctx->ep_info2 &= ~MAX_PACKET_MASK;
+		ep_ctx->ep_info2 |= MAX_PACKET(max_packet_size);
+
+		/* Set up the input context flags for the command */
+		/* FIXME: This won't work if a non-default control endpoint
+		 * changes max packet sizes.
+		 */
+		ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx);
+		ctrl_ctx->add_flags = EP0_FLAG;
+		ctrl_ctx->drop_flags = 0;
+
+		xhci_dbg(xhci, "Slot %d input context\n", slot_id);
+		xhci_dbg_ctx(xhci, in_ctx, ep_index);
+		xhci_dbg(xhci, "Slot %d output context\n", slot_id);
+		xhci_dbg_ctx(xhci, out_ctx, ep_index);
+
+		ret = xhci_configure_endpoint(xhci, urb->dev,
+				xhci->devs[slot_id], true);
+
+		/* Clean up the input context for later use by bandwidth
+		 * functions.
+		 */
+		ctrl_ctx->add_flags = SLOT_FLAG;
+	}
+	return ret;
+}
+
 /*
  * non-error returns are a promise to giveback() the urb later
  * we drop ownership so next owner (or urb unlink) can get it
@@ -612,13 +676,13 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 	int ret = 0;
 	unsigned int slot_id, ep_index;
 
+
 	if (!urb || xhci_check_args(hcd, urb->dev, urb->ep, true, __func__) <= 0)
 		return -EINVAL;
 
 	slot_id = urb->dev->slot_id;
 	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
 
-	spin_lock_irqsave(&xhci->lock, flags);
 	if (!xhci->devs || !xhci->devs[slot_id]) {
 		if (!in_interrupt())
 			dev_warn(&urb->dev->dev, "WARN: urb submitted for dev with no Slot ID\n");
@@ -631,19 +695,33 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 		ret = -ESHUTDOWN;
 		goto exit;
 	}
-	if (usb_endpoint_xfer_control(&urb->ep->desc))
+	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
+		/* Check to see if the max packet size for the default control
+		 * endpoint changed during FS device enumeration
+		 */
+		if (urb->dev->speed == USB_SPEED_FULL) {
+			ret = xhci_check_maxpacket(xhci, slot_id,
+					ep_index, urb);
+			if (ret < 0)
+				return ret;
+		}
+
 		/* We have a spinlock and interrupts disabled, so we must pass
 		 * atomic context to this function, which may allocate memory.
 		 */
+		spin_lock_irqsave(&xhci->lock, flags);
 		ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb,
 				slot_id, ep_index);
-	else if (usb_endpoint_xfer_bulk(&urb->ep->desc))
+		spin_unlock_irqrestore(&xhci->lock, flags);
+	} else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) {
+		spin_lock_irqsave(&xhci->lock, flags);
 		ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb,
 				slot_id, ep_index);
-	else
+		spin_unlock_irqrestore(&xhci->lock, flags);
+	} else {
 		ret = -EINVAL;
+	}
 exit:
-	spin_unlock_irqrestore(&xhci->lock, flags);
 	return ret;
 }
 
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index ac5c662..ee7fc45 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -701,6 +701,10 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
 		complete(&xhci->devs[slot_id]->cmd_completion);
 		break;
+	case TRB_TYPE(TRB_EVAL_CONTEXT):
+		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
+		complete(&xhci->devs[slot_id]->cmd_completion);
+		break;
 	case TRB_TYPE(TRB_ADDR_DEV):
 		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
 		complete(&xhci->addr_dev);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index a8fe217..6aecede 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -601,6 +601,8 @@ struct xhci_ep_ctx {
 /* bit 7 is Host Initiate Disable - for disabling stream selection */
 #define MAX_BURST(p)	(((p)&0xff) << 8)
 #define MAX_PACKET(p)	(((p)&0xffff) << 16)
+#define MAX_PACKET_MASK		(0xffff << 16)
+#define MAX_PACKET_DECODED(p)	(((p) >> 16) & 0xffff)
 
 
 /**
-- 
cgit v0.10.2


From 82d1009f537c2a43be0a410abd33521f76ee3a5a Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 7 Aug 2009 14:04:52 -0700
Subject: USB: xhci: Handle stalled control endpoints.

When a control endpoint stalls, the next control transfer will clear the
stall.  The USB core doesn't call down to the host controller driver's
endpoint_reset() method when control endpoints stall, so the xHCI driver
has to do all its stall handling for internal state in its interrupt handler.

When the host stalls on a control endpoint, it may stop on the data phase
or status phase of the control transfer.  Like other stalled endpoints,
the xHCI driver needs to queue a Reset Endpoint command and move the
hardware's control endpoint ring dequeue pointer past the failed control
transfer (with a Set TR Dequeue Pointer or a Configure Endpoint command).

Since the USB core doesn't call usb_hcd_reset_endpoint() for control
endpoints, we need to do this in interrupt context when we get notified of
the stalled transfer.  URBs may be queued to the hardware before these two
commands complete.  The endpoint queue will be restarted once both
commands complete.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 4e18f4e..4353c1c 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -1230,6 +1230,25 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	xhci_zero_in_ctx(xhci, virt_dev);
 }
 
+void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
+		struct usb_device *udev, struct usb_host_endpoint *ep,
+		unsigned int ep_index, struct xhci_ring *ep_ring)
+{
+	struct xhci_dequeue_state deq_state;
+
+	xhci_dbg(xhci, "Cleaning up stalled endpoint ring\n");
+	/* We need to move the HW's dequeue pointer past this TD,
+	 * or it will attempt to resend it on the next doorbell ring.
+	 */
+	xhci_find_new_dequeue_state(xhci, udev->slot_id,
+			ep_index, ep_ring->stopped_td, &deq_state);
+
+	xhci_dbg(xhci, "Queueing new dequeue state\n");
+	xhci_queue_new_dequeue_state(xhci, ep_ring,
+			udev->slot_id,
+			ep_index, &deq_state);
+}
+
 /* Deal with stalled endpoints.  The core should have sent the control message
  * to clear the halt condition.  However, we need to make the xHCI hardware
  * reset its sequence number, since a device will expect a sequence number of
@@ -1244,7 +1263,6 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 	unsigned int ep_index;
 	unsigned long flags;
 	int ret;
-	struct xhci_dequeue_state deq_state;
 	struct xhci_ring *ep_ring;
 
 	xhci = hcd_to_xhci(hcd);
@@ -1261,6 +1279,10 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 				ep->desc.bEndpointAddress);
 		return;
 	}
+	if (usb_endpoint_xfer_control(&ep->desc)) {
+		xhci_dbg(xhci, "Control endpoint stall already handled.\n");
+		return;
+	}
 
 	xhci_dbg(xhci, "Queueing reset endpoint command\n");
 	spin_lock_irqsave(&xhci->lock, flags);
@@ -1271,16 +1293,7 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 	 * command.  Better hope that last command worked!
 	 */
 	if (!ret) {
-		xhci_dbg(xhci, "Cleaning up stalled endpoint ring\n");
-		/* We need to move the HW's dequeue pointer past this TD,
-		 * or it will attempt to resend it on the next doorbell ring.
-		 */
-		xhci_find_new_dequeue_state(xhci, udev->slot_id,
-				ep_index, ep_ring->stopped_td, &deq_state);
-		xhci_dbg(xhci, "Queueing new dequeue state\n");
-		xhci_queue_new_dequeue_state(xhci, ep_ring,
-				udev->slot_id,
-				ep_index, &deq_state);
+		xhci_cleanup_stalled_ring(xhci, udev, ep, ep_index, ep_ring);
 		kfree(ep_ring->stopped_td);
 		xhci_ring_cmd_db(xhci);
 	}
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index ee7fc45..c831194 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -817,6 +817,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 {
 	struct xhci_virt_device *xdev;
 	struct xhci_ring *ep_ring;
+	unsigned int slot_id;
 	int ep_index;
 	struct xhci_td *td = 0;
 	dma_addr_t event_dma;
@@ -827,7 +828,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	struct xhci_ep_ctx *ep_ctx;
 
 	xhci_dbg(xhci, "In %s\n", __func__);
-	xdev = xhci->devs[TRB_TO_SLOT_ID(event->flags)];
+	slot_id = TRB_TO_SLOT_ID(event->flags);
+	xdev = xhci->devs[slot_id];
 	if (!xdev) {
 		xhci_err(xhci, "ERROR Transfer event pointed to bad slot\n");
 		return -ENODEV;
@@ -941,6 +943,25 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			xhci_warn(xhci, "WARN: short transfer on control ep\n");
 			status = -EREMOTEIO;
 			break;
+		case COMP_STALL:
+			/* Did we transfer part of the data (middle) phase? */
+			if (event_trb != ep_ring->dequeue &&
+					event_trb != td->last_trb)
+				td->urb->actual_length =
+					td->urb->transfer_buffer_length
+					- TRB_LEN(event->transfer_len);
+			else
+				td->urb->actual_length = 0;
+
+			ep_ring->stopped_td = td;
+			ep_ring->stopped_trb = event_trb;
+			xhci_queue_reset_ep(xhci, slot_id, ep_index);
+			xhci_cleanup_stalled_ring(xhci,
+					td->urb->dev,
+					td->urb->ep,
+					ep_index, ep_ring);
+			xhci_ring_cmd_db(xhci);
+			goto td_cleanup;
 		default:
 			/* Others already handled above */
 			break;
@@ -1083,6 +1104,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			inc_deq(xhci, ep_ring, false);
 		}
 
+td_cleanup:
 		/* Clean up the endpoint's TD list */
 		urb = td->urb;
 		list_del(&td->td_list);
@@ -1091,8 +1113,13 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			list_del(&td->cancelled_td_list);
 			ep_ring->cancels_pending--;
 		}
-		/* Leave the TD around for the reset endpoint function to use */
-		if (GET_COMP_CODE(event->transfer_len) != COMP_STALL) {
+		/* Leave the TD around for the reset endpoint function to use
+		 * (but only if it's not a control endpoint, since we already
+		 * queued the Set TR dequeue pointer command for stalled
+		 * control endpoints).
+		 */
+		if (usb_endpoint_xfer_control(&urb->ep->desc) ||
+			GET_COMP_CODE(event->transfer_len) != COMP_STALL) {
 			kfree(td);
 		}
 		urb->hcpriv = NULL;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 6aecede..b1abaeb 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -589,6 +589,7 @@ struct xhci_ep_ctx {
  */
 #define	FORCE_EVENT	(0x1)
 #define ERROR_COUNT(p)	(((p) & 0x3) << 1)
+#define CTX_TO_EP_TYPE(p)	(((p) >> 3) & 0x7)
 #define EP_TYPE(p)	((p) << 3)
 #define ISOC_OUT_EP	1
 #define BULK_OUT_EP	2
@@ -1231,6 +1232,9 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 		struct xhci_ring *ep_ring, unsigned int slot_id,
 		unsigned int ep_index, struct xhci_dequeue_state *deq_state);
+void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
+		struct usb_device *udev, struct usb_host_endpoint *ep,
+		unsigned int ep_index, struct xhci_ring *ep_ring);
 
 /* xHCI roothub code */
 int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex,
-- 
cgit v0.10.2


From ac9d8fe7c6a8041cca5a0738915d2c4e21381421 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 7 Aug 2009 14:04:55 -0700
Subject: USB: xhci: Add quirk for Fresco Logic xHCI hardware.

This Fresco Logic xHCI host controller chip revision puts bad data into
the output endpoint context after a Reset Endpoint command.  It needs a
Configure Endpoint command (instead of a Set TR Dequeue Pointer command)
after the reset endpoint command.

Set up the input context before issuing the Reset Endpoint command so we
don't copy bad data from the output endpoint context.  The HW also can't
handle two commands queued at once, so submit the TRB for the Configure
Endpoint command in the event handler for the Reset Endpoint command.

Devices that stall on control endpoints before a configuration is selected
will not work under this Fresco Logic xHCI host controller revision.

This patch is for prototype hardware that will be given to other companies
for evaluation purposes only, and should not reach consumer hands.  Fresco
Logic's next chip rev should have this bug fixed.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 4353c1c..1d4a1e3 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -224,7 +224,7 @@ int xhci_init(struct usb_hcd *hcd)
 		xhci_dbg(xhci, "QUIRK: Not clearing Link TRB chain bits.\n");
 		xhci->quirks |= XHCI_LINK_TRB_QUIRK;
 	} else {
-		xhci_dbg(xhci, "xHCI has no QUIRKS\n");
+		xhci_dbg(xhci, "xHCI doesn't need link TRB QUIRK\n");
 	}
 	retval = xhci_mem_init(xhci, GFP_KERNEL);
 	xhci_dbg(xhci, "Finished xhci_init\n");
@@ -567,13 +567,22 @@ unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc)
 	return 1 << (xhci_get_endpoint_index(desc) + 1);
 }
 
+/* Find the flag for this endpoint (for use in the control context).  Use the
+ * endpoint index to create a bitmask.  The slot context is bit 0, endpoint 0 is
+ * bit 1, etc.
+ */
+unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index)
+{
+	return 1 << (ep_index + 1);
+}
+
 /* Compute the last valid endpoint context index.  Basically, this is the
  * endpoint index plus one.  For slot contexts with more than valid endpoint,
  * we find the most significant bit set in the added contexts flags.
  * e.g. ep 1 IN (with epnum 0x81) => added_ctxs = 0b1000
  * fls(0b1000) = 4, but the endpoint context index is 3, so subtract one.
  */
-static inline unsigned int xhci_last_valid_endpoint(u32 added_ctxs)
+unsigned int xhci_last_valid_endpoint(u32 added_ctxs)
 {
 	return fls(added_ctxs) - 1;
 }
@@ -1230,8 +1239,44 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	xhci_zero_in_ctx(xhci, virt_dev);
 }
 
+void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
+		unsigned int slot_id, unsigned int ep_index,
+		struct xhci_dequeue_state *deq_state)
+{
+	struct xhci_container_ctx *in_ctx;
+	struct xhci_input_control_ctx *ctrl_ctx;
+	struct xhci_ep_ctx *ep_ctx;
+	u32 added_ctxs;
+	dma_addr_t addr;
+
+	xhci_endpoint_copy(xhci, xhci->devs[slot_id], ep_index);
+	in_ctx = xhci->devs[slot_id]->in_ctx;
+	ep_ctx = xhci_get_ep_ctx(xhci, in_ctx, ep_index);
+	addr = xhci_trb_virt_to_dma(deq_state->new_deq_seg,
+			deq_state->new_deq_ptr);
+	if (addr == 0) {
+		xhci_warn(xhci, "WARN Cannot submit config ep after "
+				"reset ep command\n");
+		xhci_warn(xhci, "WARN deq seg = %p, deq ptr = %p\n",
+				deq_state->new_deq_seg,
+				deq_state->new_deq_ptr);
+		return;
+	}
+	ep_ctx->deq = addr | deq_state->new_cycle_state;
+
+	xhci_slot_copy(xhci, xhci->devs[slot_id]);
+
+	ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx);
+	added_ctxs = xhci_get_endpoint_flag_from_index(ep_index);
+	ctrl_ctx->add_flags = added_ctxs | SLOT_FLAG;
+	ctrl_ctx->drop_flags = added_ctxs;
+
+	xhci_dbg(xhci, "Slot ID %d Input Context:\n", slot_id);
+	xhci_dbg_ctx(xhci, in_ctx, ep_index);
+}
+
 void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
-		struct usb_device *udev, struct usb_host_endpoint *ep,
+		struct usb_device *udev,
 		unsigned int ep_index, struct xhci_ring *ep_ring)
 {
 	struct xhci_dequeue_state deq_state;
@@ -1241,12 +1286,26 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
 	 * or it will attempt to resend it on the next doorbell ring.
 	 */
 	xhci_find_new_dequeue_state(xhci, udev->slot_id,
-			ep_index, ep_ring->stopped_td, &deq_state);
+			ep_index, ep_ring->stopped_td,
+			&deq_state);
 
-	xhci_dbg(xhci, "Queueing new dequeue state\n");
-	xhci_queue_new_dequeue_state(xhci, ep_ring,
-			udev->slot_id,
-			ep_index, &deq_state);
+	/* HW with the reset endpoint quirk will use the saved dequeue state to
+	 * issue a configure endpoint command later.
+	 */
+	if (!(xhci->quirks & XHCI_RESET_EP_QUIRK)) {
+		xhci_dbg(xhci, "Queueing new dequeue state\n");
+		xhci_queue_new_dequeue_state(xhci, ep_ring,
+				udev->slot_id,
+				ep_index, &deq_state);
+	} else {
+		/* Better hope no one uses the input context between now and the
+		 * reset endpoint completion!
+		 */
+		xhci_dbg(xhci, "Setting up input context for "
+				"configure endpoint command\n");
+		xhci_setup_input_ctx_for_quirk(xhci, udev->slot_id,
+				ep_index, &deq_state);
+	}
 }
 
 /* Deal with stalled endpoints.  The core should have sent the control message
@@ -1293,7 +1352,7 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 	 * command.  Better hope that last command worked!
 	 */
 	if (!ret) {
-		xhci_cleanup_stalled_ring(xhci, udev, ep, ep_index, ep_ring);
+		xhci_cleanup_stalled_ring(xhci, udev, ep_index, ep_ring);
 		kfree(ep_ring->stopped_td);
 		xhci_ring_cmd_db(xhci);
 	}
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 592fe7e..8fb308d 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -24,6 +24,10 @@
 
 #include "xhci.h"
 
+/* Device for a quirk */
+#define PCI_VENDOR_ID_FRESCO_LOGIC	0x1b73
+#define PCI_DEVICE_ID_FRESCO_LOGIC_PDK	0x1000
+
 static const char hcd_name[] = "xhci_hcd";
 
 /* called after powerup, by probe or system-pm "wakeup" */
@@ -62,6 +66,15 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
 	xhci->hcc_params = xhci_readl(xhci, &xhci->cap_regs->hcc_params);
 	xhci_print_registers(xhci);
 
+	/* Look for vendor-specific quirks */
+	if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+			pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK &&
+			pdev->revision == 0x0) {
+			xhci->quirks |= XHCI_RESET_EP_QUIRK;
+			xhci_dbg(xhci, "QUIRK: Fresco Logic xHC needs configure"
+					" endpoint cmd after reset endpoint\n");
+	}
+
 	/* Make sure the HC is halted. */
 	retval = xhci_halt(xhci);
 	if (retval)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index c831194..35374dd 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -469,7 +469,6 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 	 * ring running.
 	 */
 	ep_ring->state |= SET_DEQ_PENDING;
-	xhci_ring_cmd_db(xhci);
 }
 
 /*
@@ -538,6 +537,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 	if (deq_state.new_deq_ptr && deq_state.new_deq_seg) {
 		xhci_queue_new_dequeue_state(xhci, ep_ring,
 				slot_id, ep_index, &deq_state);
+		xhci_ring_cmd_db(xhci);
 	} else {
 		/* Otherwise just ring the doorbell to restart the ring */
 		ring_ep_doorbell(xhci, slot_id, ep_index);
@@ -651,18 +651,31 @@ static void handle_reset_ep_completion(struct xhci_hcd *xhci,
 {
 	int slot_id;
 	unsigned int ep_index;
+	struct xhci_ring *ep_ring;
 
 	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
 	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
+	ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
 	/* This command will only fail if the endpoint wasn't halted,
 	 * but we don't care.
 	 */
 	xhci_dbg(xhci, "Ignoring reset ep completion code of %u\n",
 			(unsigned int) GET_COMP_CODE(event->status));
 
-	/* Clear our internal halted state and restart the ring */
-	xhci->devs[slot_id]->ep_rings[ep_index]->state &= ~EP_HALTED;
-	ring_ep_doorbell(xhci, slot_id, ep_index);
+	/* HW with the reset endpoint quirk needs to have a configure endpoint
+	 * command complete before the endpoint can be used.  Queue that here
+	 * because the HW can't handle two commands being queued in a row.
+	 */
+	if (xhci->quirks & XHCI_RESET_EP_QUIRK) {
+		xhci_dbg(xhci, "Queueing configure endpoint command\n");
+		xhci_queue_configure_endpoint(xhci,
+				xhci->devs[slot_id]->in_ctx->dma, slot_id);
+		xhci_ring_cmd_db(xhci);
+	} else {
+		/* Clear our internal halted state and restart the ring */
+		ep_ring->state &= ~EP_HALTED;
+		ring_ep_doorbell(xhci, slot_id, ep_index);
+	}
 }
 
 static void handle_cmd_completion(struct xhci_hcd *xhci,
@@ -671,6 +684,10 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 	int slot_id = TRB_TO_SLOT_ID(event->flags);
 	u64 cmd_dma;
 	dma_addr_t cmd_dequeue_dma;
+	struct xhci_input_control_ctx *ctrl_ctx;
+	unsigned int ep_index;
+	struct xhci_ring *ep_ring;
+	unsigned int ep_state;
 
 	cmd_dma = event->cmd_trb;
 	cmd_dequeue_dma = xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg,
@@ -698,8 +715,39 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 			xhci_free_virt_device(xhci, slot_id);
 		break;
 	case TRB_TYPE(TRB_CONFIG_EP):
-		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
-		complete(&xhci->devs[slot_id]->cmd_completion);
+		/*
+		 * Configure endpoint commands can come from the USB core
+		 * configuration or alt setting changes, or because the HW
+		 * needed an extra configure endpoint command after a reset
+		 * endpoint command.  In the latter case, the xHCI driver is
+		 * not waiting on the configure endpoint command.
+		 */
+		ctrl_ctx = xhci_get_input_control_ctx(xhci,
+				xhci->devs[slot_id]->in_ctx);
+		/* Input ctx add_flags are the endpoint index plus one */
+		ep_index = xhci_last_valid_endpoint(ctrl_ctx->add_flags) - 1;
+		ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+		if (!ep_ring) {
+			/* This must have been an initial configure endpoint */
+			xhci->devs[slot_id]->cmd_status =
+				GET_COMP_CODE(event->status);
+			complete(&xhci->devs[slot_id]->cmd_completion);
+			break;
+		}
+		ep_state = ep_ring->state;
+		xhci_dbg(xhci, "Completed config ep cmd - last ep index = %d, "
+				"state = %d\n", ep_index, ep_state);
+		if (xhci->quirks & XHCI_RESET_EP_QUIRK &&
+				ep_state & EP_HALTED) {
+			/* Clear our internal halted state and restart ring */
+			xhci->devs[slot_id]->ep_rings[ep_index]->state &=
+				~EP_HALTED;
+			ring_ep_doorbell(xhci, slot_id, ep_index);
+		} else {
+			xhci->devs[slot_id]->cmd_status =
+				GET_COMP_CODE(event->status);
+			complete(&xhci->devs[slot_id]->cmd_completion);
+		}
 		break;
 	case TRB_TYPE(TRB_EVAL_CONTEXT):
 		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
@@ -958,7 +1006,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			xhci_queue_reset_ep(xhci, slot_id, ep_index);
 			xhci_cleanup_stalled_ring(xhci,
 					td->urb->dev,
-					td->urb->ep,
 					ep_index, ep_ring);
 			xhci_ring_cmd_db(xhci);
 			goto td_cleanup;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index b1abaeb..bc64b50 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -929,6 +929,12 @@ struct xhci_td {
 	union xhci_trb		*last_trb;
 };
 
+struct xhci_dequeue_state {
+	struct xhci_segment *new_deq_seg;
+	union xhci_trb *new_deq_ptr;
+	int new_cycle_state;
+};
+
 struct xhci_ring {
 	struct xhci_segment	*first_seg;
 	union  xhci_trb		*enqueue;
@@ -955,12 +961,6 @@ struct xhci_ring {
 	u32			cycle_state;
 };
 
-struct xhci_dequeue_state {
-	struct xhci_segment *new_deq_seg;
-	union xhci_trb *new_deq_ptr;
-	int new_cycle_state;
-};
-
 struct xhci_erst_entry {
 	/* 64-bit event ring segment address */
 	u64	seg_addr;
@@ -1063,6 +1063,7 @@ struct xhci_hcd {
 	int			error_bitmask;
 	unsigned int		quirks;
 #define	XHCI_LINK_TRB_QUIRK	(1 << 0)
+#define XHCI_RESET_EP_QUIRK	(1 << 1)
 };
 
 /* For testing purposes */
@@ -1170,6 +1171,8 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, struct usb_device
 int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *udev);
 unsigned int xhci_get_endpoint_index(struct usb_endpoint_descriptor *desc);
 unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc);
+unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index);
+unsigned int xhci_last_valid_endpoint(u32 added_ctxs);
 void xhci_endpoint_zero(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_host_endpoint *ep);
 void xhci_endpoint_copy(struct xhci_hcd *xhci,
 		struct xhci_virt_device *vdev, unsigned int ep_index);
@@ -1233,8 +1236,11 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 		struct xhci_ring *ep_ring, unsigned int slot_id,
 		unsigned int ep_index, struct xhci_dequeue_state *deq_state);
 void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
-		struct usb_device *udev, struct usb_host_endpoint *ep,
+		struct usb_device *udev,
 		unsigned int ep_index, struct xhci_ring *ep_ring);
+void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci,
+		unsigned int slot_id, unsigned int ep_index,
+		struct xhci_dequeue_state *deq_state);
 
 /* xHCI roothub code */
 int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex,
-- 
cgit v0.10.2


From 66d1eebce5cca916e0b08d961690bb01c64751ef Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Thu, 27 Aug 2009 14:35:53 -0700
Subject: USB: xhci: Make TRB completion code comparison readable.

Use trb_comp_code instead of getting the completion code from the transfer
event every time.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 35374dd..e876fd3 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -874,6 +874,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	struct urb *urb = 0;
 	int status = -EINPROGRESS;
 	struct xhci_ep_ctx *ep_ctx;
+	u32 trb_comp_code;
 
 	xhci_dbg(xhci, "In %s\n", __func__);
 	slot_id = TRB_TO_SLOT_ID(event->flags);
@@ -931,7 +932,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			(unsigned int) event->flags);
 
 	/* Look for common error cases */
-	switch (GET_COMP_CODE(event->transfer_len)) {
+	trb_comp_code = GET_COMP_CODE(event->transfer_len);
+	switch (trb_comp_code) {
 	/* Skip codes that require special handling depending on
 	 * transfer type
 	 */
@@ -974,7 +976,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	/* Was this a control transfer? */
 	if (usb_endpoint_xfer_control(&td->urb->ep->desc)) {
 		xhci_debug_trb(xhci, xhci->event_ring->dequeue);
-		switch (GET_COMP_CODE(event->transfer_len)) {
+		switch (trb_comp_code) {
 		case COMP_SUCCESS:
 			if (event_trb == ep_ring->dequeue) {
 				xhci_warn(xhci, "WARN: Success on ctrl setup TRB without IOC set??\n");
@@ -1031,7 +1033,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 				}
 			} else {
 			/* Maybe the event was for the data stage? */
-				if (GET_COMP_CODE(event->transfer_len) != COMP_STOP_INVAL) {
+				if (trb_comp_code != COMP_STOP_INVAL) {
 					/* We didn't stop on a link TRB in the middle */
 					td->urb->actual_length =
 						td->urb->transfer_buffer_length -
@@ -1043,7 +1045,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			}
 		}
 	} else {
-		switch (GET_COMP_CODE(event->transfer_len)) {
+		switch (trb_comp_code) {
 		case COMP_SUCCESS:
 			/* Double check that the HW transferred everything. */
 			if (event_trb != td->last_trb) {
@@ -1120,14 +1122,14 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			/* If the ring didn't stop on a Link or No-op TRB, add
 			 * in the actual bytes transferred from the Normal TRB
 			 */
-			if (GET_COMP_CODE(event->transfer_len) != COMP_STOP_INVAL)
+			if (trb_comp_code != COMP_STOP_INVAL)
 				td->urb->actual_length +=
 					TRB_LEN(cur_trb->generic.field[2]) -
 					TRB_LEN(event->transfer_len);
 		}
 	}
-	if (GET_COMP_CODE(event->transfer_len) == COMP_STOP_INVAL ||
-			GET_COMP_CODE(event->transfer_len) == COMP_STOP) {
+	if (trb_comp_code == COMP_STOP_INVAL ||
+			trb_comp_code == COMP_STOP) {
 		/* The Endpoint Stop Command completion will take care of any
 		 * stopped TDs.  A stopped TD may be restarted, so don't update
 		 * the ring dequeue pointer or take this TD off any lists yet.
-- 
cgit v0.10.2


From 83fbcdcca03013bb5af130d6d91eba11e3d3269e Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Thu, 27 Aug 2009 14:36:03 -0700
Subject: USB: xhci: Handle babbling endpoints correctly.

The 0.95 xHCI spec says that non-control endpoints will be halted if a
babble is detected on a transfer.  The 0.96 xHCI spec says all types of
endpoints will be halted when a babble is detected.  Some hardware that
claims to be 0.95 compliant halts the control endpoint anyway.

When a babble is detected on a control endpoint, check the hardware's
output endpoint context to see if the endpoint is marked as halted.  If
the control endpoint is halted, a reset endpoint command must be issued
and the transfer ring dequeue pointer needs to be moved past the stopped
transfer.  Basically, we treat it as if the control endpoint had stalled.

Handle bulk babbling endpoints as if we got a completion event with a
stall completion code.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index e876fd3..2eadf06 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -993,6 +993,16 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			xhci_warn(xhci, "WARN: short transfer on control ep\n");
 			status = -EREMOTEIO;
 			break;
+		case COMP_BABBLE:
+			/* The 0.96 spec says a babbling control endpoint
+			 * is not halted. The 0.96 spec says it is.  Some HW
+			 * claims to be 0.95 compliant, but it halts the control
+			 * endpoint anyway.  Check if a babble halted the
+			 * endpoint.
+			 */
+			if (ep_ctx->ep_info != EP_STATE_HALTED)
+				break;
+			/* else fall through */
 		case COMP_STALL:
 			/* Did we transfer part of the data (middle) phase? */
 			if (event_trb != ep_ring->dequeue &&
@@ -1137,7 +1147,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		ep_ring->stopped_td = td;
 		ep_ring->stopped_trb = event_trb;
 	} else {
-		if (GET_COMP_CODE(event->transfer_len) == COMP_STALL) {
+		if (trb_comp_code == COMP_STALL ||
+				trb_comp_code == COMP_BABBLE) {
 			/* The transfer is completed from the driver's
 			 * perspective, but we need to issue a set dequeue
 			 * command for this stalled endpoint to move the dequeue
@@ -1168,7 +1179,8 @@ td_cleanup:
 		 * control endpoints).
 		 */
 		if (usb_endpoint_xfer_control(&urb->ep->desc) ||
-			GET_COMP_CODE(event->transfer_len) != COMP_STALL) {
+			(trb_comp_code != COMP_STALL &&
+				trb_comp_code != COMP_BABBLE)) {
 			kfree(td);
 		}
 		urb->hcpriv = NULL;
-- 
cgit v0.10.2


From 9191eee7b8a0e18c07c06d6da502706805cab6d2 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Thu, 27 Aug 2009 14:36:14 -0700
Subject: USB: xhci: Don't touch xhci_td after it's freed.

On a successful transfer, urb->td is freed before the URB is ready to be
given back to the driver.  Don't touch urb->td after it's freed.  This bug
would have only shown up when xHCI debugging was turned on, and the freed
memory was quickly reused for something else.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 2eadf06..4142c04 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1193,7 +1193,7 @@ cleanup:
 	if (urb) {
 		usb_hcd_unlink_urb_from_ep(xhci_to_hcd(xhci), urb);
 		xhci_dbg(xhci, "Giveback URB %p, len = %d, status = %d\n",
-				urb, td->urb->actual_length, status);
+				urb, urb->actual_length, status);
 		spin_unlock(&xhci->lock);
 		usb_hcd_giveback_urb(xhci_to_hcd(xhci), urb, status);
 		spin_lock(&xhci->lock);
-- 
cgit v0.10.2


From 99eb32db45061443ab7552b8fdceae68b90fde55 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Thu, 27 Aug 2009 14:36:24 -0700
Subject: USB: xhci: Check URB's actual transfer buffer size.

Make sure that the amount of data the xHC says was transmitted is less
than or equal to the size of the requested transfer buffer.  Before, if
the host controller erroneously reported that the number of bytes
untransferred was bigger than the buffer in the URB, urb->actual_length
could be set to a very large size.

Make sure urb->actual_length <= urb->transfer_buffer_length.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 4142c04..b4fab00 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1092,7 +1092,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 				td->urb->actual_length =
 					td->urb->transfer_buffer_length -
 					TRB_LEN(event->transfer_len);
-				if (td->urb->actual_length < 0) {
+				if (td->urb->transfer_buffer_length <
+						td->urb->actual_length) {
 					xhci_warn(xhci, "HC gave bad length "
 							"of %d bytes left\n",
 							TRB_LEN(event->transfer_len));
@@ -1167,6 +1168,20 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 td_cleanup:
 		/* Clean up the endpoint's TD list */
 		urb = td->urb;
+		/* Do one last check of the actual transfer length.
+		 * If the host controller said we transferred more data than
+		 * the buffer length, urb->actual_length will be a very big
+		 * number (since it's unsigned).  Play it safe and say we didn't
+		 * transfer anything.
+		 */
+		if (urb->actual_length > urb->transfer_buffer_length) {
+			xhci_warn(xhci, "URB transfer length is wrong, "
+					"xHC issue? req. len = %u, "
+					"act. len = %u\n",
+					urb->transfer_buffer_length,
+					urb->actual_length);
+			urb->actual_length = 0;
+		}
 		list_del(&td->td_list);
 		/* Was this TD slated to be cancelled but completed anyway? */
 		if (!list_empty(&td->cancelled_td_list)) {
-- 
cgit v0.10.2


From 204970a4bb2f584afc430ae330cd44aee329cea4 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 28 Aug 2009 14:28:15 -0700
Subject: USB: xhci: Check URB_SHORT_NOT_OK before setting short packet status.

Make sure that the driver that submitted the URB considers a short packet
an error before setting -EREMOTEIO during a short control transfer.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index b4fab00..d264f9a 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -991,7 +991,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			break;
 		case COMP_SHORT_TX:
 			xhci_warn(xhci, "WARN: short transfer on control ep\n");
-			status = -EREMOTEIO;
+			if (td->urb->transfer_flags & URB_SHORT_NOT_OK)
+				status = -EREMOTEIO;
+			else
+				status = 0;
 			break;
 		case COMP_BABBLE:
 			/* The 0.96 spec says a babbling control endpoint
@@ -1034,7 +1037,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			if (event_trb == td->last_trb) {
 				if (td->urb->actual_length != 0) {
 					/* Don't overwrite a previously set error code */
-					if (status == -EINPROGRESS || status == 0)
+					if ((status == -EINPROGRESS ||
+								status == 0) &&
+							(td->urb->transfer_flags
+							 & URB_SHORT_NOT_OK))
 						/* Did we already see a short data stage? */
 						status = -EREMOTEIO;
 				} else {
-- 
cgit v0.10.2


From 2f697f6cbff155b3ce4053a50cdf00b5be4dda11 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 28 Aug 2009 14:28:18 -0700
Subject: USB: xhci: Set -EREMOTEIO when xHC gives bad transfer length.

The xHCI hardware reports the number of bytes untransferred for a given
transfer buffer.  If the hardware reports a bytes untransferred value
greater than the submitted buffer size, we want to play it safe and say no
data was transferred.  If the driver considers a short packet to be an
error, remember to set -EREMOTEIO.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index d264f9a..aac379e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1104,6 +1104,11 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 							"of %d bytes left\n",
 							TRB_LEN(event->transfer_len));
 					td->urb->actual_length = 0;
+					if (td->urb->transfer_flags &
+							URB_SHORT_NOT_OK)
+						status = -EREMOTEIO;
+					else
+						status = 0;
 				}
 				/* Don't overwrite a previously set error code */
 				if (status == -EINPROGRESS) {
@@ -1187,6 +1192,10 @@ td_cleanup:
 					urb->transfer_buffer_length,
 					urb->actual_length);
 			urb->actual_length = 0;
+			if (td->urb->transfer_flags & URB_SHORT_NOT_OK)
+				status = -EREMOTEIO;
+			else
+				status = 0;
 		}
 		list_del(&td->td_list);
 		/* Was this TD slated to be cancelled but completed anyway? */
-- 
cgit v0.10.2


From 624defa12f304b4d11eda309bc207fa5a1900d0f Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Wed, 2 Sep 2009 12:14:28 -0700
Subject: USB: xhci: Support interrupt transfers.

Interrupt transfers are submitted to the xHCI hardware using the same TRB
type as bulk transfers.  Re-use the bulk transfer enqueueing code to
enqueue interrupt transfers.

Interrupt transfers are a bit different than bulk transfers.  When the
interrupt endpoint is to be serviced, the xHC will consume (at most) one
TD.  A TD (comprised of sg list entries) can take several service
intervals to transmit.  The important thing for device drivers to note is
that if they use the scatter gather interface to submit interrupt
requests, they will not get data sent from two different scatter gather
lists in the same service interval.

For now, the xHCI driver will use the service interval from the endpoint's
descriptor (bInterval).  Drivers will need a hook to poll at a more
frequent interval.  Set urb->interval to the interval that the xHCI
hardware will use.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 1d4a1e3..e478a63 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -727,6 +727,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 		ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb,
 				slot_id, ep_index);
 		spin_unlock_irqrestore(&xhci->lock, flags);
+	} else if (usb_endpoint_xfer_int(&urb->ep->desc)) {
+		spin_lock_irqsave(&xhci->lock, flags);
+		ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb,
+				slot_id, ep_index);
+		spin_unlock_irqrestore(&xhci->lock, flags);
 	} else {
 		ret = -EINVAL;
 	}
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index aac379e..ff5e6bc 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1072,7 +1072,12 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 				else
 					status = 0;
 			} else {
-				xhci_dbg(xhci, "Successful bulk transfer!\n");
+				if (usb_endpoint_xfer_bulk(&td->urb->ep->desc))
+					xhci_dbg(xhci, "Successful bulk "
+							"transfer!\n");
+				else
+					xhci_dbg(xhci, "Successful interrupt "
+							"transfer!\n");
 				status = 0;
 			}
 			break;
@@ -1464,6 +1469,47 @@ static void giveback_first_trb(struct xhci_hcd *xhci, int slot_id,
 	ring_ep_doorbell(xhci, slot_id, ep_index);
 }
 
+/*
+ * xHCI uses normal TRBs for both bulk and interrupt.  When the interrupt
+ * endpoint is to be serviced, the xHC will consume (at most) one TD.  A TD
+ * (comprised of sg list entries) can take several service intervals to
+ * transmit.
+ */
+int xhci_queue_intr_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+		struct urb *urb, int slot_id, unsigned int ep_index)
+{
+	struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci,
+			xhci->devs[slot_id]->out_ctx, ep_index);
+	int xhci_interval;
+	int ep_interval;
+
+	xhci_interval = EP_INTERVAL_TO_UFRAMES(ep_ctx->ep_info);
+	ep_interval = urb->interval;
+	/* Convert to microframes */
+	if (urb->dev->speed == USB_SPEED_LOW ||
+			urb->dev->speed == USB_SPEED_FULL)
+		ep_interval *= 8;
+	/* FIXME change this to a warning and a suggestion to use the new API
+	 * to set the polling interval (once the API is added).
+	 */
+	if (xhci_interval != ep_interval) {
+		if (!printk_ratelimit())
+			dev_dbg(&urb->dev->dev, "Driver uses different interval"
+					" (%d microframe%s) than xHCI "
+					"(%d microframe%s)\n",
+					ep_interval,
+					ep_interval == 1 ? "" : "s",
+					xhci_interval,
+					xhci_interval == 1 ? "" : "s");
+		urb->interval = xhci_interval;
+		/* Convert back to frames for LS/FS devices */
+		if (urb->dev->speed == USB_SPEED_LOW ||
+				urb->dev->speed == USB_SPEED_FULL)
+			urb->interval /= 8;
+	}
+	return xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index);
+}
+
 static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		struct urb *urb, int slot_id, unsigned int ep_index)
 {
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index bc64b50..a7728aa 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -581,6 +581,7 @@ struct xhci_ep_ctx {
 /* bit 15 is Linear Stream Array */
 /* Interval - period between requests to an endpoint - 125u increments. */
 #define EP_INTERVAL(p)		((p & 0xff) << 16)
+#define EP_INTERVAL_TO_UFRAMES(p)		(1 << (((p) >> 16) & 0xff))
 
 /* ep_info2 bitmasks */
 /*
@@ -1223,6 +1224,8 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb,
 		int slot_id, unsigned int ep_index);
 int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb,
 		int slot_id, unsigned int ep_index);
+int xhci_queue_intr_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb,
+		int slot_id, unsigned int ep_index);
 int xhci_queue_configure_endpoint(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
 		u32 slot_id);
 int xhci_queue_evaluate_context(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
-- 
cgit v0.10.2


From 6682bb39e111b34290e25c4d275c5bcf8bbccbe1 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Tue, 8 Sep 2009 13:20:16 -0700
Subject: USB: Fix SS endpoint companion descriptor parsing.

When there's a descriptor after the SuperSpeed endpoint companion
descriptor, the previous code would have skipped over twice the length it
was supposed to.  This code fixes crashes seen with UASP devices (which
have a UASP descriptor after the SS endpoint companion descriptor).

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index a16c538..0d3af6a 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -105,7 +105,7 @@ static int usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 	ep->ss_ep_comp->extralen = i;
 	buffer += i;
 	size -= i;
-	retval = buffer - buffer_start + i;
+	retval = buffer - buffer_start;
 	if (num_skipped > 0)
 		dev_dbg(ddev, "skipped %d descriptor%s after %s\n",
 				num_skipped, plural(num_skipped),
-- 
cgit v0.10.2


From f8086a07c4740ae37e5221508b9cabc8fef4bf6e Mon Sep 17 00:00:00 2001
From: "Figo.zhang" <figo1802@gmail.com>
Date: Sat, 6 Jun 2009 20:31:49 +0800
Subject: USB: ehci-dbg.c: no need for checking it before call vfree

vfree() does it's own NULL checking,so no need for check before
calling it.

Signed-off-by: Figo.zhang <figo1802@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 7f4ace7..1104caa 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -879,8 +879,7 @@ static int debug_close(struct inode *inode, struct file *file)
 	struct debug_buffer *buf = file->private_data;
 
 	if (buf) {
-		if (buf->output_buf)
-			vfree(buf->output_buf);
+		vfree(buf->output_buf);
 		kfree(buf);
 	}
 
-- 
cgit v0.10.2


From d2fb1bb32b9b379a029aeca969b45d28bd91ae89 Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Wed, 10 Jun 2009 14:15:52 -0600
Subject: USB: sisusbvga: drop usb_buffer_alloc

This patch falls out of my work to fix usbmon so it uses virtual addresses.
It is not necessary, the "new" usbmon should work just fine with sisusbvga.
However, it seems ridiculous that anyone would use uncached memory to
transfer bulk data. Dropping the unnecessary use of usb_buffer_alloc
should be beneficial here, in case anyone ever uses the dongle on
anything beyond x86.

I had no success in raising the author of the driver by e-mail, so
the patch is not actually tested.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index b4ec716..0025847 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -79,14 +79,12 @@ sisusb_free_buffers(struct sisusb_usb_data *sisusb)
 
 	for (i = 0; i < NUMOBUFS; i++) {
 		if (sisusb->obuf[i]) {
-			usb_buffer_free(sisusb->sisusb_dev, sisusb->obufsize,
-				sisusb->obuf[i], sisusb->transfer_dma_out[i]);
+			kfree(sisusb->obuf[i]);
 			sisusb->obuf[i] = NULL;
 		}
 	}
 	if (sisusb->ibuf) {
-		usb_buffer_free(sisusb->sisusb_dev, sisusb->ibufsize,
-			sisusb->ibuf, sisusb->transfer_dma_in);
+		kfree(sisusb->ibuf);
 		sisusb->ibuf = NULL;
 	}
 }
@@ -230,8 +228,7 @@ sisusb_bulk_completeout(struct urb *urb)
 
 static int
 sisusb_bulkout_msg(struct sisusb_usb_data *sisusb, int index, unsigned int pipe, void *data,
-		int len, int *actual_length, int timeout, unsigned int tflags,
-		dma_addr_t transfer_dma)
+		int len, int *actual_length, int timeout, unsigned int tflags)
 {
 	struct urb *urb = sisusb->sisurbout[index];
 	int retval, byteswritten = 0;
@@ -245,9 +242,6 @@ sisusb_bulkout_msg(struct sisusb_usb_data *sisusb, int index, unsigned int pipe,
 	urb->transfer_flags |= tflags;
 	urb->actual_length = 0;
 
-	if ((urb->transfer_dma = transfer_dma))
-		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-
 	/* Set up context */
 	sisusb->urbout_context[index].actual_length = (timeout) ?
 						NULL : actual_length;
@@ -297,8 +291,8 @@ sisusb_bulk_completein(struct urb *urb)
 }
 
 static int
-sisusb_bulkin_msg(struct sisusb_usb_data *sisusb, unsigned int pipe, void *data, int len,
-		int *actual_length, int timeout, unsigned int tflags, dma_addr_t transfer_dma)
+sisusb_bulkin_msg(struct sisusb_usb_data *sisusb, unsigned int pipe, void *data,
+	int len, int *actual_length, int timeout, unsigned int tflags)
 {
 	struct urb *urb = sisusb->sisurbin;
 	int retval, readbytes = 0;
@@ -311,9 +305,6 @@ sisusb_bulkin_msg(struct sisusb_usb_data *sisusb, unsigned int pipe, void *data,
 	urb->transfer_flags |= tflags;
 	urb->actual_length = 0;
 
-	if ((urb->transfer_dma = transfer_dma))
-		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-
 	sisusb->completein = 0;
 	retval = usb_submit_urb(urb, GFP_ATOMIC);
 	if (retval == 0) {
@@ -422,8 +413,7 @@ static int sisusb_send_bulk_msg(struct sisusb_usb_data *sisusb, int ep, int len,
 						thispass,
 						&transferred_len,
 						async ? 0 : 5 * HZ,
-						tflags,
-						sisusb->transfer_dma_out[index]);
+						tflags);
 
 			if (result == -ETIMEDOUT) {
 
@@ -432,29 +422,16 @@ static int sisusb_send_bulk_msg(struct sisusb_usb_data *sisusb, int ep, int len,
 					return -ETIME;
 
 				continue;
+			}
 
-			} else if ((result == 0) && !async && transferred_len) {
+			if ((result == 0) && !async && transferred_len) {
 
 				thispass -= transferred_len;
-				if (thispass) {
-					if (sisusb->transfer_dma_out) {
-						/* If DMA, copy remaining
-						 * to beginning of buffer
-						 */
-						memcpy(buffer,
-						       buffer + transferred_len,
-						       thispass);
-					} else {
-						/* If not DMA, simply increase
-						 * the pointer
-						 */
-						buffer += transferred_len;
-					}
-				}
+				buffer += transferred_len;
 
 			} else
 				break;
-		};
+		}
 
 		if (result)
 			return result;
@@ -530,8 +507,7 @@ static int sisusb_recv_bulk_msg(struct sisusb_usb_data *sisusb, int ep, int len,
 					   thispass,
 					   &transferred_len,
 					   5 * HZ,
-					   tflags,
-					   sisusb->transfer_dma_in);
+					   tflags);
 
 		if (transferred_len)
 			thispass = transferred_len;
@@ -3132,8 +3108,7 @@ static int sisusb_probe(struct usb_interface *intf,
 
 	/* Allocate buffers */
 	sisusb->ibufsize = SISUSB_IBUF_SIZE;
-	if (!(sisusb->ibuf = usb_buffer_alloc(dev, SISUSB_IBUF_SIZE,
-					GFP_KERNEL, &sisusb->transfer_dma_in))) {
+	if (!(sisusb->ibuf = kmalloc(SISUSB_IBUF_SIZE, GFP_KERNEL))) {
 		dev_err(&sisusb->sisusb_dev->dev, "Failed to allocate memory for input buffer");
 		retval = -ENOMEM;
 		goto error_2;
@@ -3142,9 +3117,7 @@ static int sisusb_probe(struct usb_interface *intf,
 	sisusb->numobufs = 0;
 	sisusb->obufsize = SISUSB_OBUF_SIZE;
 	for (i = 0; i < NUMOBUFS; i++) {
-		if (!(sisusb->obuf[i] = usb_buffer_alloc(dev, SISUSB_OBUF_SIZE,
-					GFP_KERNEL,
-					&sisusb->transfer_dma_out[i]))) {
+		if (!(sisusb->obuf[i] = kmalloc(SISUSB_OBUF_SIZE, GFP_KERNEL))) {
 			if (i == 0) {
 				dev_err(&sisusb->sisusb_dev->dev, "Failed to allocate memory for output buffer\n");
 				retval = -ENOMEM;
diff --git a/drivers/usb/misc/sisusbvga/sisusb.h b/drivers/usb/misc/sisusbvga/sisusb.h
index cf0b4a5..55492a5 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.h
+++ b/drivers/usb/misc/sisusbvga/sisusb.h
@@ -123,8 +123,6 @@ struct sisusb_usb_data {
 	int numobufs;		/* number of obufs = number of out urbs */
 	char *obuf[NUMOBUFS], *ibuf;	/* transfer buffers */
 	int obufsize, ibufsize;
-	dma_addr_t transfer_dma_out[NUMOBUFS];
-	dma_addr_t transfer_dma_in;
 	struct urb *sisurbout[NUMOBUFS];
 	struct urb *sisurbin;
 	unsigned char urbstatus[NUMOBUFS];
-- 
cgit v0.10.2


From b07f4d016ed179bc4fb8046657a84d5dc0e94b42 Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Wed, 10 Jun 2009 15:21:24 -0600
Subject: USB: usbmon: drop Kconfig defaults

These statements seem to be unnecessary. No idea why, but I built all
possible configurations and everything gets built just as before.

It's an old patch that popped from discussion with Paul in November 2008.
Obviously not a very high priority but better late than never.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/mon/Kconfig b/drivers/usb/mon/Kconfig
index f28f350..635745f 100644
--- a/drivers/usb/mon/Kconfig
+++ b/drivers/usb/mon/Kconfig
@@ -5,11 +5,9 @@
 config USB_MON
 	tristate "USB Monitor"
 	depends on USB
-	default y if USB=y
-	default m if USB=m
 	help
 	  If you select this option, a component which captures the USB traffic
 	  between peripheral-specific drivers and HC drivers will be built.
 	  For more information, see <file:Documentation/usb/usbmon.txt>.
 
-	  If unsure, say Y (if allowed), otherwise M.
+	  If unsure, say Y, if allowed, otherwise M.
-- 
cgit v0.10.2


From f4e2332cfcf900e0a926c4e0fc35f751bcbcaa1b Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Wed, 10 Jun 2009 15:23:52 -0600
Subject: USB: usbmon: touch up the documentation

I think this sentence was confusing regarding the possible size
of the data area.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt
index 6c3c625..4efd847 100644
--- a/Documentation/usb/usbmon.txt
+++ b/Documentation/usb/usbmon.txt
@@ -305,7 +305,7 @@ Before the call, hdr, data, and alloc should be filled. Upon return, the area
 pointed by hdr contains the next event structure, and the data buffer contains
 the data, if any. The event is removed from the kernel buffer.
 
-The MON_IOCX_GET copies 48 bytes, MON_IOCX_GETX copies 64 bytes.
+The MON_IOCX_GET copies 48 bytes to hdr area, MON_IOCX_GETX copies 64 bytes.
 
  MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg)
 
-- 
cgit v0.10.2


From 4e9e92003529e5c7bb11281f7c2c9b3fe8858403 Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Thu, 11 Jun 2009 08:53:20 -0600
Subject: USB: usbmon: end ugly tricks with DMA peeking

This patch fixes crashes when usbmon attempts to access GART aperture.
The old code attempted to take a bus address and convert it into a
virtual address, which clearly was impossible on systems with actual
IOMMUs. Let us not persist in this foolishness, and use transfer_buffer
in all cases instead.

I think downsides are negligible. The ones I see are:
 - A driver may pass an address of one buffer down as transfer_buffer,
   and entirely different entity mapped for DMA, resulting in misleading
   output of usbmon. Note, however, that PIO based controllers would
   do transfer the same data that usbmon sees here.
 - Out of tree drivers may crash usbmon if they store garbage in
   transfer_buffer. I inspected the in-tree drivers, and clarified
   the documentation in comments.
 - Drivers that use get_user_pages will not be possible to monitor.
   I only found one driver with this problem (drivers/staging/rspiusb).
 - Same happens with with usb_storage transferring from highmem, but
   it works fine on 64-bit systems, so I think it's not a concern.
   At least we don't crash anymore.

Why didn't we do this in 2.6.10? That's because back in those days
it was popular not to fill in transfer_buffer, so almost all
traffic would be invisible (e.g. all of HID was like that).
But now, the tree is almost 100% PIO friendly, so we can do the
right thing at last.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/mon/Makefile b/drivers/usb/mon/Makefile
index c6516b5..384b198 100644
--- a/drivers/usb/mon/Makefile
+++ b/drivers/usb/mon/Makefile
@@ -2,6 +2,6 @@
 # Makefile for USB monitor
 #
 
-usbmon-objs	:= mon_main.o mon_stat.o mon_text.o mon_bin.o mon_dma.o
+usbmon-objs	:= mon_main.o mon_stat.o mon_text.o mon_bin.o
 
 obj-$(CONFIG_USB_MON)	+= usbmon.o
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 0f7a30b..dfdc43e 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -220,9 +220,8 @@ static void mon_free_buff(struct mon_pgmap *map, int npages);
 
 /*
  * This is a "chunked memcpy". It does not manipulate any counters.
- * But it returns the new offset for repeated application.
  */
-unsigned int mon_copy_to_buff(const struct mon_reader_bin *this,
+static void mon_copy_to_buff(const struct mon_reader_bin *this,
     unsigned int off, const unsigned char *from, unsigned int length)
 {
 	unsigned int step_len;
@@ -247,7 +246,6 @@ unsigned int mon_copy_to_buff(const struct mon_reader_bin *this,
 		from += step_len;
 		length -= step_len;
 	}
-	return off;
 }
 
 /*
@@ -400,15 +398,8 @@ static char mon_bin_get_data(const struct mon_reader_bin *rp,
     unsigned int offset, struct urb *urb, unsigned int length)
 {
 
-	if (urb->dev->bus->uses_dma &&
-	    (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
-		mon_dmapeek_vec(rp, offset, urb->transfer_dma, length);
-		return 0;
-	}
-
 	if (urb->transfer_buffer == NULL)
 		return 'Z';
-
 	mon_copy_to_buff(rp, offset, urb->transfer_buffer, length);
 	return 0;
 }
@@ -635,7 +626,6 @@ static int mon_bin_open(struct inode *inode, struct file *file)
 	spin_lock_init(&rp->b_lock);
 	init_waitqueue_head(&rp->b_wait);
 	mutex_init(&rp->fetch_lock);
-
 	rp->b_size = BUFF_DFL;
 
 	size = sizeof(struct mon_pgmap) * (rp->b_size/CHUNK_SIZE);
diff --git a/drivers/usb/mon/mon_dma.c b/drivers/usb/mon/mon_dma.c
deleted file mode 100644
index 140cc80..0000000
--- a/drivers/usb/mon/mon_dma.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * The USB Monitor, inspired by Dave Harding's USBMon.
- *
- * mon_dma.c: Library which snoops on DMA areas.
- *
- * Copyright (C) 2005 Pete Zaitcev (zaitcev@redhat.com)
- */
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <asm/page.h>
-
-#include <linux/usb.h>	/* Only needed for declarations in usb_mon.h */
-#include "usb_mon.h"
-
-/*
- * PC-compatibles, are, fortunately, sufficiently cache-coherent for this.
- */
-#if defined(__i386__) || defined(__x86_64__) /* CONFIG_ARCH_I386 doesn't exit */
-#define MON_HAS_UNMAP 1
-
-#define phys_to_page(phys)	pfn_to_page((phys) >> PAGE_SHIFT)
-
-char mon_dmapeek(unsigned char *dst, dma_addr_t dma_addr, int len)
-{
-	struct page *pg;
-	unsigned long flags;
-	unsigned char *map;
-	unsigned char *ptr;
-
-	/*
-	 * On i386, a DMA handle is the "physical" address of a page.
-	 * In other words, the bus address is equal to physical address.
-	 * There is no IOMMU.
-	 */
-	pg = phys_to_page(dma_addr);
-
-	/*
-	 * We are called from hardware IRQs in case of callbacks.
-	 * But we can be called from softirq or process context in case
-	 * of submissions. In such case, we need to protect KM_IRQ0.
-	 */
-	local_irq_save(flags);
-	map = kmap_atomic(pg, KM_IRQ0);
-	ptr = map + (dma_addr & (PAGE_SIZE-1));
-	memcpy(dst, ptr, len);
-	kunmap_atomic(map, KM_IRQ0);
-	local_irq_restore(flags);
-	return 0;
-}
-
-void mon_dmapeek_vec(const struct mon_reader_bin *rp,
-    unsigned int offset, dma_addr_t dma_addr, unsigned int length)
-{
-	unsigned long flags;
-	unsigned int step_len;
-	struct page *pg;
-	unsigned char *map;
-	unsigned long page_off, page_len;
-
-	local_irq_save(flags);
-	while (length) {
-		/* compute number of bytes we are going to copy in this page */
-		step_len = length;
-		page_off = dma_addr & (PAGE_SIZE-1);
-		page_len = PAGE_SIZE - page_off;
-		if (page_len < step_len)
-			step_len = page_len;
-
-		/* copy data and advance pointers */
-		pg = phys_to_page(dma_addr);
-		map = kmap_atomic(pg, KM_IRQ0);
-		offset = mon_copy_to_buff(rp, offset, map + page_off, step_len);
-		kunmap_atomic(map, KM_IRQ0);
-		dma_addr += step_len;
-		length -= step_len;
-	}
-	local_irq_restore(flags);
-}
-
-#endif /* __i386__ */
-
-#ifndef MON_HAS_UNMAP
-char mon_dmapeek(unsigned char *dst, dma_addr_t dma_addr, int len)
-{
-	return 'D';
-}
-
-void mon_dmapeek_vec(const struct mon_reader_bin *rp,
-    unsigned int offset, dma_addr_t dma_addr, unsigned int length)
-{
-	;
-}
-
-#endif /* MON_HAS_UNMAP */
diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c
index 5e0ab42..e0c2db3 100644
--- a/drivers/usb/mon/mon_main.c
+++ b/drivers/usb/mon/mon_main.c
@@ -361,7 +361,6 @@ static int __init mon_init(void)
 	}
 	// MOD_INC_USE_COUNT(which_module?);
 
-
 	mutex_lock(&usb_bus_list_lock);
 	list_for_each_entry (ubus, &usb_bus_list, bus_list) {
 		mon_bus_init(ubus);
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index a7eb4c9..9f1a922 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -150,20 +150,6 @@ static inline char mon_text_get_data(struct mon_event_text *ep, struct urb *urb,
 			return '>';
 	}
 
-	/*
-	 * The check to see if it's safe to poke at data has an enormous
-	 * number of corner cases, but it seems that the following is
-	 * more or less safe.
-	 *
-	 * We do not even try to look at transfer_buffer, because it can
-	 * contain non-NULL garbage in case the upper level promised to
-	 * set DMA for the HCD.
-	 */
-	if (urb->dev->bus->uses_dma &&
-	    (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
-		return mon_dmapeek(ep->data, urb->transfer_dma, len);
-	}
-
 	if (urb->transfer_buffer == NULL)
 		return 'Z';	/* '0' would be not as pretty. */
 
diff --git a/drivers/usb/mon/usb_mon.h b/drivers/usb/mon/usb_mon.h
index f5d84ff..df9a4df 100644
--- a/drivers/usb/mon/usb_mon.h
+++ b/drivers/usb/mon/usb_mon.h
@@ -65,20 +65,6 @@ int __init mon_bin_init(void);
 void mon_bin_exit(void);
 
 /*
- * DMA interface.
- *
- * XXX The vectored side needs a serious re-thinking. Abstracting vectors,
- * like in Paolo's original patch, produces a double pkmap. We need an idea.
-*/
-extern char mon_dmapeek(unsigned char *dst, dma_addr_t dma_addr, int len);
-
-struct mon_reader_bin;
-extern void mon_dmapeek_vec(const struct mon_reader_bin *rp,
-    unsigned int offset, dma_addr_t dma_addr, unsigned int len);
-extern unsigned int mon_copy_to_buff(const struct mon_reader_bin *rp,
-    unsigned int offset, const unsigned char *from, unsigned int len);
-
-/*
  */
 extern struct mutex mon_lock;
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 19fabc4..3b45a0d 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1036,9 +1036,10 @@ typedef void (*usb_complete_t)(struct urb *);
  * @transfer_flags: A variety of flags may be used to affect how URB
  *	submission, unlinking, or operation are handled.  Different
  *	kinds of URB can use different flags.
- * @transfer_buffer:  This identifies the buffer to (or from) which
- * 	the I/O request will be performed (unless URB_NO_TRANSFER_DMA_MAP
- *	is set).  This buffer must be suitable for DMA; allocate it with
+ * @transfer_buffer:  This identifies the buffer to (or from) which the I/O
+ *	request will be performed unless URB_NO_TRANSFER_DMA_MAP is set
+ *	(however, do not leave garbage in transfer_buffer even then).
+ *	This buffer must be suitable for DMA; allocate it with
  *	kmalloc() or equivalent.  For transfers to "in" endpoints, contents
  *	of this buffer will be modified.  This buffer is used for the data
  *	stage of control transfers.
@@ -1104,9 +1105,15 @@ typedef void (*usb_complete_t)(struct urb *);
  * allocate a DMA buffer with usb_buffer_alloc() or call usb_buffer_map().
  * When these transfer flags are provided, host controller drivers will
  * attempt to use the dma addresses found in the transfer_dma and/or
- * setup_dma fields rather than determining a dma address themselves.  (Note
- * that transfer_buffer and setup_packet must still be set because not all
- * host controllers use DMA, nor do virtual root hubs).
+ * setup_dma fields rather than determining a dma address themselves.
+ *
+ * Note that transfer_buffer must still be set if the controller
+ * does not support DMA (as indicated by bus.uses_dma) and when talking
+ * to root hub. If you have to trasfer between highmem zone and the device
+ * on such controller, create a bounce buffer or bail out with an error.
+ * If transfer_buffer cannot be set (is in highmem) and the controller is DMA
+ * capable, assign NULL to it, so that usbmon knows not to use the value.
+ * The setup_packet must always be set, so it cannot be located in highmem.
  *
  * Initialization:
  *
-- 
cgit v0.10.2


From 81bf46f3034046c572714bdee1dc51beb3475082 Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Thu, 11 Jun 2009 08:40:39 -0600
Subject: USB: Let usb_sg_init to set transfer_buffer more often

This fix permits the "new" usbmon to access usb-storage's data buffer
without DMA remapping tricks. It should be compatible with PIO controllers
and not add any new crashes. Note that from now on PIO controllers and
usbmon are uniform in their access pattern and if one crashes then
the other will too. Hopefuly neither does.

As a side effect, we get rid for #ifdefs, which were a little ugly.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 9720e69..da718e8 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -459,35 +459,23 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 			io->urbs[i]->context = io;
 
 			/*
-			 * Some systems need to revert to PIO when DMA is
-			 * temporarily unavailable.  For their sakes, both
-			 * transfer_buffer and transfer_dma are set when
-			 * possible.  However this can only work on systems
-			 * without:
+			 * Some systems need to revert to PIO when DMA is temporarily
+			 * unavailable.  For their sakes, both transfer_buffer and
+			 * transfer_dma are set when possible.
 			 *
-			 *  - HIGHMEM, since DMA buffers located in high memory
-			 *    are not directly addressable by the CPU for PIO;
-			 *
-			 *  - IOMMU, since dma_map_sg() is allowed to use an
-			 *    IOMMU to make virtually discontiguous buffers be
-			 *    "dma-contiguous" so that PIO and DMA need diferent
-			 *    numbers of URBs.
-			 *
-			 * So when HIGHMEM or IOMMU are in use, transfer_buffer
-			 * is NULL to prevent stale pointers and to help spot
-			 * bugs.
+			 * Note that if IOMMU coalescing occurred, we cannot
+			 * trust sg_page anymore, so check if S/G list shrunk.
 			 */
+			if (io->nents == io->entries && !PageHighMem(sg_page(sg)))
+				io->urbs[i]->transfer_buffer = sg_virt(sg);
+			else
+				io->urbs[i]->transfer_buffer = NULL;
+
 			if (dma) {
 				io->urbs[i]->transfer_dma = sg_dma_address(sg);
 				len = sg_dma_len(sg);
-#if defined(CONFIG_HIGHMEM) || defined(CONFIG_GART_IOMMU)
-				io->urbs[i]->transfer_buffer = NULL;
-#else
-				io->urbs[i]->transfer_buffer = sg_virt(sg);
-#endif
 			} else {
 				/* hc may use _only_ transfer_buffer */
-				io->urbs[i]->transfer_buffer = sg_virt(sg);
 				len = sg->length;
 			}
 
-- 
cgit v0.10.2


From 85e08ca54c5c203cd2638f0fc8fa899a539f6254 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 21 Jun 2009 23:19:23 +0200
Subject: USB: Move endpoint sync type definitions from usb/audio.h to
 usb/ch9.h

And use the new definitions in the USB Audio Class gadget driver.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c
index 66527ba..76afbd1 100644
--- a/drivers/usb/gadget/f_audio.c
+++ b/drivers/usb/gadget/f_audio.c
@@ -174,7 +174,7 @@ static struct usb_endpoint_descriptor as_out_ep_desc __initdata = {
 	.bLength =		USB_DT_ENDPOINT_AUDIO_SIZE,
 	.bDescriptorType =	USB_DT_ENDPOINT,
 	.bEndpointAddress =	USB_DIR_OUT,
-	.bmAttributes =		USB_AS_ENDPOINT_ADAPTIVE
+	.bmAttributes =		USB_ENDPOINT_SYNC_ADAPTIVE
 				| USB_ENDPOINT_XFER_ISOC,
 	.wMaxPacketSize =	__constant_cpu_to_le16(OUT_EP_MAX_PACKET_SIZE),
 	.bInterval =		4,
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index b5744bc..bcf4fb4 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -46,12 +46,6 @@
 #define MIDI_IN_JACK			0x02
 #define MIDI_OUT_JACK			0x03
 
-/* endpoint attributes */
-#define EP_ATTR_MASK			0x0c
-#define EP_ATTR_ASYNC			0x04
-#define EP_ATTR_ADAPTIVE		0x08
-#define EP_ATTR_SYNC			0x0c
-
 /* cs endpoint attributes */
 #define EP_CS_ATTR_SAMPLE_RATE		0x01
 #define EP_CS_ATTR_PITCH_CONTROL	0x02
@@ -244,10 +238,6 @@ struct usb_as_formate_type_i_discrete_descriptor_##n {		\
 #define USB_AS_FORMAT_TYPE_II		0x2
 #define USB_AS_FORMAT_TYPE_III		0x3
 
-#define USB_AS_ENDPOINT_ASYNC		(1 << 2)
-#define USB_AS_ENDPOINT_ADAPTIVE	(2 << 2)
-#define USB_AS_ENDPOINT_SYNC		(3 << 2)
-
 struct usb_as_iso_endpoint_descriptor {
 	__u8  bLength;			/* in bytes: 7 */
 	__u8  bDescriptorType;		/* USB_DT_CS_ENDPOINT */
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 9322363..8f8b741 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -348,6 +348,12 @@ struct usb_endpoint_descriptor {
 #define USB_ENDPOINT_NUMBER_MASK	0x0f	/* in bEndpointAddress */
 #define USB_ENDPOINT_DIR_MASK		0x80
 
+#define USB_ENDPOINT_SYNCTYPE		0x0c
+#define USB_ENDPOINT_SYNC_NONE		(0 << 2)
+#define USB_ENDPOINT_SYNC_ASYNC		(1 << 2)
+#define USB_ENDPOINT_SYNC_ADAPTIVE	(2 << 2)
+#define USB_ENDPOINT_SYNC_SYNC		(3 << 2)
+
 #define USB_ENDPOINT_XFERTYPE_MASK	0x03	/* in bmAttributes */
 #define USB_ENDPOINT_XFER_CONTROL	0
 #define USB_ENDPOINT_XFER_ISOC		1
-- 
cgit v0.10.2


From 315ad3028c8aae14891797040f855fc3291a076b Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 21 Jun 2009 23:20:39 +0200
Subject: USB: Move vendor subclass definition from usb/audio.h to usb/ch9.h

USB_SUBCLASS_VENDOR_SPEC is common to several USB classes and as such belongs
to usb/ch9.h.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index bcf4fb4..f75092a 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -24,7 +24,6 @@
 #define USB_SUBCLASS_AUDIOCONTROL	0x01
 #define USB_SUBCLASS_AUDIOSTREAMING	0x02
 #define USB_SUBCLASS_MIDISTREAMING	0x03
-#define USB_SUBCLASS_VENDOR_SPEC	0xff
 
 /* A.5 Audio Class-Specific AC interface Descriptor Subtypes*/
 #define HEADER				0x01
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 8f8b741..94012e6 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -258,6 +258,8 @@ struct usb_device_descriptor {
 #define USB_CLASS_APP_SPEC		0xfe
 #define USB_CLASS_VENDOR_SPEC		0xff
 
+#define USB_SUBCLASS_VENDOR_SPEC	0xff
+
 /*-------------------------------------------------------------------------*/
 
 /* USB_DT_CONFIG: Configuration descriptor information.
-- 
cgit v0.10.2


From 512ad27d8667158747de2e8da8a23e8f50e91856 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 21 Jun 2009 23:23:05 +0200
Subject: USB audio gadget: Prefix all macro definitions with UAC_ in
 linux/usb/audio.h

linux/usb/audio.h is a public header file that includes definitions
exported to userspace. To avoid namespace clashes, prefix all macro
definitions with UAC_. Existing macros and structures prefixed with
USB_AC_ and USB_AS_ are renamed for consistency.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/audio.c b/drivers/usb/gadget/audio.c
index 9f80f4e..a3a0f4a 100644
--- a/drivers/usb/gadget/audio.c
+++ b/drivers/usb/gadget/audio.c
@@ -106,20 +106,20 @@ static int audio_set_endpoint_req(struct usb_configuration *c,
 			ctrl->bRequest, w_value, len, ep);
 
 	switch (ctrl->bRequest) {
-	case SET_CUR:
+	case UAC_SET_CUR:
 		value = 0;
 		break;
 
-	case SET_MIN:
+	case UAC_SET_MIN:
 		break;
 
-	case SET_MAX:
+	case UAC_SET_MAX:
 		break;
 
-	case SET_RES:
+	case UAC_SET_RES:
 		break;
 
-	case SET_MEM:
+	case UAC_SET_MEM:
 		break;
 
 	default:
@@ -142,13 +142,13 @@ static int audio_get_endpoint_req(struct usb_configuration *c,
 			ctrl->bRequest, w_value, len, ep);
 
 	switch (ctrl->bRequest) {
-	case GET_CUR:
-	case GET_MIN:
-	case GET_MAX:
-	case GET_RES:
+	case UAC_GET_CUR:
+	case UAC_GET_MIN:
+	case UAC_GET_MAX:
+	case UAC_GET_RES:
 		value = 3;
 		break;
-	case GET_MEM:
+	case UAC_GET_MEM:
 		break;
 	default:
 		break;
@@ -171,11 +171,11 @@ audio_setup(struct usb_configuration *c, const struct usb_ctrlrequest *ctrl)
 	 * Audio class messages; interface activation uses set_alt().
 	 */
 	switch (ctrl->bRequestType) {
-	case USB_AUDIO_SET_ENDPOINT:
+	case USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
 		value = audio_set_endpoint_req(c, ctrl);
 		break;
 
-	case USB_AUDIO_GET_ENDPOINT:
+	case USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
 		value = audio_get_endpoint_req(c, ctrl);
 		break;
 
diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c
index 76afbd1..7b05b3c 100644
--- a/drivers/usb/gadget/f_audio.c
+++ b/drivers/usb/gadget/f_audio.c
@@ -50,16 +50,16 @@ static struct usb_interface_descriptor ac_interface_desc __initdata = {
 	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOCONTROL,
 };
 
-DECLARE_USB_AC_HEADER_DESCRIPTOR(2);
+DECLARE_UAC_AC_HEADER_DESCRIPTOR(2);
 
-#define USB_DT_AC_HEADER_LENGH	USB_DT_AC_HEADER_SIZE(F_AUDIO_NUM_INTERFACES)
+#define UAC_DT_AC_HEADER_LENGTH	UAC_DT_AC_HEADER_SIZE(F_AUDIO_NUM_INTERFACES)
 /* B.3.2  Class-Specific AC Interface Descriptor */
-static struct usb_ac_header_descriptor_2 ac_header_desc = {
-	.bLength =		USB_DT_AC_HEADER_LENGH,
+static struct uac_ac_header_descriptor_2 ac_header_desc = {
+	.bLength =		UAC_DT_AC_HEADER_LENGTH,
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
-	.bDescriptorSubtype =	HEADER,
+	.bDescriptorSubtype =	UAC_HEADER,
 	.bcdADC =		__constant_cpu_to_le16(0x0100),
-	.wTotalLength =		__constant_cpu_to_le16(USB_DT_AC_HEADER_LENGH),
+	.wTotalLength =		__constant_cpu_to_le16(UAC_DT_AC_HEADER_LENGTH),
 	.bInCollection =	F_AUDIO_NUM_INTERFACES,
 	.baInterfaceNr = {
 		[0] =		F_AUDIO_AC_INTERFACE,
@@ -68,33 +68,33 @@ static struct usb_ac_header_descriptor_2 ac_header_desc = {
 };
 
 #define INPUT_TERMINAL_ID	1
-static struct usb_input_terminal_descriptor input_terminal_desc = {
-	.bLength =		USB_DT_AC_INPUT_TERMINAL_SIZE,
+static struct uac_input_terminal_descriptor input_terminal_desc = {
+	.bLength =		UAC_DT_INPUT_TERMINAL_SIZE,
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
-	.bDescriptorSubtype =	INPUT_TERMINAL,
+	.bDescriptorSubtype =	UAC_INPUT_TERMINAL,
 	.bTerminalID =		INPUT_TERMINAL_ID,
-	.wTerminalType =	USB_AC_TERMINAL_STREAMING,
+	.wTerminalType =	UAC_TERMINAL_STREAMING,
 	.bAssocTerminal =	0,
 	.wChannelConfig =	0x3,
 };
 
-DECLARE_USB_AC_FEATURE_UNIT_DESCRIPTOR(0);
+DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(0);
 
 #define FEATURE_UNIT_ID		2
-static struct usb_ac_feature_unit_descriptor_0 feature_unit_desc = {
-	.bLength		= USB_DT_AC_FEATURE_UNIT_SIZE(0),
+static struct uac_feature_unit_descriptor_0 feature_unit_desc = {
+	.bLength		= UAC_DT_FEATURE_UNIT_SIZE(0),
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubtype	= FEATURE_UNIT,
+	.bDescriptorSubtype	= UAC_FEATURE_UNIT,
 	.bUnitID		= FEATURE_UNIT_ID,
 	.bSourceID		= INPUT_TERMINAL_ID,
 	.bControlSize		= 2,
-	.bmaControls[0]		= (FU_MUTE | FU_VOLUME),
+	.bmaControls[0]		= (UAC_FU_MUTE | UAC_FU_VOLUME),
 };
 
 static struct usb_audio_control mute_control = {
 	.list = LIST_HEAD_INIT(mute_control.list),
 	.name = "Mute Control",
-	.type = MUTE_CONTROL,
+	.type = UAC_MUTE_CONTROL,
 	/* Todo: add real Mute control code */
 	.set = generic_set_cmd,
 	.get = generic_get_cmd,
@@ -103,7 +103,7 @@ static struct usb_audio_control mute_control = {
 static struct usb_audio_control volume_control = {
 	.list = LIST_HEAD_INIT(volume_control.list),
 	.name = "Volume Control",
-	.type = VOLUME_CONTROL,
+	.type = UAC_VOLUME_CONTROL,
 	/* Todo: add real Volume control code */
 	.set = generic_set_cmd,
 	.get = generic_get_cmd,
@@ -113,17 +113,17 @@ static struct usb_audio_control_selector feature_unit = {
 	.list = LIST_HEAD_INIT(feature_unit.list),
 	.id = FEATURE_UNIT_ID,
 	.name = "Mute & Volume Control",
-	.type = FEATURE_UNIT,
+	.type = UAC_FEATURE_UNIT,
 	.desc = (struct usb_descriptor_header *)&feature_unit_desc,
 };
 
 #define OUTPUT_TERMINAL_ID	3
-static struct usb_output_terminal_descriptor output_terminal_desc = {
-	.bLength		= USB_DT_AC_OUTPUT_TERMINAL_SIZE,
+static struct uac_output_terminal_descriptor output_terminal_desc = {
+	.bLength		= UAC_DT_OUTPUT_TERMINAL_SIZE,
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
-	.bDescriptorSubtype	= OUTPUT_TERMINAL,
+	.bDescriptorSubtype	= UAC_OUTPUT_TERMINAL,
 	.bTerminalID		= OUTPUT_TERMINAL_ID,
-	.wTerminalType		= USB_AC_OUTPUT_TERMINAL_SPEAKER,
+	.wTerminalType		= UAC_OUTPUT_TERMINAL_SPEAKER,
 	.bAssocTerminal		= FEATURE_UNIT_ID,
 	.bSourceID		= FEATURE_UNIT_ID,
 };
@@ -148,22 +148,22 @@ static struct usb_interface_descriptor as_interface_alt_1_desc = {
 };
 
 /* B.4.2  Class-Specific AS Interface Descriptor */
-static struct usb_as_header_descriptor as_header_desc = {
-	.bLength =		USB_DT_AS_HEADER_SIZE,
+static struct uac_as_header_descriptor as_header_desc = {
+	.bLength =		UAC_DT_AS_HEADER_SIZE,
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
-	.bDescriptorSubtype =	AS_GENERAL,
+	.bDescriptorSubtype =	UAC_AS_GENERAL,
 	.bTerminalLink =	INPUT_TERMINAL_ID,
 	.bDelay =		1,
-	.wFormatTag =		USB_AS_AUDIO_FORMAT_TYPE_I_PCM,
+	.wFormatTag =		UAC_FORMAT_TYPE_I_PCM,
 };
 
-DECLARE_USB_AS_FORMAT_TYPE_I_DISCRETE_DESC(1);
+DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(1);
 
-static struct usb_as_formate_type_i_discrete_descriptor_1 as_type_i_desc = {
-	.bLength =		USB_AS_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1),
+static struct uac_format_type_i_discrete_descriptor_1 as_type_i_desc = {
+	.bLength =		UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1),
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
-	.bDescriptorSubtype =	FORMAT_TYPE,
-	.bFormatType =		USB_AS_FORMAT_TYPE_I,
+	.bDescriptorSubtype =	UAC_FORMAT_TYPE,
+	.bFormatType =		UAC_FORMAT_TYPE_I,
 	.bSubframeSize =	2,
 	.bBitResolution =	16,
 	.bSamFreqType =		1,
@@ -181,10 +181,10 @@ static struct usb_endpoint_descriptor as_out_ep_desc __initdata = {
 };
 
 /* Class-specific AS ISO OUT Endpoint Descriptor */
-static struct usb_as_iso_endpoint_descriptor as_iso_out_desc __initdata = {
-	.bLength =		USB_AS_ISO_ENDPOINT_DESC_SIZE,
+static struct uac_iso_endpoint_descriptor as_iso_out_desc __initdata = {
+	.bLength =		UAC_ISO_ENDPOINT_DESC_SIZE,
 	.bDescriptorType =	USB_DT_CS_ENDPOINT,
-	.bDescriptorSubtype =	EP_GENERAL,
+	.bDescriptorSubtype =	UAC_EP_GENERAL,
 	.bmAttributes = 	1,
 	.bLockDelayUnits =	1,
 	.wLockDelay =		__constant_cpu_to_le16(1),
@@ -456,11 +456,11 @@ f_audio_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
 	 * Audio class messages; interface activation uses set_alt().
 	 */
 	switch (ctrl->bRequestType) {
-	case USB_AUDIO_SET_INTF:
+	case USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE:
 		value = audio_set_intf_req(f, ctrl);
 		break;
 
-	case USB_AUDIO_GET_INTF:
+	case USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE:
 		value = audio_get_intf_req(f, ctrl);
 		break;
 
@@ -642,10 +642,10 @@ int __init control_selector_init(struct f_audio *audio)
 	list_add(&mute_control.list, &feature_unit.control);
 	list_add(&volume_control.list, &feature_unit.control);
 
-	volume_control.data[_CUR] = 0xffc0;
-	volume_control.data[_MIN] = 0xe3a0;
-	volume_control.data[_MAX] = 0xfff0;
-	volume_control.data[_RES] = 0x0030;
+	volume_control.data[UAC__CUR] = 0xffc0;
+	volume_control.data[UAC__MIN] = 0xe3a0;
+	volume_control.data[UAC__MAX] = 0xfff0;
+	volume_control.data[UAC__RES] = 0x0030;
 
 	return 0;
 }
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index b9312dc..d0b1e83 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -191,7 +191,7 @@ module_param(qlen, uint, S_IRUGO);
 #define GMIDI_MS_INTERFACE	1
 #define GMIDI_NUM_INTERFACES	2
 
-DECLARE_USB_AC_HEADER_DESCRIPTOR(1);
+DECLARE_UAC_AC_HEADER_DESCRIPTOR(1);
 DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(1);
 DECLARE_USB_MS_ENDPOINT_DESCRIPTOR(1);
 
@@ -237,12 +237,12 @@ static const struct usb_interface_descriptor ac_interface_desc = {
 };
 
 /* B.3.2  Class-Specific AC Interface Descriptor */
-static const struct usb_ac_header_descriptor_1 ac_header_desc = {
-	.bLength =		USB_DT_AC_HEADER_SIZE(1),
+static const struct uac_ac_header_descriptor_1 ac_header_desc = {
+	.bLength =		UAC_DT_AC_HEADER_SIZE(1),
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype =	USB_MS_HEADER,
 	.bcdADC =		cpu_to_le16(0x0100),
-	.wTotalLength =		cpu_to_le16(USB_DT_AC_HEADER_SIZE(1)),
+	.wTotalLength =		cpu_to_le16(UAC_DT_AC_HEADER_SIZE(1)),
 	.bInCollection =	1,
 	.baInterfaceNr = {
 		[0] =		GMIDI_MS_INTERFACE,
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index f75092a..c3edfcb 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -25,80 +25,77 @@
 #define USB_SUBCLASS_AUDIOSTREAMING	0x02
 #define USB_SUBCLASS_MIDISTREAMING	0x03
 
-/* A.5 Audio Class-Specific AC interface Descriptor Subtypes*/
-#define HEADER				0x01
-#define INPUT_TERMINAL			0x02
-#define OUTPUT_TERMINAL			0x03
-#define MIXER_UNIT			0x04
-#define SELECTOR_UNIT			0x05
-#define FEATURE_UNIT			0x06
-#define PROCESSING_UNIT			0x07
-#define EXTENSION_UNIT			0x08
-
-#define AS_GENERAL			0x01
-#define FORMAT_TYPE			0x02
-#define FORMAT_SPECIFIC			0x03
-
-#define EP_GENERAL			0x01
-
-#define MS_GENERAL			0x01
-#define MIDI_IN_JACK			0x02
-#define MIDI_OUT_JACK			0x03
-
-/* cs endpoint attributes */
-#define EP_CS_ATTR_SAMPLE_RATE		0x01
-#define EP_CS_ATTR_PITCH_CONTROL	0x02
-#define EP_CS_ATTR_FILL_MAX		0x80
-
-/* Audio Class specific Request Codes */
-#define USB_AUDIO_SET_INTF		0x21
-#define USB_AUDIO_SET_ENDPOINT		0x22
-#define USB_AUDIO_GET_INTF		0xa1
-#define USB_AUDIO_GET_ENDPOINT		0xa2
-
-#define SET_	0x00
-#define GET_	0x80
-
-#define _CUR	0x1
-#define _MIN	0x2
-#define _MAX	0x3
-#define _RES	0x4
-#define _MEM	0x5
-
-#define SET_CUR		(SET_ | _CUR)
-#define GET_CUR		(GET_ | _CUR)
-#define SET_MIN		(SET_ | _MIN)
-#define GET_MIN		(GET_ | _MIN)
-#define SET_MAX		(SET_ | _MAX)
-#define GET_MAX		(GET_ | _MAX)
-#define SET_RES		(SET_ | _RES)
-#define GET_RES		(GET_ | _RES)
-#define SET_MEM		(SET_ | _MEM)
-#define GET_MEM		(GET_ | _MEM)
-
-#define GET_STAT	0xff
-
-#define USB_AC_TERMINAL_UNDEFINED	0x100
-#define USB_AC_TERMINAL_STREAMING	0x101
-#define USB_AC_TERMINAL_VENDOR_SPEC	0x1FF
+/* A.5 Audio Class-Specific AC Interface Descriptor Subtypes */
+#define UAC_HEADER			0x01
+#define UAC_INPUT_TERMINAL		0x02
+#define UAC_OUTPUT_TERMINAL		0x03
+#define UAC_MIXER_UNIT			0x04
+#define UAC_SELECTOR_UNIT		0x05
+#define UAC_FEATURE_UNIT		0x06
+#define UAC_PROCESSING_UNIT		0x07
+#define UAC_EXTENSION_UNIT		0x08
+
+/* A.6 Audio Class-Specific AS Interface Descriptor Subtypes */
+#define UAC_AS_GENERAL			0x01
+#define UAC_FORMAT_TYPE			0x02
+#define UAC_FORMAT_SPECIFIC		0x03
+
+/* A.8 Audio Class-Specific Endpoint Descriptor Subtypes */
+#define UAC_EP_GENERAL			0x01
+
+/* A.9 Audio Class-Specific Request Codes */
+#define UAC_SET_			0x00
+#define UAC_GET_			0x80
+
+#define UAC__CUR			0x1
+#define UAC__MIN			0x2
+#define UAC__MAX			0x3
+#define UAC__RES			0x4
+#define UAC__MEM			0x5
+
+#define UAC_SET_CUR			(UAC_SET_ | UAC__CUR)
+#define UAC_GET_CUR			(UAC_GET_ | UAC__CUR)
+#define UAC_SET_MIN			(UAC_SET_ | UAC__MIN)
+#define UAC_GET_MIN			(UAC_GET_ | UAC__MIN)
+#define UAC_SET_MAX			(UAC_SET_ | UAC__MAX)
+#define UAC_GET_MAX			(UAC_GET_ | UAC__MAX)
+#define UAC_SET_RES			(UAC_SET_ | UAC__RES)
+#define UAC_GET_RES			(UAC_GET_ | UAC__RES)
+#define UAC_SET_MEM			(UAC_SET_ | UAC__MEM)
+#define UAC_GET_MEM			(UAC_GET_ | UAC__MEM)
+
+#define UAC_GET_STAT			0xff
+
+/* MIDI - A.1 MS Class-Specific Interface Descriptor Subtypes */
+#define UAC_MS_HEADER			0x01
+#define UAC_MIDI_IN_JACK		0x02
+#define UAC_MIDI_OUT_JACK		0x03
+
+/* MIDI - A.1 MS Class-Specific Endpoint Descriptor Subtypes */
+#define UAC_MS_GENERAL			0x01
+
+/* Terminals - 2.1 USB Terminal Types */
+#define UAC_TERMINAL_UNDEFINED		0x100
+#define UAC_TERMINAL_STREAMING		0x101
+#define UAC_TERMINAL_VENDOR_SPEC	0x1FF
 
 /* Terminal Control Selectors */
 /* 4.3.2  Class-Specific AC Interface Descriptor */
-struct usb_ac_header_descriptor {
+struct uac_ac_header_descriptor {
 	__u8  bLength;			/* 8 + n */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
-	__u8  bDescriptorSubtype;	/* USB_MS_HEADER */
+	__u8  bDescriptorSubtype;	/* UAC_MS_HEADER */
 	__le16 bcdADC;			/* 0x0100 */
 	__le16 wTotalLength;		/* includes Unit and Terminal desc. */
 	__u8  bInCollection;		/* n */
 	__u8  baInterfaceNr[];		/* [n] */
 } __attribute__ ((packed));
 
-#define USB_DT_AC_HEADER_SIZE(n)	(8 + (n))
+#define UAC_DT_AC_HEADER_SIZE(n)	(8 + (n))
 
 /* As above, but more useful for defining your own descriptors: */
-#define DECLARE_USB_AC_HEADER_DESCRIPTOR(n) 			\
-struct usb_ac_header_descriptor_##n {				\
+#define DECLARE_UAC_AC_HEADER_DESCRIPTOR(n) 			\
+struct uac_ac_header_descriptor_##n {				\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
 	__u8  bDescriptorSubtype;				\
@@ -109,7 +106,7 @@ struct usb_ac_header_descriptor_##n {				\
 } __attribute__ ((packed))
 
 /* 4.3.2.1 Input Terminal Descriptor */
-struct usb_input_terminal_descriptor {
+struct uac_input_terminal_descriptor {
 	__u8  bLength;			/* in bytes: 12 */
 	__u8  bDescriptorType;		/* CS_INTERFACE descriptor type */
 	__u8  bDescriptorSubtype;	/* INPUT_TERMINAL descriptor subtype */
@@ -122,18 +119,19 @@ struct usb_input_terminal_descriptor {
 	__u8  iTerminal;
 } __attribute__ ((packed));
 
-#define USB_DT_AC_INPUT_TERMINAL_SIZE			12
+#define UAC_DT_INPUT_TERMINAL_SIZE			12
 
-#define USB_AC_INPUT_TERMINAL_UNDEFINED			0x200
-#define USB_AC_INPUT_TERMINAL_MICROPHONE		0x201
-#define USB_AC_INPUT_TERMINAL_DESKTOP_MICROPHONE	0x202
-#define USB_AC_INPUT_TERMINAL_PERSONAL_MICROPHONE	0x203
-#define USB_AC_INPUT_TERMINAL_OMNI_DIR_MICROPHONE	0x204
-#define USB_AC_INPUT_TERMINAL_MICROPHONE_ARRAY		0x205
-#define USB_AC_INPUT_TERMINAL_PROC_MICROPHONE_ARRAY	0x206
+/* Terminals - 2.2 Input Terminal Types */
+#define UAC_INPUT_TERMINAL_UNDEFINED			0x200
+#define UAC_INPUT_TERMINAL_MICROPHONE			0x201
+#define UAC_INPUT_TERMINAL_DESKTOP_MICROPHONE		0x202
+#define UAC_INPUT_TERMINAL_PERSONAL_MICROPHONE		0x203
+#define UAC_INPUT_TERMINAL_OMNI_DIR_MICROPHONE		0x204
+#define UAC_INPUT_TERMINAL_MICROPHONE_ARRAY		0x205
+#define UAC_INPUT_TERMINAL_PROC_MICROPHONE_ARRAY	0x206
 
 /* 4.3.2.2 Output Terminal Descriptor */
-struct usb_output_terminal_descriptor {
+struct uac_output_terminal_descriptor {
 	__u8  bLength;			/* in bytes: 9 */
 	__u8  bDescriptorType;		/* CS_INTERFACE descriptor type */
 	__u8  bDescriptorSubtype;	/* OUTPUT_TERMINAL descriptor subtype */
@@ -144,23 +142,24 @@ struct usb_output_terminal_descriptor {
 	__u8  iTerminal;
 } __attribute__ ((packed));
 
-#define USB_DT_AC_OUTPUT_TERMINAL_SIZE				9
+#define UAC_DT_OUTPUT_TERMINAL_SIZE			9
 
-#define USB_AC_OUTPUT_TERMINAL_UNDEFINED			0x300
-#define USB_AC_OUTPUT_TERMINAL_SPEAKER				0x301
-#define USB_AC_OUTPUT_TERMINAL_HEADPHONES			0x302
-#define USB_AC_OUTPUT_TERMINAL_HEAD_MOUNTED_DISPLAY_AUDIO	0x303
-#define USB_AC_OUTPUT_TERMINAL_DESKTOP_SPEAKER			0x304
-#define USB_AC_OUTPUT_TERMINAL_ROOM_SPEAKER			0x305
-#define USB_AC_OUTPUT_TERMINAL_COMMUNICATION_SPEAKER		0x306
-#define USB_AC_OUTPUT_TERMINAL_LOW_FREQ_EFFECTS_SPEAKER		0x307
+/* Terminals - 2.3 Output Terminal Types */
+#define UAC_OUTPUT_TERMINAL_UNDEFINED			0x300
+#define UAC_OUTPUT_TERMINAL_SPEAKER			0x301
+#define UAC_OUTPUT_TERMINAL_HEADPHONES			0x302
+#define UAC_OUTPUT_TERMINAL_HEAD_MOUNTED_DISPLAY_AUDIO	0x303
+#define UAC_OUTPUT_TERMINAL_DESKTOP_SPEAKER		0x304
+#define UAC_OUTPUT_TERMINAL_ROOM_SPEAKER		0x305
+#define UAC_OUTPUT_TERMINAL_COMMUNICATION_SPEAKER	0x306
+#define UAC_OUTPUT_TERMINAL_LOW_FREQ_EFFECTS_SPEAKER	0x307
 
 /* Set bControlSize = 2 as default setting */
-#define USB_DT_AC_FEATURE_UNIT_SIZE(ch)		(7 + ((ch) + 1) * 2)
+#define UAC_DT_FEATURE_UNIT_SIZE(ch)		(7 + ((ch) + 1) * 2)
 
 /* As above, but more useful for defining your own descriptors: */
-#define DECLARE_USB_AC_FEATURE_UNIT_DESCRIPTOR(ch) 		\
-struct usb_ac_feature_unit_descriptor_##ch {			\
+#define DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(ch) 		\
+struct uac_feature_unit_descriptor_##ch {			\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
 	__u8  bDescriptorSubtype;				\
@@ -172,7 +171,7 @@ struct usb_ac_feature_unit_descriptor_##ch {			\
 } __attribute__ ((packed))
 
 /* 4.5.2 Class-Specific AS Interface Descriptor */
-struct usb_as_header_descriptor {
+struct uac_as_header_descriptor {
 	__u8  bLength;			/* in bytes: 7 */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* AS_GENERAL */
@@ -181,16 +180,17 @@ struct usb_as_header_descriptor {
 	__le16 wFormatTag;		/* The Audio Data Format */
 } __attribute__ ((packed));
 
-#define USB_DT_AS_HEADER_SIZE		7
+#define UAC_DT_AS_HEADER_SIZE		7
 
-#define USB_AS_AUDIO_FORMAT_TYPE_I_UNDEFINED	0x0
-#define USB_AS_AUDIO_FORMAT_TYPE_I_PCM		0x1
-#define USB_AS_AUDIO_FORMAT_TYPE_I_PCM8		0x2
-#define USB_AS_AUDIO_FORMAT_TYPE_I_IEEE_FLOAT	0x3
-#define USB_AS_AUDIO_FORMAT_TYPE_I_ALAW		0x4
-#define USB_AS_AUDIO_FORMAT_TYPE_I_MULAW	0x5
+/* Formats - A.1.1 Audio Data Format Type I Codes */
+#define UAC_FORMAT_TYPE_I_UNDEFINED	0x0
+#define UAC_FORMAT_TYPE_I_PCM		0x1
+#define UAC_FORMAT_TYPE_I_PCM8		0x2
+#define UAC_FORMAT_TYPE_I_IEEE_FLOAT	0x3
+#define UAC_FORMAT_TYPE_I_ALAW		0x4
+#define UAC_FORMAT_TYPE_I_MULAW		0x5
 
-struct usb_as_format_type_i_continuous_descriptor {
+struct uac_format_type_i_continuous_descriptor {
 	__u8  bLength;			/* in bytes: 8 + (ns * 3) */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* FORMAT_TYPE */
@@ -203,9 +203,9 @@ struct usb_as_format_type_i_continuous_descriptor {
 	__u8  tUpperSamFreq[3];
 } __attribute__ ((packed));
 
-#define USB_AS_FORMAT_TYPE_I_CONTINUOUS_DESC_SIZE	14
+#define UAC_FORMAT_TYPE_I_CONTINUOUS_DESC_SIZE	14
 
-struct usb_as_formate_type_i_discrete_descriptor {
+struct uac_format_type_i_discrete_descriptor {
 	__u8  bLength;			/* in bytes: 8 + (ns * 3) */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* FORMAT_TYPE */
@@ -217,8 +217,8 @@ struct usb_as_formate_type_i_discrete_descriptor {
 	__u8  tSamFreq[][3];
 } __attribute__ ((packed));
 
-#define DECLARE_USB_AS_FORMAT_TYPE_I_DISCRETE_DESC(n) 		\
-struct usb_as_formate_type_i_discrete_descriptor_##n {		\
+#define DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(n) 		\
+struct uac_format_type_i_discrete_descriptor_##n {		\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
 	__u8  bDescriptorSubtype;				\
@@ -230,14 +230,15 @@ struct usb_as_formate_type_i_discrete_descriptor_##n {		\
 	__u8  tSamFreq[n][3];					\
 } __attribute__ ((packed))
 
-#define USB_AS_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(n)	(8 + (n * 3))
+#define UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(n)	(8 + (n * 3))
 
-#define USB_AS_FORMAT_TYPE_UNDEFINED	0x0
-#define USB_AS_FORMAT_TYPE_I		0x1
-#define USB_AS_FORMAT_TYPE_II		0x2
-#define USB_AS_FORMAT_TYPE_III		0x3
+/* Formats - A.2 Format Type Codes */
+#define UAC_FORMAT_TYPE_UNDEFINED	0x0
+#define UAC_FORMAT_TYPE_I		0x1
+#define UAC_FORMAT_TYPE_II		0x2
+#define UAC_FORMAT_TYPE_III		0x3
 
-struct usb_as_iso_endpoint_descriptor {
+struct uac_iso_endpoint_descriptor {
 	__u8  bLength;			/* in bytes: 7 */
 	__u8  bDescriptorType;		/* USB_DT_CS_ENDPOINT */
 	__u8  bDescriptorSubtype;	/* EP_GENERAL */
@@ -245,30 +246,35 @@ struct usb_as_iso_endpoint_descriptor {
 	__u8  bLockDelayUnits;
 	__le16 wLockDelay;
 };
-#define USB_AS_ISO_ENDPOINT_DESC_SIZE	7
-
-#define FU_CONTROL_UNDEFINED		0x00
-#define MUTE_CONTROL			0x01
-#define VOLUME_CONTROL			0x02
-#define BASS_CONTROL			0x03
-#define MID_CONTROL			0x04
-#define TREBLE_CONTROL			0x05
-#define GRAPHIC_EQUALIZER_CONTROL	0x06
-#define AUTOMATIC_GAIN_CONTROL		0x07
-#define DELAY_CONTROL			0x08
-#define BASS_BOOST_CONTROL		0x09
-#define LOUDNESS_CONTROL		0x0a
-
-#define FU_MUTE		(1 << (MUTE_CONTROL - 1))
-#define FU_VOLUME	(1 << (VOLUME_CONTROL - 1))
-#define FU_BASS		(1 << (BASS_CONTROL - 1))
-#define FU_MID		(1 << (MID_CONTROL - 1))
-#define FU_TREBLE	(1 << (TREBLE_CONTROL - 1))
-#define FU_GRAPHIC_EQ	(1 << (GRAPHIC_EQUALIZER_CONTROL - 1))
-#define FU_AUTO_GAIN	(1 << (AUTOMATIC_GAIN_CONTROL - 1))
-#define FU_DELAY	(1 << (DELAY_CONTROL - 1))
-#define FU_BASS_BOOST	(1 << (BASS_BOOST_CONTROL - 1))
-#define FU_LOUDNESS	(1 << (LOUDNESS_CONTROL - 1))
+#define UAC_ISO_ENDPOINT_DESC_SIZE	7
+
+#define UAC_EP_CS_ATTR_SAMPLE_RATE	0x01
+#define UAC_EP_CS_ATTR_PITCH_CONTROL	0x02
+#define UAC_EP_CS_ATTR_FILL_MAX		0x80
+
+/* A.10.2 Feature Unit Control Selectors */
+#define UAC_FU_CONTROL_UNDEFINED	0x00
+#define UAC_MUTE_CONTROL		0x01
+#define UAC_VOLUME_CONTROL		0x02
+#define UAC_BASS_CONTROL		0x03
+#define UAC_MID_CONTROL			0x04
+#define UAC_TREBLE_CONTROL		0x05
+#define UAC_GRAPHIC_EQUALIZER_CONTROL	0x06
+#define UAC_AUTOMATIC_GAIN_CONTROL	0x07
+#define UAC_DELAY_CONTROL		0x08
+#define UAC_BASS_BOOST_CONTROL		0x09
+#define UAC_LOUDNESS_CONTROL		0x0a
+
+#define UAC_FU_MUTE		(1 << (UAC_MUTE_CONTROL - 1))
+#define UAC_FU_VOLUME		(1 << (UAC_VOLUME_CONTROL - 1))
+#define UAC_FU_BASS		(1 << (UAC_BASS_CONTROL - 1))
+#define UAC_FU_MID		(1 << (UAC_MID_CONTROL - 1))
+#define UAC_FU_TREBLE		(1 << (UAC_TREBLE_CONTROL - 1))
+#define UAC_FU_GRAPHIC_EQ	(1 << (UAC_GRAPHIC_EQUALIZER_CONTROL - 1))
+#define UAC_FU_AUTO_GAIN	(1 << (UAC_AUTOMATIC_GAIN_CONTROL - 1))
+#define UAC_FU_DELAY		(1 << (UAC_DELAY_CONTROL - 1))
+#define UAC_FU_BASS_BOOST	(1 << (UAC_BASS_BOOST_CONTROL - 1))
+#define UAC_FU_LOUDNESS		(1 << (UAC_LOUDNESS_CONTROL - 1))
 
 struct usb_audio_control {
 	struct list_head list;
-- 
cgit v0.10.2


From b95cd7ec3e93bae199e820bd65b21b23e4538acc Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 21 Jun 2009 23:21:55 +0200
Subject: USB audio gadget: Un-inline generic_[gs]et_cmd

Those functions are used only used to fill the set/get members of
usb_audio_control. It doesn't make much sense to inline them.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c
index 7b05b3c..98e9bb9 100644
--- a/drivers/usb/gadget/f_audio.c
+++ b/drivers/usb/gadget/f_audio.c
@@ -28,6 +28,9 @@ static int audio_buf_size = 48000;
 module_param(audio_buf_size, int, S_IRUGO);
 MODULE_PARM_DESC(audio_buf_size, "Audio buffer size");
 
+static int generic_set_cmd(struct usb_audio_control *con, u8 cmd, int value);
+static int generic_get_cmd(struct usb_audio_control *con, u8 cmd);
+
 /*
  * DESCRIPTORS ... most are static, but strings and full
  * configuration descriptors are built on demand.
@@ -632,6 +635,18 @@ f_audio_unbind(struct usb_configuration *c, struct usb_function *f)
 
 /*-------------------------------------------------------------------------*/
 
+static int generic_set_cmd(struct usb_audio_control *con, u8 cmd, int value)
+{
+	con->data[cmd] = value;
+
+	return 0;
+}
+
+static int generic_get_cmd(struct usb_audio_control *con, u8 cmd)
+{
+	return con->data[cmd];
+}
+
 /* Todo: add more control selecotor dynamically */
 int __init control_selector_init(struct f_audio *audio)
 {
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index c3edfcb..7b33c49 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -285,18 +285,6 @@ struct usb_audio_control {
 	int (*get)(struct usb_audio_control *con, u8 cmd);
 };
 
-static inline int generic_set_cmd(struct usb_audio_control *con, u8 cmd, int value)
-{
-	con->data[cmd] = value;
-
-	return 0;
-}
-
-static inline int generic_get_cmd(struct usb_audio_control *con, u8 cmd)
-{
-	return con->data[cmd];
-}
-
 struct usb_audio_control_selector {
 	struct list_head list;
 	struct list_head control;
-- 
cgit v0.10.2


From 586dfc8cafc25cf785332fdfe9530f392e26f30d Mon Sep 17 00:00:00 2001
From: Wan ZongShun <mcuos.com@gmail.com>
Date: Sat, 13 Jun 2009 09:14:28 +0800
Subject: USB: Add nuvoton Ehci driver for w90p910 platform

Add ehci support for w90p910 platform.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index dcd49f1..93cee3e 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -39,6 +39,7 @@ config USB_ARCH_HAS_OHCI
 	default y if ARCH_AT91
 	default y if ARCH_PNX4008 && I2C
 	default y if MFD_TC6393XB
+	default y if ARCH_W90X900
 	# PPC:
 	default y if STB03xxx
 	default y if PPC_MPC52xx
@@ -58,6 +59,7 @@ config USB_ARCH_HAS_EHCI
 	default y if PPC_83xx
 	default y if SOC_AU1200
 	default y if ARCH_IXP4XX
+	default y if ARCH_W90X900
 	default PCI
 
 # ARM SA1111 chips have a non-PCI based "OHCI-compatible" USB host interface.
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index f21ca7d..41b8d7d 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -113,6 +113,12 @@ config USB_EHCI_HCD_PPC_OF
 	  Enables support for the USB controller present on the PowerPC
 	  OpenFirmware platform bus.
 
+config USB_W90X900_EHCI
+	bool "W90X900(W90P910) EHCI support"
+	depends on USB_EHCI_HCD && ARCH_W90X900
+	---help---
+		Enables support for the W90X900 USB controller
+
 config USB_OXU210HP_HCD
 	tristate "OXU210HP HCD support"
 	depends on USB
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 11c627c..54715b8 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1117,6 +1117,11 @@ MODULE_LICENSE ("GPL");
 #define	PLATFORM_DRIVER		ixp4xx_ehci_driver
 #endif
 
+#ifdef CONFIG_USB_W90X900_EHCI
+#include "ehci-w90x900.c"
+#define	PLATFORM_DRIVER		ehci_hcd_w90x900_driver
+#endif
+
 #if !defined(PCI_DRIVER) && !defined(PLATFORM_DRIVER) && \
     !defined(PS3_SYSTEM_BUS_DRIVER) && !defined(OF_PLATFORM_DRIVER)
 #error "missing bus glue for ehci-hcd"
diff --git a/drivers/usb/host/ehci-w90x900.c b/drivers/usb/host/ehci-w90x900.c
new file mode 100644
index 0000000..cfa21ea
--- /dev/null
+++ b/drivers/usb/host/ehci-w90x900.c
@@ -0,0 +1,181 @@
+/*
+ * linux/driver/usb/host/ehci-w90x900.c
+ *
+ * Copyright (c) 2008 Nuvoton technology corporation.
+ *
+ * Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;version 2 of the License.
+ *
+ */
+
+#include <linux/platform_device.h>
+
+/*ebable phy0 and phy1 for w90p910*/
+#define	ENPHY		(0x01<<8)
+#define PHY0_CTR	(0xA4)
+#define PHY1_CTR	(0xA8)
+
+static int __devinit usb_w90x900_probe(const struct hc_driver *driver,
+		      struct platform_device *pdev)
+{
+	struct usb_hcd *hcd;
+	struct ehci_hcd *ehci;
+	struct resource *res;
+	int retval = 0, irq;
+	unsigned long val;
+
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		retval = -ENXIO;
+		goto err1;
+	}
+
+	hcd = usb_create_hcd(driver, &pdev->dev, "w90x900 EHCI");
+	if (!hcd) {
+		retval = -ENOMEM;
+		goto err1;
+	}
+
+	hcd->rsrc_start = res->start;
+	hcd->rsrc_len = res->end - res->start + 1;
+
+	if (!request_mem_region(hcd->rsrc_start, hcd->rsrc_len, hcd_name)) {
+		retval = -EBUSY;
+		goto err2;
+	}
+
+	hcd->regs = ioremap(hcd->rsrc_start, hcd->rsrc_len);
+	if (hcd->regs == NULL) {
+		retval = -EFAULT;
+		goto err3;
+	}
+
+	ehci = hcd_to_ehci(hcd);
+	ehci->caps = hcd->regs;
+	ehci->regs = hcd->regs +
+		 HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase));
+
+	/* enable PHY 0,1,the regs only apply to w90p910
+	*  0xA4,0xA8 were offsets of PHY0 and PHY1 controller of
+	*  w90p910 IC relative to ehci->regs.
+	*/
+	val = __raw_readl(ehci->regs+PHY0_CTR);
+	val |= ENPHY;
+	__raw_writel(val, ehci->regs+PHY0_CTR);
+
+	val = __raw_readl(ehci->regs+PHY1_CTR);
+	val |= ENPHY;
+	__raw_writel(val, ehci->regs+PHY1_CTR);
+
+	ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params);
+	ehci->sbrn = 0x20;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		goto err4;
+
+	retval = usb_add_hcd(hcd, irq, IRQF_SHARED);
+	if (retval != 0)
+		goto err4;
+
+	ehci_writel(ehci, 1, &ehci->regs->configured_flag);
+
+	return retval;
+err4:
+	iounmap(hcd->regs);
+err3:
+	release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
+err2:
+	usb_put_hcd(hcd);
+err1:
+	return retval;
+}
+
+static
+void usb_w90x900_remove(struct usb_hcd *hcd, struct platform_device *pdev)
+{
+	usb_remove_hcd(hcd);
+	iounmap(hcd->regs);
+	release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
+	usb_put_hcd(hcd);
+}
+
+static const struct hc_driver ehci_w90x900_hc_driver = {
+	.description = hcd_name,
+	.product_desc = "Nuvoton w90x900 EHCI Host Controller",
+	.hcd_priv_size = sizeof(struct ehci_hcd),
+
+	/*
+	 * generic hardware linkage
+	 */
+	.irq = ehci_irq,
+	.flags = HCD_USB2|HCD_MEMORY,
+
+	/*
+	 * basic lifecycle operations
+	 */
+	.reset = ehci_init,
+	.start = ehci_run,
+
+	.stop = ehci_stop,
+	.shutdown = ehci_shutdown,
+
+	/*
+	 * managing i/o requests and associated device resources
+	 */
+	.urb_enqueue = ehci_urb_enqueue,
+	.urb_dequeue = ehci_urb_dequeue,
+	.endpoint_disable = ehci_endpoint_disable,
+
+	/*
+	 * scheduling support
+	 */
+	.get_frame_number = ehci_get_frame,
+
+	/*
+	 * root hub support
+	 */
+	.hub_status_data = ehci_hub_status_data,
+	.hub_control = ehci_hub_control,
+#ifdef	CONFIG_PM
+	.bus_suspend = ehci_bus_suspend,
+	.bus_resume = ehci_bus_resume,
+#endif
+	.relinquish_port	= ehci_relinquish_port,
+	.port_handed_over	= ehci_port_handed_over,
+};
+
+static int __devinit ehci_w90x900_probe(struct platform_device *pdev)
+{
+	if (usb_disabled())
+		return -ENODEV;
+
+	return usb_w90x900_probe(&ehci_w90x900_hc_driver, pdev);
+}
+
+static int __devexit ehci_w90x900_remove(struct platform_device *pdev)
+{
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+
+	usb_w90x900_remove(hcd, pdev);
+
+	return 0;
+}
+
+static struct platform_driver ehci_hcd_w90x900_driver = {
+	.probe  = ehci_w90x900_probe,
+	.remove = __devexit_p(ehci_w90x900_remove),
+	.driver = {
+		.name = "w90x900-ehci",
+		.owner = THIS_MODULE,
+	},
+};
+
+MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
+MODULE_DESCRIPTION("w90p910 usb ehci driver!");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:w90p910-ehci");
-- 
cgit v0.10.2


From 831baa4915de465357b25c471bbb9b36472024df Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Wed, 24 Jun 2009 18:26:40 +0100
Subject: USB: whci-hcd: make endpoint_reset method async

usb_hcd_endpoint_reset() may be called in atomic context and must not
sleep.  So make whci-hcd's endpoint_reset() asynchronous.  URBs
submitted while the reset is in progress will be queued (on the std
list) and transfers will resume once the reset is complete.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c
index c205078..c632437 100644
--- a/drivers/usb/host/whci/asl.c
+++ b/drivers/usb/host/whci/asl.c
@@ -227,11 +227,21 @@ void scan_async_work(struct work_struct *work)
 	/*
 	 * Now that the ASL is updated, complete the removal of any
 	 * removed qsets.
+	 *
+	 * If the qset was to be reset, do so and reinsert it into the
+	 * ASL if it has pending transfers.
 	 */
 	spin_lock_irq(&whc->lock);
 
 	list_for_each_entry_safe(qset, t, &whc->async_removed_list, list_node) {
 		qset_remove_complete(whc, qset);
+		if (qset->reset) {
+			qset_reset(whc, qset);
+			if (!list_empty(&qset->stds)) {
+				asl_qset_insert_begin(whc, qset);
+				queue_work(whc->workqueue, &whc->async_work);
+			}
+		}
 	}
 
 	spin_unlock_irq(&whc->lock);
@@ -267,7 +277,7 @@ int asl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags)
 	else
 		err = qset_add_urb(whc, qset, urb, GFP_ATOMIC);
 	if (!err) {
-		if (!qset->in_sw_list)
+		if (!qset->in_sw_list && !qset->remove)
 			asl_qset_insert_begin(whc, qset);
 	} else
 		usb_hcd_unlink_urb_from_ep(&whc->wusbhc.usb_hcd, urb);
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
index e019a50..687b622 100644
--- a/drivers/usb/host/whci/hcd.c
+++ b/drivers/usb/host/whci/hcd.c
@@ -192,19 +192,23 @@ static void whc_endpoint_reset(struct usb_hcd *usb_hcd,
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
 	struct whc *whc = wusbhc_to_whc(wusbhc);
 	struct whc_qset *qset;
+	unsigned long flags;
+
+	spin_lock_irqsave(&whc->lock, flags);
 
 	qset = ep->hcpriv;
 	if (qset) {
 		qset->remove = 1;
+		qset->reset = 1;
 
 		if (usb_endpoint_xfer_bulk(&ep->desc)
 		    || usb_endpoint_xfer_control(&ep->desc))
 			queue_work(whc->workqueue, &whc->async_work);
 		else
 			queue_work(whc->workqueue, &whc->periodic_work);
-
-		qset_reset(whc, qset);
 	}
+
+	spin_unlock_irqrestore(&whc->lock, flags);
 }
 
 
diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c
index ff4ef9e..a9e05ba 100644
--- a/drivers/usb/host/whci/pzl.c
+++ b/drivers/usb/host/whci/pzl.c
@@ -255,11 +255,21 @@ void scan_periodic_work(struct work_struct *work)
 	/*
 	 * Now that the PZL is updated, complete the removal of any
 	 * removed qsets.
+	 *
+	 * If the qset was to be reset, do so and reinsert it into the
+	 * PZL if it has pending transfers.
 	 */
 	spin_lock_irq(&whc->lock);
 
 	list_for_each_entry_safe(qset, t, &whc->periodic_removed_list, list_node) {
 		qset_remove_complete(whc, qset);
+		if (qset->reset) {
+			qset_reset(whc, qset);
+			if (!list_empty(&qset->stds)) {
+				qset_insert_in_sw_list(whc, qset);
+				queue_work(whc->workqueue, &whc->periodic_work);
+			}
+		}
 	}
 
 	spin_unlock_irq(&whc->lock);
@@ -295,7 +305,7 @@ int pzl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags)
 	else
 		err = qset_add_urb(whc, qset, urb, GFP_ATOMIC);
 	if (!err) {
-		if (!qset->in_sw_list)
+		if (!qset->in_sw_list && !qset->remove)
 			qset_insert_in_sw_list(whc, qset);
 	} else
 		usb_hcd_unlink_urb_from_ep(&whc->wusbhc.usb_hcd, urb);
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index 640b38f..1b9dc15 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -103,7 +103,6 @@ static void qset_fill_qh(struct whc_qset *qset, struct urb *urb)
 void qset_clear(struct whc *whc, struct whc_qset *qset)
 {
 	qset->td_start = qset->td_end = qset->ntds = 0;
-	qset->remove = 0;
 
 	qset->qh.link = cpu_to_le32(QH_LINK_NTDS(8) | QH_LINK_T);
 	qset->qh.status = qset->qh.status & QH_STATUS_SEQ_MASK;
@@ -125,7 +124,7 @@ void qset_clear(struct whc *whc, struct whc_qset *qset)
  */
 void qset_reset(struct whc *whc, struct whc_qset *qset)
 {
-	wait_for_completion(&qset->remove_complete);
+	qset->reset = 0;
 
 	qset->qh.status &= ~QH_STATUS_SEQ_MASK;
 	qset->qh.cur_window = cpu_to_le32((1 << qset->max_burst) - 1);
@@ -156,6 +155,7 @@ struct whc_qset *get_qset(struct whc *whc, struct urb *urb,
 
 void qset_remove_complete(struct whc *whc, struct whc_qset *qset)
 {
+	qset->remove = 0;
 	list_del_init(&qset->list_node);
 	complete(&qset->remove_complete);
 }
diff --git a/drivers/usb/host/whci/whci-hc.h b/drivers/usb/host/whci/whci-hc.h
index 794dba0..e8d0001 100644
--- a/drivers/usb/host/whci/whci-hc.h
+++ b/drivers/usb/host/whci/whci-hc.h
@@ -264,6 +264,7 @@ struct whc_qset {
 	unsigned in_sw_list:1;
 	unsigned in_hw_list:1;
 	unsigned remove:1;
+	unsigned reset:1;
 	struct urb *pause_after_urb;
 	struct completion remove_complete;
 	int max_burst;
-- 
cgit v0.10.2


From 7cbe5dca399a50ce8aa74314b1d276e2fb904e1b Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 29 Jun 2009 10:56:54 -0400
Subject: USB: add API for userspace drivers to "claim" ports

This patch (as1258) implements a feature that users have been asking
for: It gives programs the ability to "claim" a port on a hub, via a
new usbfs ioctl.  A device plugged into a "claimed" port will not be
touched by the kernel beyond the immediate necessities of
initialization and enumeration.

In particular, when a device is plugged into a "claimed" port, the
kernel will not select and install a configuration.  And when a config
is installed by usbfs or sysfs, the kernel will not probe any drivers
for any of the interfaces.  (However the kernel will fetch various
string descriptors during enumeration.  One could argue that this
isn't really necessary, but the strings are exported in sysfs.)

The patch does not guarantee exclusive access to these devices; it is
still possible for more than one program to open the device file
concurrently.  Programs are responsible for coordinating access among
themselves.

A demonstration program showing how to use the new interface can be
found in an attachment to

	http://marc.info/?l=linux-usb&m=124345857431452&w=2

The patch also makes a small simplification to the hub driver,
replacing a bunch of more-or-less useless variants of "out of memory"
with a single message.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 4247ecc..165de5d 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -52,6 +52,7 @@
 
 #include "hcd.h"	/* for usbcore internals */
 #include "usb.h"
+#include "hub.h"
 
 #define USB_MAXBUS			64
 #define USB_DEVICE_MAX			USB_MAXBUS * 128
@@ -655,6 +656,7 @@ static int usbdev_release(struct inode *inode, struct file *file)
 	struct async *as;
 
 	usb_lock_device(dev);
+	usb_hub_release_all_ports(dev, ps);
 
 	/* Protect against simultaneous open */
 	mutex_lock(&usbfs_mutex);
@@ -1548,6 +1550,29 @@ static int proc_ioctl_compat(struct dev_state *ps, compat_uptr_t arg)
 }
 #endif
 
+static int proc_claim_port(struct dev_state *ps, void __user *arg)
+{
+	unsigned portnum;
+	int rc;
+
+	if (get_user(portnum, (unsigned __user *) arg))
+		return -EFAULT;
+	rc = usb_hub_claim_port(ps->dev, portnum, ps);
+	if (rc == 0)
+		snoop(&ps->dev->dev, "port %d claimed by process %d: %s\n",
+			portnum, task_pid_nr(current), current->comm);
+	return rc;
+}
+
+static int proc_release_port(struct dev_state *ps, void __user *arg)
+{
+	unsigned portnum;
+
+	if (get_user(portnum, (unsigned __user *) arg))
+		return -EFAULT;
+	return usb_hub_release_port(ps->dev, portnum, ps);
+}
+
 /*
  * NOTE:  All requests here that have interface numbers as parameters
  * are assuming that somehow the configuration has been prevented from
@@ -1689,6 +1714,16 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
 		snoop(&dev->dev, "%s: IOCTL\n", __func__);
 		ret = proc_ioctl_default(ps, p);
 		break;
+
+	case USBDEVFS_CLAIM_PORT:
+		snoop(&dev->dev, "%s: CLAIM_PORT\n", __func__);
+		ret = proc_claim_port(ps, p);
+		break;
+
+	case USBDEVFS_RELEASE_PORT:
+		snoop(&dev->dev, "%s: RELEASE_PORT\n", __func__);
+		ret = proc_release_port(ps, p);
+		break;
 	}
 	usb_unlock_device(dev);
 	if (ret >= 0)
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 69e5773..1bad4e5 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -207,6 +207,9 @@ static int usb_probe_interface(struct device *dev)
 
 	intf->needs_binding = 0;
 
+	if (usb_device_is_owned(udev))
+		return -ENODEV;
+
 	if (udev->authorized == 0) {
 		dev_err(&intf->dev, "Device is not authorized for usage\n");
 		return -ENODEV;
diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c
index 30ecac3..05e6d31 100644
--- a/drivers/usb/core/generic.c
+++ b/drivers/usb/core/generic.c
@@ -158,7 +158,9 @@ static int generic_probe(struct usb_device *udev)
 	/* Choose and set the configuration.  This registers the interfaces
 	 * with the driver core and lets interface drivers bind to them.
 	 */
-	if (udev->authorized == 0)
+	if (usb_device_is_owned(udev))
+		;		/* Don't configure if the device is owned */
+	else if (udev->authorized == 0)
 		dev_err(&udev->dev, "Device is not authorized for usage\n");
 	else {
 		c = usb_choose_configuration(udev);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 71f86c6..cb54420 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -78,6 +78,7 @@ struct usb_hub {
 	u8			indicator[USB_MAXCHILDREN];
 	struct delayed_work	leds;
 	struct delayed_work	init_work;
+	void			**port_owners;
 };
 
 
@@ -860,19 +861,17 @@ static int hub_configure(struct usb_hub *hub,
 	u16 wHubCharacteristics;
 	unsigned int pipe;
 	int maxp, ret;
-	char *message;
+	char *message = "out of memory";
 
 	hub->buffer = usb_buffer_alloc(hdev, sizeof(*hub->buffer), GFP_KERNEL,
 			&hub->buffer_dma);
 	if (!hub->buffer) {
-		message = "can't allocate hub irq buffer";
 		ret = -ENOMEM;
 		goto fail;
 	}
 
 	hub->status = kmalloc(sizeof(*hub->status), GFP_KERNEL);
 	if (!hub->status) {
-		message = "can't kmalloc hub status buffer";
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -880,7 +879,6 @@ static int hub_configure(struct usb_hub *hub,
 
 	hub->descriptor = kmalloc(sizeof(*hub->descriptor), GFP_KERNEL);
 	if (!hub->descriptor) {
-		message = "can't kmalloc hub descriptor";
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -904,6 +902,12 @@ static int hub_configure(struct usb_hub *hub,
 	dev_info (hub_dev, "%d port%s detected\n", hdev->maxchild,
 		(hdev->maxchild == 1) ? "" : "s");
 
+	hub->port_owners = kzalloc(hdev->maxchild * sizeof(void *), GFP_KERNEL);
+	if (!hub->port_owners) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
 	wHubCharacteristics = le16_to_cpu(hub->descriptor->wHubCharacteristics);
 
 	if (wHubCharacteristics & HUB_CHAR_COMPOUND) {
@@ -1082,7 +1086,6 @@ static int hub_configure(struct usb_hub *hub,
 
 	hub->urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!hub->urb) {
-		message = "couldn't allocate interrupt urb";
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -1131,11 +1134,13 @@ static void hub_disconnect(struct usb_interface *intf)
 	hub_quiesce(hub, HUB_DISCONNECT);
 
 	usb_set_intfdata (intf, NULL);
+	hub->hdev->maxchild = 0;
 
 	if (hub->hdev->speed == USB_SPEED_HIGH)
 		highspeed_hubs--;
 
 	usb_free_urb(hub->urb);
+	kfree(hub->port_owners);
 	kfree(hub->descriptor);
 	kfree(hub->status);
 	usb_buffer_free(hub->hdev, sizeof(*hub->buffer), hub->buffer,
@@ -1250,6 +1255,79 @@ hub_ioctl(struct usb_interface *intf, unsigned int code, void *user_data)
 	}
 }
 
+/*
+ * Allow user programs to claim ports on a hub.  When a device is attached
+ * to one of these "claimed" ports, the program will "own" the device.
+ */
+static int find_port_owner(struct usb_device *hdev, unsigned port1,
+		void ***ppowner)
+{
+	if (hdev->state == USB_STATE_NOTATTACHED)
+		return -ENODEV;
+	if (port1 == 0 || port1 > hdev->maxchild)
+		return -EINVAL;
+
+	/* This assumes that devices not managed by the hub driver
+	 * will always have maxchild equal to 0.
+	 */
+	*ppowner = &(hdev_to_hub(hdev)->port_owners[port1 - 1]);
+	return 0;
+}
+
+/* In the following three functions, the caller must hold hdev's lock */
+int usb_hub_claim_port(struct usb_device *hdev, unsigned port1, void *owner)
+{
+	int rc;
+	void **powner;
+
+	rc = find_port_owner(hdev, port1, &powner);
+	if (rc)
+		return rc;
+	if (*powner)
+		return -EBUSY;
+	*powner = owner;
+	return rc;
+}
+
+int usb_hub_release_port(struct usb_device *hdev, unsigned port1, void *owner)
+{
+	int rc;
+	void **powner;
+
+	rc = find_port_owner(hdev, port1, &powner);
+	if (rc)
+		return rc;
+	if (*powner != owner)
+		return -ENOENT;
+	*powner = NULL;
+	return rc;
+}
+
+void usb_hub_release_all_ports(struct usb_device *hdev, void *owner)
+{
+	int n;
+	void **powner;
+
+	n = find_port_owner(hdev, 1, &powner);
+	if (n == 0) {
+		for (; n < hdev->maxchild; (++n, ++powner)) {
+			if (*powner == owner)
+				*powner = NULL;
+		}
+	}
+}
+
+/* The caller must hold udev's lock */
+bool usb_device_is_owned(struct usb_device *udev)
+{
+	struct usb_hub *hub;
+
+	if (udev->state == USB_STATE_NOTATTACHED || !udev->parent)
+		return false;
+	hub = hdev_to_hub(udev->parent);
+	return !!hub->port_owners[udev->portnum - 1];
+}
+
 
 static void recursively_mark_NOTATTACHED(struct usb_device *udev)
 {
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index c0e0ae2..9a8b15e 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -37,6 +37,13 @@ extern int usb_match_device(struct usb_device *dev,
 extern void usb_forced_unbind_intf(struct usb_interface *intf);
 extern void usb_rebind_intf(struct usb_interface *intf);
 
+extern int usb_hub_claim_port(struct usb_device *hdev, unsigned port,
+		void *owner);
+extern int usb_hub_release_port(struct usb_device *hdev, unsigned port,
+		void *owner);
+extern void usb_hub_release_all_ports(struct usb_device *hdev, void *owner);
+extern bool usb_device_is_owned(struct usb_device *udev);
+
 extern int  usb_hub_init(void);
 extern void usb_hub_cleanup(void);
 extern int usb_major_init(void);
diff --git a/include/linux/usbdevice_fs.h b/include/linux/usbdevice_fs.h
index 0044d9b..00ceebe 100644
--- a/include/linux/usbdevice_fs.h
+++ b/include/linux/usbdevice_fs.h
@@ -175,4 +175,6 @@ struct usbdevfs_ioctl32 {
 #define USBDEVFS_CLEAR_HALT        _IOR('U', 21, unsigned int)
 #define USBDEVFS_DISCONNECT        _IO('U', 22)
 #define USBDEVFS_CONNECT           _IO('U', 23)
+#define USBDEVFS_CLAIM_PORT        _IOR('U', 24, unsigned int)
+#define USBDEVFS_RELEASE_PORT      _IOR('U', 25, unsigned int)
 #endif /* _LINUX_USBDEVICE_FS_H */
-- 
cgit v0.10.2


From ccf5b801cef4f9e2d708d3b87e91e2bc6abd5206 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 29 Jun 2009 11:00:01 -0400
Subject: USB: make intf.pm_usage an atomic_t

This patch (as1260) changes the pm_usage_cnt field in struct
usb_interface from an int to an atomic_t.  This is so that drivers can
invoke the usb_autopm_get_interface_async() and
usb_autopm_put_interface_async() routines without locking and without
fear of corrupting the pm_usage_cnt value.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 1bad4e5..1c976c1 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -235,7 +235,7 @@ static int usb_probe_interface(struct device *dev)
 		/* The interface should always appear to be in use
 		 * unless the driver suports autosuspend.
 		 */
-		intf->pm_usage_cnt = !(driver->supports_autosuspend);
+		atomic_set(&intf->pm_usage_cnt, !driver->supports_autosuspend);
 
 		/* Carry out a deferred switch to altsetting 0 */
 		if (intf->needs_altsetting0) {
@@ -347,7 +347,7 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 	usb_pm_lock(udev);
 	iface->condition = USB_INTERFACE_BOUND;
 	mark_active(iface);
-	iface->pm_usage_cnt = !(driver->supports_autosuspend);
+	atomic_set(&iface->pm_usage_cnt, !driver->supports_autosuspend);
 	usb_pm_unlock(udev);
 
 	/* if interface was already added, bind now; else let
@@ -1068,7 +1068,7 @@ static int autosuspend_check(struct usb_device *udev, int reschedule)
 			intf = udev->actconfig->interface[i];
 			if (!is_active(intf))
 				continue;
-			if (intf->pm_usage_cnt > 0)
+			if (atomic_read(&intf->pm_usage_cnt) > 0)
 				return -EBUSY;
 			if (intf->needs_remote_wakeup &&
 					!udev->do_remote_wakeup) {
@@ -1464,17 +1464,19 @@ static int usb_autopm_do_interface(struct usb_interface *intf,
 		status = -ENODEV;
 	else {
 		udev->auto_pm = 1;
-		intf->pm_usage_cnt += inc_usage_cnt;
+		atomic_add(inc_usage_cnt, &intf->pm_usage_cnt);
 		udev->last_busy = jiffies;
-		if (inc_usage_cnt >= 0 && intf->pm_usage_cnt > 0) {
+		if (inc_usage_cnt >= 0 &&
+				atomic_read(&intf->pm_usage_cnt) > 0) {
 			if (udev->state == USB_STATE_SUSPENDED)
 				status = usb_resume_both(udev,
 						PMSG_AUTO_RESUME);
 			if (status != 0)
-				intf->pm_usage_cnt -= inc_usage_cnt;
+				atomic_sub(inc_usage_cnt, &intf->pm_usage_cnt);
 			else
 				udev->last_busy = jiffies;
-		} else if (inc_usage_cnt <= 0 && intf->pm_usage_cnt <= 0) {
+		} else if (inc_usage_cnt <= 0 &&
+				atomic_read(&intf->pm_usage_cnt) <= 0) {
 			status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND);
 		}
 	}
@@ -1519,7 +1521,7 @@ void usb_autopm_put_interface(struct usb_interface *intf)
 
 	status = usb_autopm_do_interface(intf, -1);
 	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, intf->pm_usage_cnt);
+			__func__, status, atomic_read(&intf->pm_usage_cnt));
 }
 EXPORT_SYMBOL_GPL(usb_autopm_put_interface);
 
@@ -1547,10 +1549,10 @@ void usb_autopm_put_interface_async(struct usb_interface *intf)
 		status = -ENODEV;
 	} else {
 		udev->last_busy = jiffies;
-		--intf->pm_usage_cnt;
+		atomic_dec(&intf->pm_usage_cnt);
 		if (udev->autosuspend_disabled || udev->autosuspend_delay < 0)
 			status = -EPERM;
-		else if (intf->pm_usage_cnt <= 0 &&
+		else if (atomic_read(&intf->pm_usage_cnt) <= 0 &&
 				!timer_pending(&udev->autosuspend.timer)) {
 			queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend,
 					round_jiffies_up_relative(
@@ -1558,7 +1560,7 @@ void usb_autopm_put_interface_async(struct usb_interface *intf)
 		}
 	}
 	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, intf->pm_usage_cnt);
+			__func__, status, atomic_read(&intf->pm_usage_cnt));
 }
 EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async);
 
@@ -1602,7 +1604,7 @@ int usb_autopm_get_interface(struct usb_interface *intf)
 
 	status = usb_autopm_do_interface(intf, 1);
 	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, intf->pm_usage_cnt);
+			__func__, status, atomic_read(&intf->pm_usage_cnt));
 	return status;
 }
 EXPORT_SYMBOL_GPL(usb_autopm_get_interface);
@@ -1630,10 +1632,14 @@ int usb_autopm_get_interface_async(struct usb_interface *intf)
 		status = -ENODEV;
 	else if (udev->autoresume_disabled)
 		status = -EPERM;
-	else if (++intf->pm_usage_cnt > 0 && udev->state == USB_STATE_SUSPENDED)
-		queue_work(ksuspend_usb_wq, &udev->autoresume);
+	else {
+		atomic_inc(&intf->pm_usage_cnt);
+		if (atomic_read(&intf->pm_usage_cnt) > 0 &&
+				udev->state == USB_STATE_SUSPENDED)
+			queue_work(ksuspend_usb_wq, &udev->autoresume);
+	}
 	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, intf->pm_usage_cnt);
+			__func__, status, atomic_read(&intf->pm_usage_cnt));
 	return status;
 }
 EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async);
@@ -1655,7 +1661,7 @@ int usb_autopm_set_interface(struct usb_interface *intf)
 
 	status = usb_autopm_do_interface(intf, 0);
 	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, intf->pm_usage_cnt);
+			__func__, status, atomic_read(&intf->pm_usage_cnt));
 	return status;
 }
 EXPORT_SYMBOL_GPL(usb_autopm_set_interface);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index cb54420..69e3a96 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -373,7 +373,7 @@ static void kick_khubd(struct usb_hub *hub)
 	unsigned long	flags;
 
 	/* Suppress autosuspend until khubd runs */
-	to_usb_interface(hub->intfdev)->pm_usage_cnt = 1;
+	atomic_set(&to_usb_interface(hub->intfdev)->pm_usage_cnt, 1);
 
 	spin_lock_irqsave(&hub_event_lock, flags);
 	if (!hub->disconnected && list_empty(&hub->event_list)) {
@@ -678,7 +678,8 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 					msecs_to_jiffies(delay));
 
 			/* Suppress autosuspend until init is done */
-			to_usb_interface(hub->intfdev)->pm_usage_cnt = 1;
+			atomic_set(&to_usb_interface(hub->intfdev)->
+					pm_usage_cnt, 1);
 			return;		/* Continues at init2: below */
 		} else {
 			hub_power_on(hub, true);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3b45a0d..a34fa89 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -195,7 +195,7 @@ struct usb_interface {
 
 	struct device dev;		/* interface specific device info */
 	struct device *usb_dev;
-	int pm_usage_cnt;		/* usage counter for autosuspend */
+	atomic_t pm_usage_cnt;		/* usage counter for autosuspend */
 	struct work_struct reset_ws;	/* for resets in atomic context */
 };
 #define	to_usb_interface(d) container_of(d, struct usb_interface, dev)
@@ -551,13 +551,13 @@ extern void usb_autopm_put_interface_async(struct usb_interface *intf);
 
 static inline void usb_autopm_enable(struct usb_interface *intf)
 {
-	intf->pm_usage_cnt = 0;
+	atomic_set(&intf->pm_usage_cnt, 0);
 	usb_autopm_set_interface(intf);
 }
 
 static inline void usb_autopm_disable(struct usb_interface *intf)
 {
-	intf->pm_usage_cnt = 1;
+	atomic_set(&intf->pm_usage_cnt, 1);
 	usb_autopm_set_interface(intf);
 }
 
-- 
cgit v0.10.2


From 4c6e8971cbe0148085fcf6fd30eaa3c39f8a8cce Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 29 Jun 2009 11:02:04 -0400
Subject: USB: make the "usbfs_snoop" log more pertinent

This patch (as1261) reduces the amount of detailed URB information
logged by usbfs when the usbfs_snoop parameter is enabled.

Currently we don't display the final status value for a completed URB.
But we do display the entire data buffer twice: both before submission
and after completion.  The after-completion display doesn't limit
itself to the actual_length value.  But since usbmon is readily
available in virtually all distributions, there's no reason for usbfs
to print out any buffer contents at all!

So this patch restricts the information to: userspace buffer pointer,
endpoint number, type, and direction, length or actual_length, and
timeout value or status.  Now everything fits neatly into a single
line.

Along with those changes, the patch also fixes the snoop output for
the REAPURBNDELAY and REAPURBNDELAY32 ioctls.  The current version
omits the 'N' from the names.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 165de5d..a1add77 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -100,11 +100,15 @@ MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic");
 			dev_info(dev , format , ## arg);	\
 	} while (0)
 
-#define USB_DEVICE_DEV		MKDEV(USB_DEVICE_MAJOR, 0)
+enum snoop_when {
+	SUBMIT, COMPLETE
+};
 
+#define USB_DEVICE_DEV		MKDEV(USB_DEVICE_MAJOR, 0)
 
 #define	MAX_USBFS_BUFFER_SIZE	16384
 
+
 static int connected(struct dev_state *ps)
 {
 	return (!list_empty(&ps->list) &&
@@ -301,24 +305,42 @@ static struct async *async_getpending(struct dev_state *ps,
 	return NULL;
 }
 
-static void snoop_urb(struct urb *urb, void __user *userurb)
+static void snoop_urb(struct usb_device *udev,
+		void __user *userurb, int pipe, unsigned length,
+		int timeout_or_status, enum snoop_when when)
 {
-	unsigned j;
-	unsigned char *data = urb->transfer_buffer;
+	static const char *types[] = {"isoc", "int", "ctrl", "bulk"};
+	static const char *dirs[] = {"out", "in"};
+	int ep;
+	const char *t, *d;
 
 	if (!usbfs_snoop)
 		return;
 
-	dev_info(&urb->dev->dev, "direction=%s\n",
-			usb_urb_dir_in(urb) ? "IN" : "OUT");
-	dev_info(&urb->dev->dev, "userurb=%p\n", userurb);
-	dev_info(&urb->dev->dev, "transfer_buffer_length=%u\n",
-		 urb->transfer_buffer_length);
-	dev_info(&urb->dev->dev, "actual_length=%u\n", urb->actual_length);
-	dev_info(&urb->dev->dev, "data: ");
-	for (j = 0; j < urb->transfer_buffer_length; ++j)
-		printk("%02x ", data[j]);
-	printk("\n");
+	ep = usb_pipeendpoint(pipe);
+	t = types[usb_pipetype(pipe)];
+	d = dirs[!!usb_pipein(pipe)];
+
+	if (userurb) {		/* Async */
+		if (when == SUBMIT)
+			dev_info(&udev->dev, "userurb %p, ep%d %s-%s, "
+					"length %u\n",
+					userurb, ep, t, d, length);
+		else
+			dev_info(&udev->dev, "userurb %p, ep%d %s-%s, "
+					"actual_length %u status %d\n",
+					userurb, ep, t, d, length,
+					timeout_or_status);
+	} else {
+		if (when == SUBMIT)
+			dev_info(&udev->dev, "ep%d %s-%s, length %u, "
+					"timeout %d\n",
+					ep, t, d, length, timeout_or_status);
+		else
+			dev_info(&udev->dev, "ep%d %s-%s, actual_length %u, "
+					"status %d\n",
+					ep, t, d, length, timeout_or_status);
+	}
 }
 
 static void async_completed(struct urb *urb)
@@ -347,7 +369,8 @@ static void async_completed(struct urb *urb)
 		secid = as->secid;
 	}
 	snoop(&urb->dev->dev, "urb complete\n");
-	snoop_urb(urb, as->userurb);
+	snoop_urb(urb->dev, as->userurb, urb->pipe, urb->actual_length,
+			as->status, COMPLETE);
 	spin_unlock(&ps->lock);
 
 	if (signr)
@@ -690,7 +713,7 @@ static int proc_control(struct dev_state *ps, void __user *arg)
 	unsigned int tmo;
 	unsigned char *tbuf;
 	unsigned wLength;
-	int i, j, ret;
+	int i, pipe, ret;
 
 	if (copy_from_user(&ctrl, arg, sizeof(ctrl)))
 		return -EFAULT;
@@ -710,24 +733,17 @@ static int proc_control(struct dev_state *ps, void __user *arg)
 			free_page((unsigned long)tbuf);
 			return -EINVAL;
 		}
-		snoop(&dev->dev, "control read: bRequest=%02x "
-				"bRrequestType=%02x wValue=%04x "
-				"wIndex=%04x wLength=%04x\n",
-			ctrl.bRequest, ctrl.bRequestType, ctrl.wValue,
-				ctrl.wIndex, ctrl.wLength);
+		pipe = usb_rcvctrlpipe(dev, 0);
+		snoop_urb(dev, NULL, pipe, ctrl.wLength, tmo, SUBMIT);
 
 		usb_unlock_device(dev);
-		i = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), ctrl.bRequest,
+		i = usb_control_msg(dev, pipe, ctrl.bRequest,
 				    ctrl.bRequestType, ctrl.wValue, ctrl.wIndex,
 				    tbuf, ctrl.wLength, tmo);
 		usb_lock_device(dev);
+		snoop_urb(dev, NULL, pipe, max(i, 0), min(i, 0), COMPLETE);
+
 		if ((i > 0) && ctrl.wLength) {
-			if (usbfs_snoop) {
-				dev_info(&dev->dev, "control read: data ");
-				for (j = 0; j < i; ++j)
-					printk("%02x ", (u8)(tbuf)[j]);
-				printk("\n");
-			}
 			if (copy_to_user(ctrl.data, tbuf, i)) {
 				free_page((unsigned long)tbuf);
 				return -EFAULT;
@@ -740,22 +756,15 @@ static int proc_control(struct dev_state *ps, void __user *arg)
 				return -EFAULT;
 			}
 		}
-		snoop(&dev->dev, "control write: bRequest=%02x "
-				"bRrequestType=%02x wValue=%04x "
-				"wIndex=%04x wLength=%04x\n",
-			ctrl.bRequest, ctrl.bRequestType, ctrl.wValue,
-				ctrl.wIndex, ctrl.wLength);
-		if (usbfs_snoop) {
-			dev_info(&dev->dev, "control write: data: ");
-			for (j = 0; j < ctrl.wLength; ++j)
-				printk("%02x ", (unsigned char)(tbuf)[j]);
-			printk("\n");
-		}
+		pipe = usb_sndctrlpipe(dev, 0);
+		snoop_urb(dev, NULL, pipe, ctrl.wLength, tmo, SUBMIT);
+
 		usb_unlock_device(dev);
 		i = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), ctrl.bRequest,
 				    ctrl.bRequestType, ctrl.wValue, ctrl.wIndex,
 				    tbuf, ctrl.wLength, tmo);
 		usb_lock_device(dev);
+		snoop_urb(dev, NULL, pipe, max(i, 0), min(i, 0), COMPLETE);
 	}
 	free_page((unsigned long)tbuf);
 	if (i < 0 && i != -EPIPE) {
@@ -774,7 +783,7 @@ static int proc_bulk(struct dev_state *ps, void __user *arg)
 	unsigned int tmo, len1, pipe;
 	int len2;
 	unsigned char *tbuf;
-	int i, j, ret;
+	int i, ret;
 
 	if (copy_from_user(&bulk, arg, sizeof(bulk)))
 		return -EFAULT;
@@ -801,18 +810,14 @@ static int proc_bulk(struct dev_state *ps, void __user *arg)
 			kfree(tbuf);
 			return -EINVAL;
 		}
-		snoop(&dev->dev, "bulk read: len=0x%02x timeout=%04d\n",
-			bulk.len, bulk.timeout);
+		snoop_urb(dev, NULL, pipe, len1, tmo, SUBMIT);
+
 		usb_unlock_device(dev);
 		i = usb_bulk_msg(dev, pipe, tbuf, len1, &len2, tmo);
 		usb_lock_device(dev);
+		snoop_urb(dev, NULL, pipe, len2, i, COMPLETE);
+
 		if (!i && len2) {
-			if (usbfs_snoop) {
-				dev_info(&dev->dev, "bulk read: data ");
-				for (j = 0; j < len2; ++j)
-					printk("%02x ", (u8)(tbuf)[j]);
-				printk("\n");
-			}
 			if (copy_to_user(bulk.data, tbuf, len2)) {
 				kfree(tbuf);
 				return -EFAULT;
@@ -825,17 +830,12 @@ static int proc_bulk(struct dev_state *ps, void __user *arg)
 				return -EFAULT;
 			}
 		}
-		snoop(&dev->dev, "bulk write: len=0x%02x timeout=%04d\n",
-			bulk.len, bulk.timeout);
-		if (usbfs_snoop) {
-			dev_info(&dev->dev, "bulk write: data: ");
-			for (j = 0; j < len1; ++j)
-				printk("%02x ", (unsigned char)(tbuf)[j]);
-			printk("\n");
-		}
+		snoop_urb(dev, NULL, pipe, len1, tmo, SUBMIT);
+
 		usb_unlock_device(dev);
 		i = usb_bulk_msg(dev, pipe, tbuf, len1, &len2, tmo);
 		usb_lock_device(dev);
+		snoop_urb(dev, NULL, pipe, len2, i, COMPLETE);
 	}
 	kfree(tbuf);
 	if (i < 0)
@@ -1053,13 +1053,6 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 			is_in = 0;
 			uurb->endpoint &= ~USB_DIR_IN;
 		}
-		snoop(&ps->dev->dev, "control urb: bRequest=%02x "
-			"bRrequestType=%02x wValue=%04x "
-			"wIndex=%04x wLength=%04x\n",
-			dr->bRequest, dr->bRequestType,
-			__le16_to_cpup(&dr->wValue),
-			__le16_to_cpup(&dr->wIndex),
-			__le16_to_cpup(&dr->wLength));
 		break;
 
 	case USBDEVFS_URB_TYPE_BULK:
@@ -1072,7 +1065,6 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 		uurb->number_of_packets = 0;
 		if (uurb->buffer_length > MAX_USBFS_BUFFER_SIZE)
 			return -EINVAL;
-		snoop(&ps->dev->dev, "bulk urb\n");
 		break;
 
 	case USBDEVFS_URB_TYPE_ISO:
@@ -1104,7 +1096,6 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 			return -EINVAL;
 		}
 		uurb->buffer_length = totlen;
-		snoop(&ps->dev->dev, "iso urb\n");
 		break;
 
 	case USBDEVFS_URB_TYPE_INTERRUPT:
@@ -1113,7 +1104,6 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 			return -EINVAL;
 		if (uurb->buffer_length > MAX_USBFS_BUFFER_SIZE)
 			return -EINVAL;
-		snoop(&ps->dev->dev, "interrupt urb\n");
 		break;
 
 	default:
@@ -1200,11 +1190,14 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 			return -EFAULT;
 		}
 	}
-	snoop_urb(as->urb, as->userurb);
+	snoop_urb(ps->dev, as->userurb, as->urb->pipe,
+			as->urb->transfer_buffer_length, 0, SUBMIT);
 	async_newpending(as);
 	if ((ret = usb_submit_urb(as->urb, GFP_KERNEL))) {
 		dev_printk(KERN_DEBUG, &ps->dev->dev,
 			   "usbfs: usb_submit_urb returned %d\n", ret);
+		snoop_urb(ps->dev, as->userurb, as->urb->pipe,
+				0, ret, COMPLETE);
 		async_removepending(as);
 		free_async(as);
 		return ret;
@@ -1670,7 +1663,7 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
 		break;
 
 	case USBDEVFS_REAPURBNDELAY32:
-		snoop(&dev->dev, "%s: REAPURBDELAY32\n", __func__);
+		snoop(&dev->dev, "%s: REAPURBNDELAY32\n", __func__);
 		ret = proc_reapurbnonblock_compat(ps, p);
 		break;
 
@@ -1691,7 +1684,7 @@ static int usbdev_ioctl(struct inode *inode, struct file *file,
 		break;
 
 	case USBDEVFS_REAPURBNDELAY:
-		snoop(&dev->dev, "%s: REAPURBDELAY\n", __func__);
+		snoop(&dev->dev, "%s: REAPURBNDELAY\n", __func__);
 		ret = proc_reapurbnonblock(ps, p);
 		break;
 
-- 
cgit v0.10.2


From ab26d20f3ef1105d889f702cd01fba8c6fb32f73 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Wed, 1 Jul 2009 03:46:25 -0700
Subject: USB: gadget: pxa25x: basic transceiver support

This adds very basic otg_transceiver support, with vbus_session
and vbus_draw callbacks.

Now VBUS sensing can be handled by an external driver which registers
the otg_transceiver interface. It also allows gadget drivers to configure
the current drawn from VBUS. The UDC driver just passes their requests
along to the transceiver driver.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index ed21e26..e6fedbd 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -56,6 +56,7 @@
 
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/usb/otg.h>
 
 /*
  * This driver is PXA25x only.  Grab the right register definitions.
@@ -1008,15 +1009,27 @@ static int pxa25x_udc_pullup(struct usb_gadget *_gadget, int is_active)
 	return 0;
 }
 
+/* boards may consume current from VBUS, up to 100-500mA based on config.
+ * the 500uA suspend ceiling means that exclusively vbus-powered PXA designs
+ * violate USB specs.
+ */
+static int pxa25x_udc_vbus_draw(struct usb_gadget *_gadget, unsigned mA)
+{
+	struct pxa25x_udc	*udc;
+
+	udc = container_of(_gadget, struct pxa25x_udc, gadget);
+
+	if (udc->transceiver)
+		return otg_set_power(udc->transceiver, mA);
+	return -EOPNOTSUPP;
+}
+
 static const struct usb_gadget_ops pxa25x_udc_ops = {
 	.get_frame	= pxa25x_udc_get_frame,
 	.wakeup		= pxa25x_udc_wakeup,
 	.vbus_session	= pxa25x_udc_vbus_session,
 	.pullup		= pxa25x_udc_pullup,
-
-	// .vbus_draw ... boards may consume current from VBUS, up to
-	// 100-500mA based on config.  the 500uA suspend ceiling means
-	// that exclusively vbus-powered PXA designs violate USB specs.
+	.vbus_draw	= pxa25x_udc_vbus_draw,
 };
 
 /*-------------------------------------------------------------------------*/
@@ -1303,9 +1316,23 @@ fail:
 	 * for set_configuration as well as eventual disconnect.
 	 */
 	DMSG("registered gadget driver '%s'\n", driver->driver.name);
+
+	/* connect to bus through transceiver */
+	if (dev->transceiver) {
+		retval = otg_set_peripheral(dev->transceiver, &dev->gadget);
+		if (retval) {
+			DMSG("can't bind to transceiver\n");
+			if (driver->unbind)
+				driver->unbind(&dev->gadget);
+			goto bind_fail;
+		}
+	}
+
 	pullup(dev);
 	dump_state(dev);
 	return 0;
+bind_fail:
+	return retval;
 }
 EXPORT_SYMBOL(usb_gadget_register_driver);
 
@@ -1351,6 +1378,9 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
 	stop_activity(dev, driver);
 	local_irq_enable();
 
+	if (dev->transceiver)
+		(void) otg_set_peripheral(dev->transceiver, NULL);
+
 	driver->unbind(&dev->gadget);
 	dev->gadget.dev.driver = NULL;
 	dev->driver = NULL;
@@ -2162,6 +2192,8 @@ static int __init pxa25x_udc_probe(struct platform_device *pdev)
 	dev->dev = &pdev->dev;
 	dev->mach = pdev->dev.platform_data;
 
+	dev->transceiver = otg_get_transceiver();
+
 	if (gpio_is_valid(dev->mach->gpio_vbus)) {
 		if ((retval = gpio_request(dev->mach->gpio_vbus,
 				"pxa25x_udc GPIO VBUS"))) {
@@ -2264,6 +2296,10 @@ lubbock_fail0:
 	if (gpio_is_valid(dev->mach->gpio_vbus))
 		gpio_free(dev->mach->gpio_vbus);
  err_gpio_vbus:
+	if (dev->transceiver) {
+		otg_put_transceiver(dev->transceiver);
+		dev->transceiver = NULL;
+	}
 	clk_put(dev->clk);
  err_clk:
 	return retval;
@@ -2305,6 +2341,11 @@ static int __exit pxa25x_udc_remove(struct platform_device *pdev)
 
 	clk_put(dev->clk);
 
+	if (dev->transceiver) {
+		otg_put_transceiver(dev->transceiver);
+		dev->transceiver = NULL;
+	}
+
 	platform_set_drvdata(pdev, NULL);
 	the_controller = NULL;
 	return 0;
diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h
index 1d51aa2..f572c56 100644
--- a/drivers/usb/gadget/pxa25x_udc.h
+++ b/drivers/usb/gadget/pxa25x_udc.h
@@ -128,6 +128,7 @@ struct pxa25x_udc {
 	struct device				*dev;
 	struct clk				*clk;
 	struct pxa2xx_udc_mach_info		*mach;
+	struct otg_transceiver			*transceiver;
 	u64					dma_mask;
 	struct pxa25x_ep			ep [PXA_UDC_NUM_ENDPOINTS];
 
-- 
cgit v0.10.2


From 383cedc3bb435de7a27d31a92d622413daa5cb20 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Wed, 1 Jul 2009 16:00:32 +0200
Subject: USB: serial: full autosuspend support for the option driver

this adds autosupport usable even in an always online mode.

- enables remote wakeup on open
- autoresume for sending
- timeout based autosuspend if nothing is sent or recieved
- autosuspend without remote wakeup support on open/close

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Tested-off-by: Zhao Ming <zhao.ming9@zte.com.cn>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 67862d7..f66e398 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -596,6 +596,7 @@ static struct usb_driver option_driver = {
 #ifdef CONFIG_PM
 	.suspend    = usb_serial_suspend,
 	.resume     = usb_serial_resume,
+	.supports_autosuspend =	1,
 #endif
 	.id_table   = option_ids,
 	.no_dynamic_id = 	1,
@@ -643,6 +644,12 @@ static int debug;
 #define IN_BUFLEN 4096
 #define OUT_BUFLEN 4096
 
+struct option_intf_private {
+	spinlock_t susp_lock;
+	unsigned int suspended:1;
+	int in_flight;
+};
+
 struct option_port_private {
 	/* Input endpoints and buffer for this port */
 	struct urb *in_urbs[N_IN_URB];
@@ -651,6 +658,8 @@ struct option_port_private {
 	struct urb *out_urbs[N_OUT_URB];
 	u8 *out_buffer[N_OUT_URB];
 	unsigned long out_busy;		/* Bit vector of URBs in use */
+	int opened;
+	struct usb_anchor delayed;
 
 	/* Settings for the port */
 	int rts_state;	/* Handshaking pins (outputs) */
@@ -697,12 +706,17 @@ module_exit(option_exit);
 static int option_probe(struct usb_serial *serial,
 			const struct usb_device_id *id)
 {
+	struct option_intf_private *data;
 	/* D-Link DWM 652 still exposes CD-Rom emulation interface in modem mode */
 	if (serial->dev->descriptor.idVendor == DLINK_VENDOR_ID &&
 		serial->dev->descriptor.idProduct == DLINK_PRODUCT_DWM_652 &&
 		serial->interface->cur_altsetting->desc.bInterfaceClass == 0x8)
 		return -ENODEV;
 
+	data = serial->private = kzalloc(sizeof(struct option_intf_private), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	spin_lock_init(&data->susp_lock);
 	return 0;
 }
 
@@ -759,12 +773,15 @@ static int option_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count)
 {
 	struct option_port_private *portdata;
+	struct option_intf_private *intfdata;
 	int i;
 	int left, todo;
 	struct urb *this_urb = NULL; /* spurious */
 	int err;
+	unsigned long flags;
 
 	portdata = usb_get_serial_port_data(port);
+	intfdata = port->serial->private;
 
 	dbg("%s: write (%d chars)", __func__, count);
 
@@ -786,17 +803,33 @@ static int option_write(struct tty_struct *tty, struct usb_serial_port *port,
 		dbg("%s: endpoint %d buf %d", __func__,
 			usb_pipeendpoint(this_urb->pipe), i);
 
+		err = usb_autopm_get_interface_async(port->serial->interface);
+		if (err < 0)
+			break;
+
 		/* send the data */
 		memcpy(this_urb->transfer_buffer, buf, todo);
 		this_urb->transfer_buffer_length = todo;
 
-		err = usb_submit_urb(this_urb, GFP_ATOMIC);
-		if (err) {
-			dbg("usb_submit_urb %p (write bulk) failed "
-				"(%d)", this_urb, err);
-			clear_bit(i, &portdata->out_busy);
-			continue;
+		spin_lock_irqsave(&intfdata->susp_lock, flags);
+		if (intfdata->suspended) {
+			usb_anchor_urb(this_urb, &portdata->delayed);
+			spin_unlock_irqrestore(&intfdata->susp_lock, flags);
+		} else {
+			intfdata->in_flight++;
+			spin_unlock_irqrestore(&intfdata->susp_lock, flags);
+			err = usb_submit_urb(this_urb, GFP_ATOMIC);
+			if (err) {
+				dbg("usb_submit_urb %p (write bulk) failed "
+					"(%d)", this_urb, err);
+				clear_bit(i, &portdata->out_busy);
+				spin_lock_irqsave(&intfdata->susp_lock, flags);
+				intfdata->in_flight--;
+				spin_unlock_irqrestore(&intfdata->susp_lock, flags);
+				continue;
+			}
 		}
+
 		portdata->tx_start_time[i] = jiffies;
 		buf += todo;
 		left -= todo;
@@ -840,7 +873,10 @@ static void option_indat_callback(struct urb *urb)
 			if (err)
 				printk(KERN_ERR "%s: resubmit read urb failed. "
 					"(%d)", __func__, err);
+			else
+				usb_mark_last_busy(port->serial->dev);
 		}
+
 	}
 	return;
 }
@@ -849,15 +885,21 @@ static void option_outdat_callback(struct urb *urb)
 {
 	struct usb_serial_port *port;
 	struct option_port_private *portdata;
+	struct option_intf_private *intfdata;
 	int i;
 
 	dbg("%s", __func__);
 
 	port =  urb->context;
+	intfdata = port->serial->private;
 
 	usb_serial_port_softint(port);
-
+	usb_autopm_put_interface_async(port->serial->interface);
 	portdata = usb_get_serial_port_data(port);
+	spin_lock(&intfdata->susp_lock);
+	intfdata->in_flight--;
+	spin_unlock(&intfdata->susp_lock);
+
 	for (i = 0; i < N_OUT_URB; ++i) {
 		if (portdata->out_urbs[i] == urb) {
 			smp_mb__before_clear_bit();
@@ -967,10 +1009,13 @@ static int option_chars_in_buffer(struct tty_struct *tty)
 static int option_open(struct tty_struct *tty, struct usb_serial_port *port)
 {
 	struct option_port_private *portdata;
+	struct option_intf_private *intfdata;
+	struct usb_serial *serial = port->serial;
 	int i, err;
 	struct urb *urb;
 
 	portdata = usb_get_serial_port_data(port);
+	intfdata = serial->private;
 
 	dbg("%s", __func__);
 
@@ -989,6 +1034,12 @@ static int option_open(struct tty_struct *tty, struct usb_serial_port *port)
 
 	option_send_setup(port);
 
+	serial->interface->needs_remote_wakeup = 1;
+	spin_lock_irq(&intfdata->susp_lock);
+	portdata->opened = 1;
+	spin_unlock_irq(&intfdata->susp_lock);
+	usb_autopm_put_interface(serial->interface);
+
 	return 0;
 }
 
@@ -1013,16 +1064,23 @@ static void option_close(struct usb_serial_port *port)
 	int i;
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
+	struct option_intf_private *intfdata = port->serial->private;
 
 	dbg("%s", __func__);
 	portdata = usb_get_serial_port_data(port);
 
 	if (serial->dev) {
 		/* Stop reading/writing urbs */
+		spin_lock_irq(&intfdata->susp_lock);
+		portdata->opened = 0;
+		spin_unlock_irq(&intfdata->susp_lock);
+
 		for (i = 0; i < N_IN_URB; i++)
 			usb_kill_urb(portdata->in_urbs[i]);
 		for (i = 0; i < N_OUT_URB; i++)
 			usb_kill_urb(portdata->out_urbs[i]);
+		usb_autopm_get_interface(serial->interface);
+		serial->interface->needs_remote_wakeup = 0;
 	}
 }
 
@@ -1127,6 +1185,7 @@ static int option_startup(struct usb_serial *serial)
 					__func__, i);
 			return 1;
 		}
+		init_usb_anchor(&portdata->delayed);
 
 		for (j = 0; j < N_IN_URB; j++) {
 			buffer = (u8 *)__get_free_page(GFP_KERNEL);
@@ -1229,18 +1288,52 @@ static void option_release(struct usb_serial *serial)
 #ifdef CONFIG_PM
 static int option_suspend(struct usb_serial *serial, pm_message_t message)
 {
+	struct option_intf_private *intfdata = serial->private;
+	int b;
+
 	dbg("%s entered", __func__);
+
+	if (serial->dev->auto_pm) {
+		spin_lock_irq(&intfdata->susp_lock);
+		b = intfdata->in_flight;
+		spin_unlock_irq(&intfdata->susp_lock);
+
+		if (b)
+			return -EBUSY;
+	}
+
+	spin_lock_irq(&intfdata->susp_lock);
+	intfdata->suspended = 1;
+	spin_unlock_irq(&intfdata->susp_lock);
 	stop_read_write_urbs(serial);
 
 	return 0;
 }
 
+static void play_delayed(struct usb_serial_port *port)
+{
+	struct option_intf_private *data;
+	struct option_port_private *portdata;
+	struct urb *urb;
+	int err;
+
+	portdata = usb_get_serial_port_data(port);
+	data = port->serial->private;
+	while ((urb = usb_get_from_anchor(&portdata->delayed))) {
+		err = usb_submit_urb(urb, GFP_ATOMIC);
+		if (!err)
+			data->in_flight++;
+	}
+}
+
 static int option_resume(struct usb_serial *serial)
 {
-	int err, i, j;
+	int i, j;
 	struct usb_serial_port *port;
-	struct urb *urb;
+	struct option_intf_private *intfdata = serial->private;
 	struct option_port_private *portdata;
+	struct urb *urb;
+	int err = 0;
 
 	dbg("%s entered", __func__);
 	/* get the interrupt URBs resubmitted unconditionally */
@@ -1255,7 +1348,7 @@ static int option_resume(struct usb_serial *serial)
 		if (err < 0) {
 			err("%s: Error %d for interrupt URB of port%d",
 				 __func__, err, i);
-			return err;
+			goto err_out;
 		}
 	}
 
@@ -1263,27 +1356,32 @@ static int option_resume(struct usb_serial *serial)
 		/* walk all ports */
 		port = serial->port[i];
 		portdata = usb_get_serial_port_data(port);
-		mutex_lock(&port->mutex);
 
 		/* skip closed ports */
-		if (!port->port.count) {
-			mutex_unlock(&port->mutex);
+		spin_lock_irq(&intfdata->susp_lock);
+		if (!portdata->opened) {
+			spin_unlock_irq(&intfdata->susp_lock);
 			continue;
 		}
 
 		for (j = 0; j < N_IN_URB; j++) {
 			urb = portdata->in_urbs[j];
-			err = usb_submit_urb(urb, GFP_NOIO);
+			err = usb_submit_urb(urb, GFP_ATOMIC);
 			if (err < 0) {
-				mutex_unlock(&port->mutex);
 				err("%s: Error %d for bulk URB %d",
 					 __func__, err, i);
-				return err;
+				spin_unlock_irq(&intfdata->susp_lock);
+				goto err_out;
 			}
 		}
-		mutex_unlock(&port->mutex);
+		play_delayed(port);
+		spin_unlock_irq(&intfdata->susp_lock);
 	}
-	return 0;
+	spin_lock_irq(&intfdata->susp_lock);
+	intfdata->suspended = 0;
+	spin_unlock_irq(&intfdata->susp_lock);
+err_out:
+	return err;
 }
 #endif
 
-- 
cgit v0.10.2


From 75b48f09e577cbb91d9f2a36bede9a2507929714 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 3 Jul 2009 23:09:41 +0200
Subject: USB: usb-serial, remove unused variables

There are some unused variables in serial_do_down. Remove them.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 9d7ca48..54bb37d 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -292,8 +292,6 @@ static int serial_open(struct tty_struct *tty, struct file *filp)
 static void serial_down(struct usb_serial_port *port)
 {
 	struct usb_serial_driver *drv = port->serial->type;
-	struct usb_serial *serial;
-	struct module *owner;
 
 	/*
 	 * The console is magical.  Do not hang up the console hardware
@@ -309,12 +307,8 @@ static void serial_down(struct usb_serial_port *port)
 		return;
 
 	mutex_lock(&port->mutex);
-	serial = port->serial;
-	owner = serial->type->driver.owner;
-
 	if (drv->close)
 		drv->close(port);
-
 	mutex_unlock(&port->mutex);
 }
 
-- 
cgit v0.10.2


From 86286883fc8218c81cc1deb04cd1b4a8464bba6f Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 2 Jul 2009 11:36:30 +0200
Subject: USB: usbtmc can do IO to device after disconnect

usbtmc will happily complete read/write requests even after disconnect
has returned. The fix is to introduce a flag.


Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index b09a527..0c9df97 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -86,6 +86,8 @@ struct usbtmc_device_data {
 	bool TermCharEnabled;
 	bool auto_abort;
 
+	bool zombie; /* fd of disconnected device */
+
 	struct usbtmc_dev_capabilities	capabilities;
 	struct kref kref;
 	struct mutex io_mutex;	/* only one i/o function running at a time */
@@ -384,6 +386,10 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 		return -ENOMEM;
 
 	mutex_lock(&data->io_mutex);
+	if (data->zombie) {
+		retval = -ENODEV;
+		goto exit;
+	}
 
 	remaining = count;
 	done = 0;
@@ -496,6 +502,10 @@ static ssize_t usbtmc_write(struct file *filp, const char __user *buf,
 		return -ENOMEM;
 
 	mutex_lock(&data->io_mutex);
+	if (data->zombie) {
+		retval = -ENODEV;
+		goto exit;
+	}
 
 	remaining = count;
 	done = 0;
@@ -925,6 +935,10 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	data = file->private_data;
 	mutex_lock(&data->io_mutex);
+	if (data->zombie) {
+		retval = -ENODEV;
+		goto skip_io_on_zombie;
+	}
 
 	switch (cmd) {
 	case USBTMC_IOCTL_CLEAR_OUT_HALT:
@@ -952,6 +966,7 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		break;
 	}
 
+skip_io_on_zombie:
 	mutex_unlock(&data->io_mutex);
 	return retval;
 }
@@ -995,6 +1010,7 @@ static int usbtmc_probe(struct usb_interface *intf,
 	usb_set_intfdata(intf, data);
 	kref_init(&data->kref);
 	mutex_init(&data->io_mutex);
+	data->zombie = 0;
 
 	/* Initialize USBTMC bTag and other fields */
 	data->bTag	= 1;
@@ -1065,6 +1081,9 @@ static void usbtmc_disconnect(struct usb_interface *intf)
 	usb_deregister_dev(intf, &usbtmc_class);
 	sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp);
 	sysfs_remove_group(&intf->dev.kobj, &data_attr_grp);
+	mutex_lock(&data->io_mutex);
+	data->zombie = 1;
+	mutex_unlock(&data->io_mutex);
 	kref_put(&data->kref, usbtmc_delete);
 }
 
-- 
cgit v0.10.2


From a4708103adeaf5731c329b37b0a2b397f814c55c Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 2 Jul 2009 11:44:33 +0200
Subject: USB: suspend/resume support for usbtmc

a class driver should have suspend/resume. This makes sure we
don't see a virtual disconnect unnecessarily.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 0c9df97..4f0858f 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1087,11 +1087,24 @@ static void usbtmc_disconnect(struct usb_interface *intf)
 	kref_put(&data->kref, usbtmc_delete);
 }
 
+static int usbtmc_suspend (struct usb_interface *intf, pm_message_t message)
+{
+	/* this driver does not have pending URBs */
+	return 0;
+}
+
+static int usbtmc_resume (struct usb_interface *intf)
+{
+	return 0;
+}
+
 static struct usb_driver usbtmc_driver = {
 	.name		= "usbtmc",
 	.id_table	= usbtmc_devices,
 	.probe		= usbtmc_probe,
-	.disconnect	= usbtmc_disconnect
+	.disconnect	= usbtmc_disconnect,
+	.suspend	= usbtmc_suspend,
+	.resume		= usbtmc_resume,
 };
 
 static int __init usbtmc_init(void)
-- 
cgit v0.10.2


From d35b4ce164f393ad58580ad3e9fdde86328739ad Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 2 Jul 2009 12:07:07 +0200
Subject: USB: legousbtower: make poll notice disconnect

poll needs to return an error if a device is disconnected
  - make poll check for device's presence
  - wake all waiters in disconnect

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index 97efeae..faa6d62 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -552,6 +552,9 @@ static unsigned int tower_poll (struct file *file, poll_table *wait)
 
 	dev = file->private_data;
 
+	if (!dev->udev)
+		return POLLERR | POLLHUP;
+
 	poll_wait(file, &dev->read_wait, wait);
 	poll_wait(file, &dev->write_wait, wait);
 
@@ -1025,6 +1028,9 @@ static void tower_disconnect (struct usb_interface *interface)
 		tower_delete (dev);
 	} else {
 		dev->udev = NULL;
+		/* wake up pollers */
+		wake_up_interruptible_all(&dev->read_wait);
+		wake_up_interruptible_all(&dev->write_wait);
 		mutex_unlock(&dev->lock);
 	}
 
-- 
cgit v0.10.2


From d12b85e7de1abce4db940ebb169f064583b5796e Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 2 Jul 2009 17:01:06 +0200
Subject: USB: ldusb should signal an error in poll if the device is
 disconnected

poll() should test for a disconnection of the device.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index ad4fb15..90f1301 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -412,6 +412,9 @@ static unsigned int ld_usb_poll(struct file *file, poll_table *wait)
 
 	dev = file->private_data;
 
+	if (!dev->intf)
+		return POLLERR | POLLHUP;
+
 	poll_wait(file, &dev->read_wait, wait);
 	poll_wait(file, &dev->write_wait, wait);
 
@@ -767,6 +770,9 @@ static void ld_usb_disconnect(struct usb_interface *intf)
 		ld_usb_delete(dev);
 	} else {
 		dev->intf = NULL;
+		/* wake up pollers */
+		wake_up_interruptible_all(&dev->read_wait);
+		wake_up_interruptible_all(&dev->write_wait);
 		mutex_unlock(&dev->mutex);
 	}
 
-- 
cgit v0.10.2


From 058e698b6372aa32b3b0dbbb81e5531a2ae3e56c Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sun, 12 Jul 2009 09:43:52 +0200
Subject: USB: gadget: Drop NULL test on list_entry result

list_entry, which is an alias for container_of, cannot return NULL, as
there is no way to add a NULL value to a doubly linked list.

A simplified version of the semantic match that findds this problem is as
follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r@
expression x,E;
statement S1,S2;
position p,p1;
@@

*x = list_entry@p(...)
... when != x = E
*if@p1 (x == NULL) S1 else S2
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c
index 77352cc..d5b6596 100644
--- a/drivers/usb/gadget/amd5536udc.c
+++ b/drivers/usb/gadget/amd5536udc.c
@@ -2378,40 +2378,34 @@ static irqreturn_t udc_data_in_isr(struct udc *dev, int ep_ix)
 		if (!ep->cancel_transfer && !list_empty(&ep->queue)) {
 			req = list_entry(ep->queue.next,
 					struct udc_request, queue);
-			if (req) {
-				/*
-				 * length bytes transfered
-				 * check dma done of last desc. in PPBDU mode
-				 */
-				if (use_dma_ppb_du) {
-					td = udc_get_last_dma_desc(req);
-					if (td) {
-						dma_done =
-							AMD_GETBITS(td->status,
-							UDC_DMA_IN_STS_BS);
-						/* don't care DMA done */
-						req->req.actual =
-							req->req.length;
-					}
-				} else {
-					/* assume all bytes transferred */
+			/*
+			 * length bytes transfered
+			 * check dma done of last desc. in PPBDU mode
+			 */
+			if (use_dma_ppb_du) {
+				td = udc_get_last_dma_desc(req);
+				if (td) {
+					dma_done =
+						AMD_GETBITS(td->status,
+						UDC_DMA_IN_STS_BS);
+					/* don't care DMA done */
 					req->req.actual = req->req.length;
 				}
+			} else {
+				/* assume all bytes transferred */
+				req->req.actual = req->req.length;
+			}
 
-				if (req->req.actual == req->req.length) {
-					/* complete req */
-					complete_req(ep, req, 0);
-					req->dma_going = 0;
-					/* further request available ? */
-					if (list_empty(&ep->queue)) {
-						/* disable interrupt */
-						tmp = readl(
-							&dev->regs->ep_irqmsk);
-						tmp |= AMD_BIT(ep->num);
-						writel(tmp,
-							&dev->regs->ep_irqmsk);
-					}
-
+			if (req->req.actual == req->req.length) {
+				/* complete req */
+				complete_req(ep, req, 0);
+				req->dma_going = 0;
+				/* further request available ? */
+				if (list_empty(&ep->queue)) {
+					/* disable interrupt */
+					tmp = readl(&dev->regs->ep_irqmsk);
+					tmp |= AMD_BIT(ep->num);
+					writel(tmp, &dev->regs->ep_irqmsk);
 				}
 			}
 		}
-- 
cgit v0.10.2


From b7800218bccb52dbcb1613bb51425b21441b81f9 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Wed, 15 Jul 2009 20:12:30 +0200
Subject: USB: gadget: s3c-hsotg: missing parentheses

Add missing parentheses

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
CC: Ben Dooks <ben@simtec.co.uk>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c
index 50c71aa..4b5dbd0 100644
--- a/drivers/usb/gadget/s3c-hsotg.c
+++ b/drivers/usb/gadget/s3c-hsotg.c
@@ -2392,7 +2392,7 @@ static int s3c_hsotg_corereset(struct s3c_hsotg *hsotg)
 		grstctl = readl(hsotg->regs + S3C_GRSTCTL);
 	} while (!(grstctl & S3C_GRSTCTL_CSftRst) && timeout-- > 0);
 
-	if (!grstctl & S3C_GRSTCTL_CSftRst) {
+	if (!(grstctl & S3C_GRSTCTL_CSftRst)) {
 		dev_err(hsotg->dev, "Failed to get CSftRst asserted\n");
 		return -EINVAL;
 	}
@@ -2514,8 +2514,8 @@ int usb_gadget_register_driver(struct usb_gadget_driver *driver)
 	 * DMA mode we may need this. */
 	writel(S3C_DOEPMSK_SetupMsk | S3C_DOEPMSK_AHBErrMsk |
 	       S3C_DOEPMSK_EPDisbldMsk |
-	       using_dma(hsotg) ? (S3C_DIEPMSK_XferComplMsk |
-				   S3C_DIEPMSK_TimeOUTMsk) : 0,
+	       (using_dma(hsotg) ? (S3C_DIEPMSK_XferComplMsk |
+				   S3C_DIEPMSK_TimeOUTMsk) : 0),
 	       hsotg->regs + S3C_DOEPMSK);
 
 	writel(0, hsotg->regs + S3C_DAINTMSK);
-- 
cgit v0.10.2


From d9bfbd167b4dac51fed4edde7f6cfc378c9aea98 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Sun, 12 Jul 2009 23:58:23 +0200
Subject: USB: full power management support for the idmouse driver

usb: full runtime PM support for idmouse driver

- add suspend/resume support
- add reset_resume support
- add support for autosuspend

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Tested-by: Andreas Deresch <aderesch@fs.tum.de>

diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c
index 6da8887..1337a9c 100644
--- a/drivers/usb/misc/idmouse.c
+++ b/drivers/usb/misc/idmouse.c
@@ -96,6 +96,8 @@ static int idmouse_probe(struct usb_interface *interface,
 				const struct usb_device_id *id);
 
 static void idmouse_disconnect(struct usb_interface *interface);
+static int idmouse_suspend(struct usb_interface *intf, pm_message_t message);
+static int idmouse_resume(struct usb_interface *intf);
 
 /* file operation pointers */
 static const struct file_operations idmouse_fops = {
@@ -117,7 +119,11 @@ static struct usb_driver idmouse_driver = {
 	.name = DRIVER_SHORT,
 	.probe = idmouse_probe,
 	.disconnect = idmouse_disconnect,
+	.suspend = idmouse_suspend,
+	.resume = idmouse_resume,
+	.reset_resume = idmouse_resume,
 	.id_table = idmouse_table,
+	.supports_autosuspend = 1,
 };
 
 static int idmouse_create_image(struct usb_idmouse *dev)
@@ -197,6 +203,17 @@ reset:
 	return result;
 }
 
+/* PM operations are nops as this driver does IO only during open() */
+static int idmouse_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	return 0;
+}
+
+static int idmouse_resume(struct usb_interface *intf)
+{
+	return 0;
+}
+
 static inline void idmouse_delete(struct usb_idmouse *dev)
 {
 	kfree(dev->bulk_in_buffer);
@@ -235,9 +252,13 @@ static int idmouse_open(struct inode *inode, struct file *file)
 	} else {
 
 		/* create a new image and check for success */
+		result = usb_autopm_get_interface(interface);
+		if (result)
+			goto error;
 		result = idmouse_create_image (dev);
 		if (result)
 			goto error;
+		usb_autopm_put_interface(interface);
 
 		/* increment our usage count for the driver */
 		++dev->open;
-- 
cgit v0.10.2


From 4d155eb5f55b879e9947c3553b33764746fb15d5 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Mon, 13 Jul 2009 16:45:47 +0200
Subject: USB: full autosuspend and power management support for usbsevseg

This patch adds to the usbsevseg driver:

- suspend/resume support
- reset_resume support
- autosuspend using the display's power state to determine idleness

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Harrison Metzger <harrisonmetz@gmail.com>

diff --git a/drivers/usb/misc/usbsevseg.c b/drivers/usb/misc/usbsevseg.c
index 28a6a3a..3db2555 100644
--- a/drivers/usb/misc/usbsevseg.c
+++ b/drivers/usb/misc/usbsevseg.c
@@ -38,6 +38,7 @@ static char *display_textmodes[] = {"raw", "hex", "ascii", NULL};
 
 struct usb_sevsegdev {
 	struct usb_device *udev;
+	struct usb_interface *intf;
 
 	u8 powered;
 	u8 mode_msb;
@@ -46,6 +47,8 @@ struct usb_sevsegdev {
 	u8 textmode;
 	u8 text[MAXLEN];
 	u16 textlength;
+
+	u8 shadow_power; /* for PM */
 };
 
 /* sysfs_streq can't replace this completely
@@ -65,6 +68,12 @@ static void update_display_powered(struct usb_sevsegdev *mydev)
 {
 	int rc;
 
+	if (!mydev->shadow_power && mydev->powered) {
+		rc = usb_autopm_get_interface(mydev->intf);
+		if (rc < 0)
+			return;
+	}
+
 	rc = usb_control_msg(mydev->udev,
 			usb_sndctrlpipe(mydev->udev, 0),
 			0x12,
@@ -76,12 +85,18 @@ static void update_display_powered(struct usb_sevsegdev *mydev)
 			2000);
 	if (rc < 0)
 		dev_dbg(&mydev->udev->dev, "power retval = %d\n", rc);
+
+	if (mydev->shadow_power && !mydev->powered)
+		usb_autopm_put_interface(mydev->intf);
 }
 
 static void update_display_mode(struct usb_sevsegdev *mydev)
 {
 	int rc;
 
+	if(mydev->shadow_power != 1)
+		return;
+
 	rc = usb_control_msg(mydev->udev,
 			usb_sndctrlpipe(mydev->udev, 0),
 			0x12,
@@ -96,14 +111,17 @@ static void update_display_mode(struct usb_sevsegdev *mydev)
 		dev_dbg(&mydev->udev->dev, "mode retval = %d\n", rc);
 }
 
-static void update_display_visual(struct usb_sevsegdev *mydev)
+static void update_display_visual(struct usb_sevsegdev *mydev, gfp_t mf)
 {
 	int rc;
 	int i;
 	unsigned char *buffer;
 	u8 decimals = 0;
 
-	buffer = kzalloc(MAXLEN, GFP_KERNEL);
+	if(mydev->shadow_power != 1)
+		return;
+
+	buffer = kzalloc(MAXLEN, mf);
 	if (!buffer) {
 		dev_err(&mydev->udev->dev, "out of memory\n");
 		return;
@@ -163,7 +181,7 @@ static ssize_t set_attr_##name(struct device *dev, 		\
 	struct usb_sevsegdev *mydev = usb_get_intfdata(intf);	\
 								\
 	mydev->name = simple_strtoul(buf, NULL, 10);		\
-	update_fcn(mydev); \
+	update_fcn(mydev); 					\
 								\
 	return count;						\
 }								\
@@ -194,7 +212,7 @@ static ssize_t set_attr_text(struct device *dev,
 	if (end > 0)
 		memcpy(mydev->text, buf, end);
 
-	update_display_visual(mydev);
+	update_display_visual(mydev, GFP_KERNEL);
 	return count;
 }
 
@@ -242,7 +260,7 @@ static ssize_t set_attr_decimals(struct device *dev,
 		if (buf[i] == '1')
 			mydev->decimals[end-1-i] = 1;
 
-	update_display_visual(mydev);
+	update_display_visual(mydev, GFP_KERNEL);
 
 	return count;
 }
@@ -286,7 +304,7 @@ static ssize_t set_attr_textmode(struct device *dev,
 	for (i = 0; display_textmodes[i]; i++) {
 		if (sysfs_streq(display_textmodes[i], buf)) {
 			mydev->textmode = i;
-			update_display_visual(mydev);
+			update_display_visual(mydev, GFP_KERNEL);
 			return count;
 		}
 	}
@@ -330,6 +348,7 @@ static int sevseg_probe(struct usb_interface *interface,
 	}
 
 	mydev->udev = usb_get_dev(udev);
+	mydev->intf = interface;
 	usb_set_intfdata(interface, mydev);
 
 	/*set defaults */
@@ -364,11 +383,49 @@ static void sevseg_disconnect(struct usb_interface *interface)
 	dev_info(&interface->dev, "USB 7 Segment now disconnected\n");
 }
 
+static int sevseg_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	struct usb_sevsegdev *mydev;
+
+	mydev = usb_get_intfdata(intf);
+	mydev->shadow_power = 0;
+
+	return 0;
+}
+
+static int sevseg_resume(struct usb_interface *intf)
+{
+	struct usb_sevsegdev *mydev;
+
+	mydev = usb_get_intfdata(intf);
+	mydev->shadow_power = 1;
+	update_display_mode(mydev);
+	update_display_visual(mydev, GFP_NOIO);
+
+	return 0;
+}
+
+static int sevseg_reset_resume(struct usb_interface *intf)
+{
+	struct usb_sevsegdev *mydev;
+
+	mydev = usb_get_intfdata(intf);
+	mydev->shadow_power = 1;
+	update_display_mode(mydev);
+	update_display_visual(mydev, GFP_NOIO);
+
+	return 0;
+}
+
 static struct usb_driver sevseg_driver = {
 	.name =		"usbsevseg",
 	.probe =	sevseg_probe,
 	.disconnect =	sevseg_disconnect,
+	.suspend =	sevseg_suspend,
+	.resume =	sevseg_resume,
+	.reset_resume =	sevseg_reset_resume,
 	.id_table =	id_table,
+	.supports_autosuspend = 1,
 };
 
 static int __init usb_sevseg_init(void)
-- 
cgit v0.10.2


From 403dbd36739e344d2d25f56ebbe342248487bd48 Mon Sep 17 00:00:00 2001
From: Alek Du <alek.du@intel.com>
Date: Mon, 13 Jul 2009 17:30:41 +0800
Subject: USB: EHCI: add need_io_watchdog flag to ehci_hcd

Basically the io watchdog is only useful for those quirk HCDs. For most
good ones, it only brings unnecessary wakeups.  At least, I know the
Intel EHCI HCDs should turn off the flag.

Signed-off-by: Alek Du <alek.du@intel.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 54715b8..2dc15f3 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -127,6 +127,8 @@ timer_action(struct ehci_hcd *ehci, enum ehci_timer_action action)
 
 		switch (action) {
 		case TIMER_IO_WATCHDOG:
+			if (!ehci->need_io_watchdog)
+				return;
 			t = EHCI_IO_JIFFIES;
 			break;
 		case TIMER_ASYNC_OFF:
@@ -508,6 +510,10 @@ static int ehci_init(struct usb_hcd *hcd)
 
 	spin_lock_init(&ehci->lock);
 
+	/*
+	 * keep io watchdog by default, those good HCDs could turn off it later
+	 */
+	ehci->need_io_watchdog = 1;
 	init_timer(&ehci->watchdog);
 	ehci->watchdog.function = ehci_watchdog;
 	ehci->watchdog.data = (unsigned long) ehci;
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index b5b83c4..a88ad51 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -129,6 +129,9 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
 		return retval;
 
 	switch (pdev->vendor) {
+	case PCI_VENDOR_ID_INTEL:
+		ehci->need_io_watchdog = 0;
+		break;
 	case PCI_VENDOR_ID_TDI:
 		if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) {
 			hcd->has_tt = 1;
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 48b9e88..acec108 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -126,6 +126,7 @@ struct ehci_hcd {			/* one per controller */
 	unsigned		big_endian_mmio:1;
 	unsigned		big_endian_desc:1;
 	unsigned		has_amcc_usb23:1;
+	unsigned		need_io_watchdog:1;
 
 	/* required for usb32 quirk */
 	#define OHCI_CTRL_HCFS          (3 << 6)
-- 
cgit v0.10.2


From 3807e26d69b9ad3864fe03224ebebc9610d5802e Mon Sep 17 00:00:00 2001
From: Alek Du <alek.du@intel.com>
Date: Tue, 14 Jul 2009 07:23:29 +0800
Subject: USB: EHCI: split ehci_qh into hw and sw parts

The ehci_qh structure merged hw and sw together which is not good:
1. More and more items are being added into ehci_qh, the ehci_qh software
   part are unnecessary to be allocated in DMA qh_pool.
2. If HCD has local SRAM, the sw part will consume it too, and it won't
   bring any benefit.
3. For non-cache-coherence system, the entire ehci_qh is uncachable, actually
   we only need the hw part to be uncacheable. Spliting them will let the sw
   part to be cacheable.

Signed-off-by: Alek Du <alek.du@intel.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
CC: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 1104caa..874d200 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -134,10 +134,11 @@ dbg_qtd (const char *label, struct ehci_hcd *ehci, struct ehci_qtd *qtd)
 static void __maybe_unused
 dbg_qh (const char *label, struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
+	struct ehci_qh_hw *hw = qh->hw;
+
 	ehci_dbg (ehci, "%s qh %p n%08x info %x %x qtd %x\n", label,
-		qh, qh->hw_next, qh->hw_info1, qh->hw_info2,
-		qh->hw_current);
-	dbg_qtd ("overlay", ehci, (struct ehci_qtd *) &qh->hw_qtd_next);
+		qh, hw->hw_next, hw->hw_info1, hw->hw_info2, hw->hw_current);
+	dbg_qtd("overlay", ehci, (struct ehci_qtd *) &hw->hw_qtd_next);
 }
 
 static void __maybe_unused
@@ -400,31 +401,32 @@ static void qh_lines (
 	char			*next = *nextp;
 	char			mark;
 	__le32			list_end = EHCI_LIST_END(ehci);
+	struct ehci_qh_hw	*hw = qh->hw;
 
-	if (qh->hw_qtd_next == list_end)	/* NEC does this */
+	if (hw->hw_qtd_next == list_end)	/* NEC does this */
 		mark = '@';
 	else
-		mark = token_mark(ehci, qh->hw_token);
+		mark = token_mark(ehci, hw->hw_token);
 	if (mark == '/') {	/* qh_alt_next controls qh advance? */
-		if ((qh->hw_alt_next & QTD_MASK(ehci))
-				== ehci->async->hw_alt_next)
+		if ((hw->hw_alt_next & QTD_MASK(ehci))
+				== ehci->async->hw->hw_alt_next)
 			mark = '#';	/* blocked */
-		else if (qh->hw_alt_next == list_end)
+		else if (hw->hw_alt_next == list_end)
 			mark = '.';	/* use hw_qtd_next */
 		/* else alt_next points to some other qtd */
 	}
-	scratch = hc32_to_cpup(ehci, &qh->hw_info1);
-	hw_curr = (mark == '*') ? hc32_to_cpup(ehci, &qh->hw_current) : 0;
+	scratch = hc32_to_cpup(ehci, &hw->hw_info1);
+	hw_curr = (mark == '*') ? hc32_to_cpup(ehci, &hw->hw_current) : 0;
 	temp = scnprintf (next, size,
 			"qh/%p dev%d %cs ep%d %08x %08x (%08x%c %s nak%d)",
 			qh, scratch & 0x007f,
 			speed_char (scratch),
 			(scratch >> 8) & 0x000f,
-			scratch, hc32_to_cpup(ehci, &qh->hw_info2),
-			hc32_to_cpup(ehci, &qh->hw_token), mark,
-			(cpu_to_hc32(ehci, QTD_TOGGLE) & qh->hw_token)
+			scratch, hc32_to_cpup(ehci, &hw->hw_info2),
+			hc32_to_cpup(ehci, &hw->hw_token), mark,
+			(cpu_to_hc32(ehci, QTD_TOGGLE) & hw->hw_token)
 				? "data1" : "data0",
-			(hc32_to_cpup(ehci, &qh->hw_alt_next) >> 1) & 0x0f);
+			(hc32_to_cpup(ehci, &hw->hw_alt_next) >> 1) & 0x0f);
 	size -= temp;
 	next += temp;
 
@@ -435,10 +437,10 @@ static void qh_lines (
 		mark = ' ';
 		if (hw_curr == td->qtd_dma)
 			mark = '*';
-		else if (qh->hw_qtd_next == cpu_to_hc32(ehci, td->qtd_dma))
+		else if (hw->hw_qtd_next == cpu_to_hc32(ehci, td->qtd_dma))
 			mark = '+';
 		else if (QTD_LENGTH (scratch)) {
-			if (td->hw_alt_next == ehci->async->hw_alt_next)
+			if (td->hw_alt_next == ehci->async->hw->hw_alt_next)
 				mark = '#';
 			else if (td->hw_alt_next != list_end)
 				mark = '/';
@@ -550,12 +552,15 @@ static ssize_t fill_periodic_buffer(struct debug_buffer *buf)
 		next += temp;
 
 		do {
+			struct ehci_qh_hw *hw;
+
 			switch (hc32_to_cpu(ehci, tag)) {
 			case Q_TYPE_QH:
+				hw = p.qh->hw;
 				temp = scnprintf (next, size, " qh%d-%04x/%p",
 						p.qh->period,
 						hc32_to_cpup(ehci,
-								&p.qh->hw_info2)
+							&hw->hw_info2)
 							/* uframe masks */
 							& (QH_CMASK | QH_SMASK),
 						p.qh);
@@ -576,7 +581,7 @@ static ssize_t fill_periodic_buffer(struct debug_buffer *buf)
 				/* show more info the first time around */
 				if (temp == seen_count) {
 					u32	scratch = hc32_to_cpup(ehci,
-							&p.qh->hw_info1);
+							&hw->hw_info1);
 					struct ehci_qtd	*qtd;
 					char		*type = "";
 
@@ -609,7 +614,7 @@ static ssize_t fill_periodic_buffer(struct debug_buffer *buf)
 				} else
 					temp = 0;
 				if (p.qh) {
-					tag = Q_NEXT_TYPE(ehci, p.qh->hw_next);
+					tag = Q_NEXT_TYPE(ehci, hw->hw_next);
 					p = p.qh->qh_next;
 				}
 				break;
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 2dc15f3..7bee163 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -507,6 +507,7 @@ static int ehci_init(struct usb_hcd *hcd)
 	u32			temp;
 	int			retval;
 	u32			hcc_params;
+	struct ehci_qh_hw	*hw;
 
 	spin_lock_init(&ehci->lock);
 
@@ -550,12 +551,13 @@ static int ehci_init(struct usb_hcd *hcd)
 	 * from automatically advancing to the next td after short reads.
 	 */
 	ehci->async->qh_next.qh = NULL;
-	ehci->async->hw_next = QH_NEXT(ehci, ehci->async->qh_dma);
-	ehci->async->hw_info1 = cpu_to_hc32(ehci, QH_HEAD);
-	ehci->async->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT);
-	ehci->async->hw_qtd_next = EHCI_LIST_END(ehci);
+	hw = ehci->async->hw;
+	hw->hw_next = QH_NEXT(ehci, ehci->async->qh_dma);
+	hw->hw_info1 = cpu_to_hc32(ehci, QH_HEAD);
+	hw->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT);
+	hw->hw_qtd_next = EHCI_LIST_END(ehci);
 	ehci->async->qh_state = QH_STATE_LINKED;
-	ehci->async->hw_alt_next = QTD_NEXT(ehci, ehci->async->dummy->qtd_dma);
+	hw->hw_alt_next = QTD_NEXT(ehci, ehci->async->dummy->qtd_dma);
 
 	/* clear interrupt enables, set irq latency */
 	if (log2_irq_thresh < 0 || log2_irq_thresh > 6)
@@ -985,7 +987,7 @@ rescan:
 	/* endpoints can be iso streams.  for now, we don't
 	 * accelerate iso completions ... so spin a while.
 	 */
-	if (qh->hw_info1 == 0) {
+	if (qh->hw->hw_info1 == 0) {
 		ehci_vdbg (ehci, "iso delay\n");
 		goto idle_timeout;
 	}
diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c
index 10d5291..aeda96e 100644
--- a/drivers/usb/host/ehci-mem.c
+++ b/drivers/usb/host/ehci-mem.c
@@ -75,7 +75,8 @@ static void qh_destroy(struct ehci_qh *qh)
 	}
 	if (qh->dummy)
 		ehci_qtd_free (ehci, qh->dummy);
-	dma_pool_free (ehci->qh_pool, qh, qh->qh_dma);
+	dma_pool_free(ehci->qh_pool, qh->hw, qh->qh_dma);
+	kfree(qh);
 }
 
 static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
@@ -83,12 +84,14 @@ static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
 	struct ehci_qh		*qh;
 	dma_addr_t		dma;
 
-	qh = (struct ehci_qh *)
-		dma_pool_alloc (ehci->qh_pool, flags, &dma);
+	qh = kzalloc(sizeof *qh, GFP_ATOMIC);
 	if (!qh)
-		return qh;
-
-	memset (qh, 0, sizeof *qh);
+		goto done;
+	qh->hw = (struct ehci_qh_hw *)
+		dma_pool_alloc(ehci->qh_pool, flags, &dma);
+	if (!qh->hw)
+		goto fail;
+	memset(qh->hw, 0, sizeof *qh->hw);
 	qh->refcount = 1;
 	qh->ehci = ehci;
 	qh->qh_dma = dma;
@@ -99,10 +102,15 @@ static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
 	qh->dummy = ehci_qtd_alloc (ehci, flags);
 	if (qh->dummy == NULL) {
 		ehci_dbg (ehci, "no dummy td\n");
-		dma_pool_free (ehci->qh_pool, qh, qh->qh_dma);
-		qh = NULL;
+		goto fail1;
 	}
+done:
 	return qh;
+fail1:
+	dma_pool_free(ehci->qh_pool, qh->hw, qh->qh_dma);
+fail:
+	kfree(qh);
+	return NULL;
 }
 
 /* to share a qh (cpu threads, or hc) */
@@ -180,7 +188,7 @@ static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags)
 	/* QHs for control/bulk/intr transfers */
 	ehci->qh_pool = dma_pool_create ("ehci_qh",
 			ehci_to_hcd(ehci)->self.controller,
-			sizeof (struct ehci_qh),
+			sizeof(struct ehci_qh_hw),
 			32 /* byte alignment (for hw parts) */,
 			4096 /* can't cross 4K */);
 	if (!ehci->qh_pool) {
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 7673554..377ed53 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -87,31 +87,33 @@ qtd_fill(struct ehci_hcd *ehci, struct ehci_qtd *qtd, dma_addr_t buf,
 static inline void
 qh_update (struct ehci_hcd *ehci, struct ehci_qh *qh, struct ehci_qtd *qtd)
 {
+	struct ehci_qh_hw *hw = qh->hw;
+
 	/* writes to an active overlay are unsafe */
 	BUG_ON(qh->qh_state != QH_STATE_IDLE);
 
-	qh->hw_qtd_next = QTD_NEXT(ehci, qtd->qtd_dma);
-	qh->hw_alt_next = EHCI_LIST_END(ehci);
+	hw->hw_qtd_next = QTD_NEXT(ehci, qtd->qtd_dma);
+	hw->hw_alt_next = EHCI_LIST_END(ehci);
 
 	/* Except for control endpoints, we make hardware maintain data
 	 * toggle (like OHCI) ... here (re)initialize the toggle in the QH,
 	 * and set the pseudo-toggle in udev. Only usb_clear_halt() will
 	 * ever clear it.
 	 */
-	if (!(qh->hw_info1 & cpu_to_hc32(ehci, 1 << 14))) {
+	if (!(hw->hw_info1 & cpu_to_hc32(ehci, 1 << 14))) {
 		unsigned	is_out, epnum;
 
 		is_out = !(qtd->hw_token & cpu_to_hc32(ehci, 1 << 8));
-		epnum = (hc32_to_cpup(ehci, &qh->hw_info1) >> 8) & 0x0f;
+		epnum = (hc32_to_cpup(ehci, &hw->hw_info1) >> 8) & 0x0f;
 		if (unlikely (!usb_gettoggle (qh->dev, epnum, is_out))) {
-			qh->hw_token &= ~cpu_to_hc32(ehci, QTD_TOGGLE);
+			hw->hw_token &= ~cpu_to_hc32(ehci, QTD_TOGGLE);
 			usb_settoggle (qh->dev, epnum, is_out, 1);
 		}
 	}
 
 	/* HC must see latest qtd and qh data before we clear ACTIVE+HALT */
 	wmb ();
-	qh->hw_token &= cpu_to_hc32(ehci, QTD_TOGGLE | QTD_STS_PING);
+	hw->hw_token &= cpu_to_hc32(ehci, QTD_TOGGLE | QTD_STS_PING);
 }
 
 /* if it weren't for a common silicon quirk (writing the dummy into the qh
@@ -129,7 +131,7 @@ qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh)
 		qtd = list_entry (qh->qtd_list.next,
 				struct ehci_qtd, qtd_list);
 		/* first qtd may already be partially processed */
-		if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw_current)
+		if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current)
 			qtd = NULL;
 	}
 
@@ -260,7 +262,7 @@ __acquires(ehci->lock)
 		struct ehci_qh	*qh = (struct ehci_qh *) urb->hcpriv;
 
 		/* S-mask in a QH means it's an interrupt urb */
-		if ((qh->hw_info2 & cpu_to_hc32(ehci, QH_SMASK)) != 0) {
+		if ((qh->hw->hw_info2 & cpu_to_hc32(ehci, QH_SMASK)) != 0) {
 
 			/* ... update hc-wide periodic stats (for usbfs) */
 			ehci_to_hcd(ehci)->self.bandwidth_int_reqs--;
@@ -315,6 +317,7 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	unsigned		count = 0;
 	u8			state;
 	__le32			halt = HALT_BIT(ehci);
+	struct ehci_qh_hw	*hw = qh->hw;
 
 	if (unlikely (list_empty (&qh->qtd_list)))
 		return count;
@@ -392,7 +395,8 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 					qtd->hw_token = cpu_to_hc32(ehci,
 							token);
 					wmb();
-					qh->hw_token = cpu_to_hc32(ehci, token);
+					hw->hw_token = cpu_to_hc32(ehci,
+							token);
 					goto retry_xacterr;
 				}
 				stopped = 1;
@@ -435,8 +439,8 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 			/* qh unlinked; token in overlay may be most current */
 			if (state == QH_STATE_IDLE
 					&& cpu_to_hc32(ehci, qtd->qtd_dma)
-						== qh->hw_current) {
-				token = hc32_to_cpu(ehci, qh->hw_token);
+						== hw->hw_current) {
+				token = hc32_to_cpu(ehci, hw->hw_token);
 
 				/* An unlink may leave an incomplete
 				 * async transaction in the TT buffer.
@@ -449,9 +453,9 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 			 * patch the qh later and so that completions can't
 			 * activate it while we "know" it's stopped.
 			 */
-			if ((halt & qh->hw_token) == 0) {
+			if ((halt & hw->hw_token) == 0) {
 halt:
-				qh->hw_token |= halt;
+				hw->hw_token |= halt;
 				wmb ();
 			}
 		}
@@ -510,7 +514,7 @@ halt:
 	 * it after fault cleanup, or recovering from silicon wrongly
 	 * overlaying the dummy qtd (which reduces DMA chatter).
 	 */
-	if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END(ehci)) {
+	if (stopped != 0 || hw->hw_qtd_next == EHCI_LIST_END(ehci)) {
 		switch (state) {
 		case QH_STATE_IDLE:
 			qh_refresh(ehci, qh);
@@ -528,7 +532,7 @@ halt:
 			 * except maybe high bandwidth ...
 			 */
 			if ((cpu_to_hc32(ehci, QH_SMASK)
-					& qh->hw_info2) != 0) {
+					& hw->hw_info2) != 0) {
 				intr_deschedule (ehci, qh);
 				(void) qh_schedule (ehci, qh);
 			} else
@@ -649,7 +653,7 @@ qh_urb_transaction (
 		 * (this will usually be overridden later.)
 		 */
 		if (is_input)
-			qtd->hw_alt_next = ehci->async->hw_alt_next;
+			qtd->hw_alt_next = ehci->async->hw->hw_alt_next;
 
 		/* qh makes control packets use qtd toggle; maybe switch it */
 		if ((maxpacket & (this_qtd_len + (maxpacket - 1))) == 0)
@@ -744,6 +748,7 @@ qh_make (
 	int			is_input, type;
 	int			maxp = 0;
 	struct usb_tt		*tt = urb->dev->tt;
+	struct ehci_qh_hw	*hw;
 
 	if (!qh)
 		return qh;
@@ -890,8 +895,9 @@ done:
 
 	/* init as live, toggle clear, advance to dummy */
 	qh->qh_state = QH_STATE_IDLE;
-	qh->hw_info1 = cpu_to_hc32(ehci, info1);
-	qh->hw_info2 = cpu_to_hc32(ehci, info2);
+	hw = qh->hw;
+	hw->hw_info1 = cpu_to_hc32(ehci, info1);
+	hw->hw_info2 = cpu_to_hc32(ehci, info2);
 	usb_settoggle (urb->dev, usb_pipeendpoint (urb->pipe), !is_input, 1);
 	qh_refresh (ehci, qh);
 	return qh;
@@ -933,11 +939,11 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 	/* splice right after start */
 	qh->qh_next = head->qh_next;
-	qh->hw_next = head->hw_next;
+	qh->hw->hw_next = head->hw->hw_next;
 	wmb ();
 
 	head->qh_next.qh = qh;
-	head->hw_next = dma;
+	head->hw->hw_next = dma;
 
 	qh_get(qh);
 	qh->xacterrs = 0;
@@ -984,7 +990,7 @@ static struct ehci_qh *qh_append_tds (
 
                         /* usb_reset_device() briefly reverts to address 0 */
                         if (usb_pipedevice (urb->pipe) == 0)
-                                qh->hw_info1 &= ~qh_addr_mask;
+				qh->hw->hw_info1 &= ~qh_addr_mask;
 		}
 
 		/* just one way to queue requests: swap with the dummy qtd.
@@ -1169,7 +1175,7 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	while (prev->qh_next.qh != qh)
 		prev = prev->qh_next.qh;
 
-	prev->hw_next = qh->hw_next;
+	prev->hw->hw_next = qh->hw->hw_next;
 	prev->qh_next = qh->qh_next;
 	wmb ();
 
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index edd61ee..327437a 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -60,6 +60,20 @@ periodic_next_shadow(struct ehci_hcd *ehci, union ehci_shadow *periodic,
 	}
 }
 
+static __hc32 *
+shadow_next_periodic(struct ehci_hcd *ehci, union ehci_shadow *periodic,
+		__hc32 tag)
+{
+	switch (hc32_to_cpu(ehci, tag)) {
+	/* our ehci_shadow.qh is actually software part */
+	case Q_TYPE_QH:
+		return &periodic->qh->hw->hw_next;
+	/* others are hw parts */
+	default:
+		return periodic->hw_next;
+	}
+}
+
 /* caller must hold ehci->lock */
 static void periodic_unlink (struct ehci_hcd *ehci, unsigned frame, void *ptr)
 {
@@ -71,7 +85,8 @@ static void periodic_unlink (struct ehci_hcd *ehci, unsigned frame, void *ptr)
 	while (here.ptr && here.ptr != ptr) {
 		prev_p = periodic_next_shadow(ehci, prev_p,
 				Q_NEXT_TYPE(ehci, *hw_p));
-		hw_p = here.hw_next;
+		hw_p = shadow_next_periodic(ehci, &here,
+				Q_NEXT_TYPE(ehci, *hw_p));
 		here = *prev_p;
 	}
 	/* an interrupt entry (at list end) could have been shared */
@@ -83,7 +98,7 @@ static void periodic_unlink (struct ehci_hcd *ehci, unsigned frame, void *ptr)
 	 */
 	*prev_p = *periodic_next_shadow(ehci, &here,
 			Q_NEXT_TYPE(ehci, *hw_p));
-	*hw_p = *here.hw_next;
+	*hw_p = *shadow_next_periodic(ehci, &here, Q_NEXT_TYPE(ehci, *hw_p));
 }
 
 /* how many of the uframe's 125 usecs are allocated? */
@@ -93,18 +108,20 @@ periodic_usecs (struct ehci_hcd *ehci, unsigned frame, unsigned uframe)
 	__hc32			*hw_p = &ehci->periodic [frame];
 	union ehci_shadow	*q = &ehci->pshadow [frame];
 	unsigned		usecs = 0;
+	struct ehci_qh_hw	*hw;
 
 	while (q->ptr) {
 		switch (hc32_to_cpu(ehci, Q_NEXT_TYPE(ehci, *hw_p))) {
 		case Q_TYPE_QH:
+			hw = q->qh->hw;
 			/* is it in the S-mask? */
-			if (q->qh->hw_info2 & cpu_to_hc32(ehci, 1 << uframe))
+			if (hw->hw_info2 & cpu_to_hc32(ehci, 1 << uframe))
 				usecs += q->qh->usecs;
 			/* ... or C-mask? */
-			if (q->qh->hw_info2 & cpu_to_hc32(ehci,
+			if (hw->hw_info2 & cpu_to_hc32(ehci,
 					1 << (8 + uframe)))
 				usecs += q->qh->c_usecs;
-			hw_p = &q->qh->hw_next;
+			hw_p = &hw->hw_next;
 			q = &q->qh->qh_next;
 			break;
 		// case Q_TYPE_FSTN:
@@ -237,10 +254,10 @@ periodic_tt_usecs (
 			continue;
 		case Q_TYPE_QH:
 			if (same_tt(dev, q->qh->dev)) {
-				uf = tt_start_uframe(ehci, q->qh->hw_info2);
+				uf = tt_start_uframe(ehci, q->qh->hw->hw_info2);
 				tt_usecs[uf] += q->qh->tt_usecs;
 			}
-			hw_p = &q->qh->hw_next;
+			hw_p = &q->qh->hw->hw_next;
 			q = &q->qh->qh_next;
 			continue;
 		case Q_TYPE_SITD:
@@ -375,6 +392,7 @@ static int tt_no_collision (
 	for (; frame < ehci->periodic_size; frame += period) {
 		union ehci_shadow	here;
 		__hc32			type;
+		struct ehci_qh_hw	*hw;
 
 		here = ehci->pshadow [frame];
 		type = Q_NEXT_TYPE(ehci, ehci->periodic [frame]);
@@ -385,17 +403,18 @@ static int tt_no_collision (
 				here = here.itd->itd_next;
 				continue;
 			case Q_TYPE_QH:
+				hw = here.qh->hw;
 				if (same_tt (dev, here.qh->dev)) {
 					u32		mask;
 
 					mask = hc32_to_cpu(ehci,
-							here.qh->hw_info2);
+							hw->hw_info2);
 					/* "knows" no gap is needed */
 					mask |= mask >> 8;
 					if (mask & uf_mask)
 						break;
 				}
-				type = Q_NEXT_TYPE(ehci, here.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, hw->hw_next);
 				here = here.qh->qh_next;
 				continue;
 			case Q_TYPE_SITD:
@@ -498,7 +517,8 @@ static int qh_link_periodic (struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 	dev_dbg (&qh->dev->dev,
 		"link qh%d-%04x/%p start %d [%d/%d us]\n",
-		period, hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		period, hc32_to_cpup(ehci, &qh->hw->hw_info2)
+			& (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 	/* high bandwidth, or otherwise every microframe */
@@ -517,7 +537,7 @@ static int qh_link_periodic (struct ehci_hcd *ehci, struct ehci_qh *qh)
 			if (type == cpu_to_hc32(ehci, Q_TYPE_QH))
 				break;
 			prev = periodic_next_shadow(ehci, prev, type);
-			hw_p = &here.qh->hw_next;
+			hw_p = shadow_next_periodic(ehci, &here, type);
 			here = *prev;
 		}
 
@@ -528,14 +548,14 @@ static int qh_link_periodic (struct ehci_hcd *ehci, struct ehci_qh *qh)
 			if (qh->period > here.qh->period)
 				break;
 			prev = &here.qh->qh_next;
-			hw_p = &here.qh->hw_next;
+			hw_p = &here.qh->hw->hw_next;
 			here = *prev;
 		}
 		/* link in this qh, unless some earlier pass did that */
 		if (qh != here.qh) {
 			qh->qh_next = here;
 			if (here.qh)
-				qh->hw_next = *hw_p;
+				qh->hw->hw_next = *hw_p;
 			wmb ();
 			prev->qh = qh;
 			*hw_p = QH_NEXT (ehci, qh->qh_dma);
@@ -581,7 +601,7 @@ static int qh_unlink_periodic(struct ehci_hcd *ehci, struct ehci_qh *qh)
 	dev_dbg (&qh->dev->dev,
 		"unlink qh%d-%04x/%p start %d [%d/%d us]\n",
 		qh->period,
-		hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		hc32_to_cpup(ehci, &qh->hw->hw_info2) & (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 	/* qh->qh_next still "live" to HC */
@@ -596,6 +616,7 @@ static int qh_unlink_periodic(struct ehci_hcd *ehci, struct ehci_qh *qh)
 static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
 	unsigned	wait;
+	struct ehci_qh_hw *hw = qh->hw;
 
 	qh_unlink_periodic (ehci, qh);
 
@@ -606,14 +627,14 @@ static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	 */
 	if (list_empty (&qh->qtd_list)
 			|| (cpu_to_hc32(ehci, QH_CMASK)
-					& qh->hw_info2) != 0)
+					& hw->hw_info2) != 0)
 		wait = 2;
 	else
 		wait = 55;	/* worst case: 3 * 1024 */
 
 	udelay (wait);
 	qh->qh_state = QH_STATE_IDLE;
-	qh->hw_next = EHCI_LIST_END(ehci);
+	hw->hw_next = EHCI_LIST_END(ehci);
 	wmb ();
 }
 
@@ -739,14 +760,15 @@ static int qh_schedule(struct ehci_hcd *ehci, struct ehci_qh *qh)
 	unsigned	uframe;
 	__hc32		c_mask;
 	unsigned	frame;		/* 0..(qh->period - 1), or NO_FRAME */
+	struct ehci_qh_hw	*hw = qh->hw;
 
 	qh_refresh(ehci, qh);
-	qh->hw_next = EHCI_LIST_END(ehci);
+	hw->hw_next = EHCI_LIST_END(ehci);
 	frame = qh->start;
 
 	/* reuse the previous schedule slots, if we can */
 	if (frame < qh->period) {
-		uframe = ffs(hc32_to_cpup(ehci, &qh->hw_info2) & QH_SMASK);
+		uframe = ffs(hc32_to_cpup(ehci, &hw->hw_info2) & QH_SMASK);
 		status = check_intr_schedule (ehci, frame, --uframe,
 				qh, &c_mask);
 	} else {
@@ -784,11 +806,11 @@ static int qh_schedule(struct ehci_hcd *ehci, struct ehci_qh *qh)
 		qh->start = frame;
 
 		/* reset S-frame and (maybe) C-frame masks */
-		qh->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
-		qh->hw_info2 |= qh->period
+		hw->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
+		hw->hw_info2 |= qh->period
 			? cpu_to_hc32(ehci, 1 << uframe)
 			: cpu_to_hc32(ehci, QH_SMASK);
-		qh->hw_info2 |= c_mask;
+		hw->hw_info2 |= c_mask;
 	} else
 		ehci_dbg (ehci, "reused qh %p schedule\n", qh);
 
@@ -2188,7 +2210,7 @@ restart:
 			case Q_TYPE_QH:
 				/* handle any completions */
 				temp.qh = qh_get (q.qh);
-				type = Q_NEXT_TYPE(ehci, q.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.qh->hw->hw_next);
 				q = q.qh->qh_next;
 				modified = qh_completions (ehci, temp.qh);
 				if (unlikely (list_empty (&temp.qh->qtd_list)))
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index acec108..fa12f20 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -299,8 +299,8 @@ union ehci_shadow {
  * These appear in both the async and (for interrupt) periodic schedules.
  */
 
-struct ehci_qh {
-	/* first part defined by EHCI spec */
+/* first part defined by EHCI spec */
+struct ehci_qh_hw {
 	__hc32			hw_next;	/* see EHCI 3.6.1 */
 	__hc32			hw_info1;       /* see EHCI 3.6.2 */
 #define	QH_HEAD		0x00008000
@@ -318,7 +318,10 @@ struct ehci_qh {
 	__hc32			hw_token;
 	__hc32			hw_buf [5];
 	__hc32			hw_buf_hi [5];
+} __attribute__ ((aligned(32)));
 
+struct ehci_qh {
+	struct ehci_qh_hw	*hw;
 	/* the rest is HCD-private */
 	dma_addr_t		qh_dma;		/* address of qh */
 	union ehci_shadow	qh_next;	/* ptr to qh; or periodic */
@@ -358,7 +361,7 @@ struct ehci_qh {
 
 	struct usb_device	*dev;		/* access to TT */
 	unsigned		clearing_tt:1;	/* Clear-TT-Buf in progress */
-} __attribute__ ((aligned (32)));
+};
 
 /*-------------------------------------------------------------------------*/
 
-- 
cgit v0.10.2


From 331ac6b288d9f3689514ced1878041fb0df7e13c Mon Sep 17 00:00:00 2001
From: Alek Du <alek.du@intel.com>
Date: Mon, 13 Jul 2009 12:41:20 +0800
Subject: USB: EHCI: Add Intel Moorestown EHCI controller HOSTPCx extensions
 and support phy low power mode

The Intel Moorestown EHCI controller supports non-standard HOSTPCx register
extension. This register controls the LPM behaviour and controls the behaviour
of each USB port.

Signed-off-by: Jacob Pan <jacob.jun.pan@intel.com>
Signed-off-by: Alek Du <alek.du@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 7bee163..5fbc67c 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -249,6 +249,12 @@ static int ehci_reset (struct ehci_hcd *ehci)
 	retval = handshake (ehci, &ehci->regs->command,
 			    CMD_RESET, 0, 250 * 1000);
 
+	if (ehci->has_hostpc) {
+		ehci_writel(ehci, USBMODE_EX_HC | USBMODE_EX_VBPS,
+			(u32 __iomem *)(((u8 *)ehci->regs) + USBMODE_EX));
+		ehci_writel(ehci, TXFIFO_DEFAULT,
+			(u32 __iomem *)(((u8 *)ehci->regs) + TXFILLTUNING));
+	}
 	if (retval)
 		return retval;
 
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index f46ad27..6fef1ee 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -111,6 +111,7 @@ static int ehci_bus_suspend (struct usb_hcd *hcd)
 	struct ehci_hcd		*ehci = hcd_to_ehci (hcd);
 	int			port;
 	int			mask;
+	u32 __iomem		*hostpc_reg = NULL;
 
 	ehci_dbg(ehci, "suspend root hub\n");
 
@@ -142,6 +143,9 @@ static int ehci_bus_suspend (struct usb_hcd *hcd)
 		u32		t1 = ehci_readl(ehci, reg) & ~PORT_RWC_BITS;
 		u32		t2 = t1;
 
+		if (ehci->has_hostpc)
+			hostpc_reg = (u32 __iomem *)((u8 *)ehci->regs
+				+ HOSTPC0 + 4 * (port & 0xff));
 		/* keep track of which ports we suspend */
 		if (t1 & PORT_OWNER)
 			set_bit(port, &ehci->owned_ports);
@@ -151,15 +155,37 @@ static int ehci_bus_suspend (struct usb_hcd *hcd)
 		}
 
 		/* enable remote wakeup on all ports */
-		if (hcd->self.root_hub->do_remote_wakeup)
-			t2 |= PORT_WAKE_BITS;
-		else
+		if (hcd->self.root_hub->do_remote_wakeup) {
+			/* only enable appropriate wake bits, otherwise the
+			 * hardware can not go phy low power mode. If a race
+			 * condition happens here(connection change during bits
+			 * set), the port change detection will finally fix it.
+			 */
+			if (t1 & PORT_CONNECT) {
+				t2 |= PORT_WKOC_E | PORT_WKDISC_E;
+				t2 &= ~PORT_WKCONN_E;
+			} else {
+				t2 |= PORT_WKOC_E | PORT_WKCONN_E;
+				t2 &= ~PORT_WKDISC_E;
+			}
+		} else
 			t2 &= ~PORT_WAKE_BITS;
 
 		if (t1 != t2) {
 			ehci_vdbg (ehci, "port %d, %08x -> %08x\n",
 				port + 1, t1, t2);
 			ehci_writel(ehci, t2, reg);
+			if (hostpc_reg) {
+				u32	t3;
+
+				msleep(5);/* 5ms for HCD enter low pwr mode */
+				t3 = ehci_readl(ehci, hostpc_reg);
+				ehci_writel(ehci, t3 | HOSTPC_PHCD, hostpc_reg);
+				t3 = ehci_readl(ehci, hostpc_reg);
+				ehci_dbg(ehci, "Port%d phy low pwr mode %s\n",
+					port, (t3 & HOSTPC_PHCD) ?
+					"succeeded" : "failed");
+			}
 		}
 	}
 
@@ -563,7 +589,8 @@ static int ehci_hub_control (
 	int		ports = HCS_N_PORTS (ehci->hcs_params);
 	u32 __iomem	*status_reg = &ehci->regs->port_status[
 				(wIndex & 0xff) - 1];
-	u32		temp, status;
+	u32 __iomem	*hostpc_reg = NULL;
+	u32		temp, temp1, status;
 	unsigned long	flags;
 	int		retval = 0;
 	unsigned	selector;
@@ -575,6 +602,9 @@ static int ehci_hub_control (
 	 * power, "this is the one", etc.  EHCI spec supports this.
 	 */
 
+	if (ehci->has_hostpc)
+		hostpc_reg = (u32 __iomem *)((u8 *)ehci->regs
+				+ HOSTPC0 + 4 * ((wIndex & 0xff) - 1));
 	spin_lock_irqsave (&ehci->lock, flags);
 	switch (typeReq) {
 	case ClearHubFeature:
@@ -773,7 +803,11 @@ static int ehci_hub_control (
 		if (temp & PORT_CONNECT) {
 			status |= 1 << USB_PORT_FEAT_CONNECTION;
 			// status may be from integrated TT
-			status |= ehci_port_speed(ehci, temp);
+			if (ehci->has_hostpc) {
+				temp1 = ehci_readl(ehci, hostpc_reg);
+				status |= ehci_port_speed(ehci, temp1);
+			} else
+				status |= ehci_port_speed(ehci, temp);
 		}
 		if (temp & PORT_PE)
 			status |= 1 << USB_PORT_FEAT_ENABLE;
@@ -832,6 +866,24 @@ static int ehci_hub_control (
 					|| (temp & PORT_RESET) != 0)
 				goto error;
 			ehci_writel(ehci, temp | PORT_SUSPEND, status_reg);
+			/* After above check the port must be connected.
+			 * Set appropriate bit thus could put phy into low power
+			 * mode if we have hostpc feature
+			 */
+			if (hostpc_reg) {
+				temp &= ~PORT_WKCONN_E;
+				temp |= (PORT_WKDISC_E | PORT_WKOC_E);
+				ehci_writel(ehci, temp | PORT_SUSPEND,
+							status_reg);
+				msleep(5);/* 5ms for HCD enter low pwr mode */
+				temp1 = ehci_readl(ehci, hostpc_reg);
+				ehci_writel(ehci, temp1 | HOSTPC_PHCD,
+					hostpc_reg);
+				temp1 = ehci_readl(ehci, hostpc_reg);
+				ehci_dbg(ehci, "Port%d phy low pwr mode %s\n",
+					wIndex, (temp1 & HOSTPC_PHCD) ?
+					"succeeded" : "failed");
+			}
 			set_bit(wIndex, &ehci->suspended_ports);
 			break;
 		case USB_PORT_FEAT_POWER:
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index fa12f20..ec3dba6 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -136,6 +136,7 @@ struct ehci_hcd {			/* one per controller */
 	#define OHCI_HCCTRL_OFFSET      0x4
 	#define OHCI_HCCTRL_LEN         0x4
 	__hc32			*ohci_hcctrl_reg;
+	unsigned		has_hostpc:1;
 
 	u8			sbrn;		/* packed release number */
 
@@ -548,7 +549,7 @@ static inline unsigned int
 ehci_port_speed(struct ehci_hcd *ehci, unsigned int portsc)
 {
 	if (ehci_is_TDI(ehci)) {
-		switch ((portsc>>26)&3) {
+		switch ((portsc >> (ehci->has_hostpc ? 25 : 26)) & 3) {
 		case 0:
 			return 0;
 		case 1:
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 5b88e36..4c4b701 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -132,6 +132,19 @@ struct ehci_regs {
 #define USBMODE_CM_HC	(3<<0)		/* host controller mode */
 #define USBMODE_CM_IDLE	(0<<0)		/* idle state */
 
+/* Moorestown has some non-standard registers, partially due to the fact that
+ * its EHCI controller has both TT and LPM support. HOSTPCx are extentions to
+ * PORTSCx
+ */
+#define HOSTPC0		0x84		/* HOSTPC extension */
+#define HOSTPC_PHCD	(1<<22)		/* Phy clock disable */
+#define HOSTPC_PSPD	(3<<25)		/* Port speed detection */
+#define USBMODE_EX	0xc8		/* USB Device mode extension */
+#define USBMODE_EX_VBPS	(1<<5)		/* VBus Power Select On */
+#define USBMODE_EX_HC	(3<<0)		/* host controller mode */
+#define TXFILLTUNING	0x24		/* TX FIFO Tuning register */
+#define TXFIFO_DEFAULT	(8<<16)		/* FIFO burst threshold 8 */
+
 /* Appendix C, Debug port ... intended for use with special "debug devices"
  * that can help if there's no serial console.  (nonstandard enumeration.)
  */
-- 
cgit v0.10.2


From 9da69c604d87afea37b5411867bb76e3c624cc92 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Wed, 15 Jul 2009 23:22:54 -0400
Subject: USB: isp1760: allow platform devices to customize devflags

Platform device support was merged earlier, but support for boards to
customize the devflags aspect of the controller was not.  We want this on
Blackfin systems to control the bus width, but might as well expose all of
the fields while we're at it.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index 1543846..9600a58 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -386,6 +386,10 @@ static int isp1760_hc_setup(struct usb_hcd *hcd)
 		hwmode |= HW_DACK_POL_HIGH;
 	if (priv->devflags & ISP1760_FLAG_DREQ_POL_HIGH)
 		hwmode |= HW_DREQ_POL_HIGH;
+	if (priv->devflags & ISP1760_FLAG_INTR_POL_HIGH)
+		hwmode |= HW_INTR_HIGH_ACT;
+	if (priv->devflags & ISP1760_FLAG_INTR_EDGE_TRIG)
+		hwmode |= HW_INTR_EDGE_TRIG;
 
 	/*
 	 * We have to set this first in case we're in 16-bit mode.
diff --git a/drivers/usb/host/isp1760-hcd.h b/drivers/usb/host/isp1760-hcd.h
index 462f494..6931ef5 100644
--- a/drivers/usb/host/isp1760-hcd.h
+++ b/drivers/usb/host/isp1760-hcd.h
@@ -142,6 +142,8 @@ typedef void (packet_enqueue)(struct usb_hcd *hcd, struct isp1760_qh *qh,
 #define ISP1760_FLAG_DACK_POL_HIGH	0x00000010 /* DACK active high */
 #define ISP1760_FLAG_DREQ_POL_HIGH	0x00000020 /* DREQ active high */
 #define ISP1760_FLAG_ISP1761		0x00000040 /* Chip is ISP1761 */
+#define ISP1760_FLAG_INTR_POL_HIGH	0x00000080 /* Interrupt polarity active high */
+#define ISP1760_FLAG_INTR_EDGE_TRIG	0x00000100 /* Interrupt edge triggered */
 
 /* chip memory management */
 struct memory_chunk {
diff --git a/drivers/usb/host/isp1760-if.c b/drivers/usb/host/isp1760-if.c
index d4feebf..1c9f977 100644
--- a/drivers/usb/host/isp1760-if.c
+++ b/drivers/usb/host/isp1760-if.c
@@ -3,6 +3,7 @@
  * Currently there is support for
  * - OpenFirmware
  * - PCI
+ * - PDEV (generic platform device centralized driver model)
  *
  * (c) 2007 Sebastian Siewior <bigeasy@linutronix.de>
  *
@@ -11,6 +12,7 @@
 #include <linux/usb.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/usb/isp1760.h>
 
 #include "../core/hcd.h"
 #include "isp1760-hcd.h"
@@ -308,6 +310,8 @@ static int __devinit isp1760_plat_probe(struct platform_device *pdev)
 	struct resource *mem_res;
 	struct resource *irq_res;
 	resource_size_t mem_size;
+	struct isp1760_platform_data *priv = pdev->dev.platform_data;
+	unsigned int devflags = 0;
 	unsigned long irqflags = IRQF_SHARED | IRQF_DISABLED;
 
 	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -330,8 +334,23 @@ static int __devinit isp1760_plat_probe(struct platform_device *pdev)
 	}
 	irqflags |= irq_res->flags & IRQF_TRIGGER_MASK;
 
+	if (priv) {
+		if (priv->is_isp1761)
+			devflags |= ISP1760_FLAG_ISP1761;
+		if (priv->bus_width_16)
+			devflags |= ISP1760_FLAG_BUS_WIDTH_16;
+		if (priv->port1_otg)
+			devflags |= ISP1760_FLAG_OTG_EN;
+		if (priv->analog_oc)
+			devflags |= ISP1760_FLAG_ANALOG_OC;
+		if (priv->dack_polarity_high)
+			devflags |= ISP1760_FLAG_DACK_POL_HIGH;
+		if (priv->dreq_polarity_high)
+			devflags |= ISP1760_FLAG_DREQ_POL_HIGH;
+	}
+
 	hcd = isp1760_register(mem_res->start, mem_size, irq_res->start,
-			       irqflags, &pdev->dev, dev_name(&pdev->dev), 0);
+			       irqflags, &pdev->dev, dev_name(&pdev->dev), devflags);
 	if (IS_ERR(hcd)) {
 		pr_warning("isp1760: Failed to register the HCD device\n");
 		ret = -ENODEV;
diff --git a/include/linux/usb/isp1760.h b/include/linux/usb/isp1760.h
new file mode 100644
index 0000000..de7de53
--- /dev/null
+++ b/include/linux/usb/isp1760.h
@@ -0,0 +1,18 @@
+/*
+ * board initialization should put one of these into dev->platform_data
+ * and place the isp1760 onto platform_bus named "isp1760-hcd".
+ */
+
+#ifndef __LINUX_USB_ISP1760_H
+#define __LINUX_USB_ISP1760_H
+
+struct isp1760_platform_data {
+	unsigned is_isp1761:1;			/* Chip is ISP1761 */
+	unsigned bus_width_16:1;		/* 16/32-bit data bus width */
+	unsigned port1_otg:1;			/* Port 1 supports OTG */
+	unsigned analog_oc:1;			/* Analog overcurrent */
+	unsigned dack_polarity_high:1;		/* DACK active high */
+	unsigned dreq_polarity_high:1;		/* DREQ active high */
+};
+
+#endif /* __LINUX_USB_ISP1760_H */
-- 
cgit v0.10.2


From 981e60f037631ca725a9790e7b3512de4a60bba8 Mon Sep 17 00:00:00 2001
From: Maxin John <maxin.john@gmail.com>
Date: Mon, 20 Jul 2009 15:16:42 +0530
Subject: USB: serial: Spelling correction in Motorola USB Phone driver

Spelling correction in Motorola USB Phone driver

Changed: * Mororola should be using the CDC ACM USB spec, but instead
To: * Motorola should be using the CDC ACM USB spec, but instead

Signed-off-by: Maxin B. John <maxinbjohn@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/moto_modem.c b/drivers/usb/serial/moto_modem.c
index b66b71c..99bd00f5 100644
--- a/drivers/usb/serial/moto_modem.c
+++ b/drivers/usb/serial/moto_modem.c
@@ -8,7 +8,7 @@
  *  published by the Free Software Foundation.
  *
  * {sigh}
- * Mororola should be using the CDC ACM USB spec, but instead
+ * Motorola should be using the CDC ACM USB spec, but instead
  * they try to just "do their own thing"...  This driver should handle a
  * few phones in which a basic "dumb serial connection" is needed to be
  * able to get a connection through to them.
-- 
cgit v0.10.2


From 64aebe73152ab3a9f5f426baaf65db632bd72c13 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sun, 19 Jul 2009 17:29:57 +0200
Subject: USB: storage: Drop an unneeded a NULL test

In each case, the NULL test is not necessary because the function is static
and at the only places where it is called, the us argument has already been
dereferenced.

The semantic patch that finds the problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
type T;
expression E,E1;
identifier i,fld;
statement S;
@@

- T i = E->fld;
+ T i;
  ... when != E=E1
      when != i
  if (E == NULL||...) S
+ i = E->fld;
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/storage/datafab.c b/drivers/usb/storage/datafab.c
index 2b6e565..ded836b 100644
--- a/drivers/usb/storage/datafab.c
+++ b/drivers/usb/storage/datafab.c
@@ -334,7 +334,7 @@ static int datafab_determine_lun(struct us_data *us,
 	unsigned char *buf;
 	int count = 0, rc;
 
-	if (!us || !info)
+	if (!info)
 		return USB_STOR_TRANSPORT_ERROR;
 
 	memcpy(command, scommand, 8);
@@ -399,7 +399,7 @@ static int datafab_id_device(struct us_data *us,
 	unsigned char *reply;
 	int rc;
 
-	if (!us || !info)
+	if (!info)
 		return USB_STOR_TRANSPORT_ERROR;
 
 	if (info->lun == -1) {
diff --git a/drivers/usb/storage/jumpshot.c b/drivers/usb/storage/jumpshot.c
index 1c69420..6168596 100644
--- a/drivers/usb/storage/jumpshot.c
+++ b/drivers/usb/storage/jumpshot.c
@@ -335,7 +335,7 @@ static int jumpshot_id_device(struct us_data *us,
 	unsigned char *reply;
 	int 	 rc;
 
-	if (!us || !info)
+	if (!info)
 		return USB_STOR_TRANSPORT_ERROR;
 
 	command[0] = 0xE0;
-- 
cgit v0.10.2


From 807fcb5e19877d339a4cc56f2c6ddaf3a147457a Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Wed, 29 Jul 2009 19:13:13 +0200
Subject: USB: au1xxx: add dev_pm_ops

move both ohci-au1xxx and ehci-au1xxx over to dev_pm_ops.

Tested on Au1200.

Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-au1xxx.c b/drivers/usb/host/ehci-au1xxx.c
index 59d208d..ed77be7 100644
--- a/drivers/usb/host/ehci-au1xxx.c
+++ b/drivers/usb/host/ehci-au1xxx.c
@@ -199,10 +199,9 @@ static int ehci_hcd_au1xxx_drv_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int ehci_hcd_au1xxx_drv_suspend(struct platform_device *pdev,
-					pm_message_t message)
+static int ehci_hcd_au1xxx_drv_suspend(struct device *dev)
 {
-	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 	unsigned long flags;
 	int rc;
@@ -229,12 +228,6 @@ static int ehci_hcd_au1xxx_drv_suspend(struct platform_device *pdev,
 	ehci_writel(ehci, 0, &ehci->regs->intr_enable);
 	(void)ehci_readl(ehci, &ehci->regs->intr_enable);
 
-	/* make sure snapshot being resumed re-enumerates everything */
-	if (message.event == PM_EVENT_PRETHAW) {
-		ehci_halt(ehci);
-		ehci_reset(ehci);
-	}
-
 	clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
 
 	au1xxx_stop_ehc();
@@ -248,10 +241,9 @@ bail:
 	return rc;
 }
 
-
-static int ehci_hcd_au1xxx_drv_resume(struct platform_device *pdev)
+static int ehci_hcd_au1xxx_drv_resume(struct device *dev)
 {
-	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 
 	au1xxx_start_ehc();
@@ -305,20 +297,25 @@ static int ehci_hcd_au1xxx_drv_resume(struct platform_device *pdev)
 	return 0;
 }
 
+static struct dev_pm_ops au1xxx_ehci_pmops = {
+	.suspend	= ehci_hcd_au1xxx_drv_suspend,
+	.resume		= ehci_hcd_au1xxx_drv_resume,
+};
+
+#define AU1XXX_EHCI_PMOPS &au1xxx_ehci_pmops
+
 #else
-#define ehci_hcd_au1xxx_drv_suspend NULL
-#define ehci_hcd_au1xxx_drv_resume NULL
+#define AU1XXX_EHCI_PMOPS NULL
 #endif
 
 static struct platform_driver ehci_hcd_au1xxx_driver = {
 	.probe		= ehci_hcd_au1xxx_drv_probe,
 	.remove		= ehci_hcd_au1xxx_drv_remove,
 	.shutdown	= usb_hcd_platform_shutdown,
-	.suspend	= ehci_hcd_au1xxx_drv_suspend,
-	.resume		= ehci_hcd_au1xxx_drv_resume,
 	.driver = {
 		.name	= "au1xxx-ehci",
 		.owner	= THIS_MODULE,
+		.pm	= AU1XXX_EHCI_PMOPS,
 	}
 };
 
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index 2ac4e02..e438008 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -248,10 +248,9 @@ static int ohci_hcd_au1xxx_drv_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int ohci_hcd_au1xxx_drv_suspend(struct platform_device *pdev,
-					pm_message_t message)
+static int ohci_hcd_au1xxx_drv_suspend(struct device *dev)
 {
-	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct ohci_hcd	*ohci = hcd_to_ohci(hcd);
 	unsigned long flags;
 	int rc;
@@ -274,10 +273,6 @@ static int ohci_hcd_au1xxx_drv_suspend(struct platform_device *pdev,
 	ohci_writel(ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable);
 	(void)ohci_readl(ohci, &ohci->regs->intrdisable);
 
-	/* make sure snapshot being resumed re-enumerates everything */
-	if (message.event == PM_EVENT_PRETHAW)
-		ohci_usb_reset(ohci);
-
 	clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
 
 	au1xxx_stop_ohc();
@@ -287,9 +282,9 @@ bail:
 	return rc;
 }
 
-static int ohci_hcd_au1xxx_drv_resume(struct platform_device *pdev)
+static int ohci_hcd_au1xxx_drv_resume(struct device *dev)
 {
-	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
 
 	au1xxx_start_ohc();
 
@@ -298,20 +293,26 @@ static int ohci_hcd_au1xxx_drv_resume(struct platform_device *pdev)
 
 	return 0;
 }
+
+static struct dev_pm_ops au1xxx_ohci_pmops = {
+	.suspend	= ohci_hcd_au1xxx_drv_suspend,
+	.resume		= ohci_hcd_au1xxx_drv_resume,
+};
+
+#define AU1XXX_OHCI_PMOPS &au1xxx_ohci_pmops
+
 #else
-#define ohci_hcd_au1xxx_drv_suspend NULL
-#define ohci_hcd_au1xxx_drv_resume NULL
+#define AU1XXX_OHCI_PMOPS NULL
 #endif
 
 static struct platform_driver ohci_hcd_au1xxx_driver = {
 	.probe		= ohci_hcd_au1xxx_drv_probe,
 	.remove		= ohci_hcd_au1xxx_drv_remove,
 	.shutdown	= usb_hcd_platform_shutdown,
-	.suspend	= ohci_hcd_au1xxx_drv_suspend,
-	.resume		= ohci_hcd_au1xxx_drv_resume,
 	.driver		= {
 		.name	= "au1xxx-ohci",
 		.owner	= THIS_MODULE,
+		.pm	= AU1XXX_OHCI_PMOPS,
 	},
 };
 
-- 
cgit v0.10.2


From 81e5b23cd206d46d4872d25f3d7ff67a0f355c71 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 21 Jul 2009 08:47:34 +0200
Subject: USB: fix wrong order of events in usb serial suspension

if a subdriver has an additional suspend method, it must be called
first to allow the subdriver to return -EBUSY, because the second
half cannot be easily undone.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 54bb37d..45975b4 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1157,15 +1157,19 @@ int usb_serial_suspend(struct usb_interface *intf, pm_message_t message)
 
 	serial->suspending = 1;
 
+	if (serial->type->suspend) {
+		r = serial->type->suspend(serial, message);
+		if (r < 0)
+			goto err_out;
+	}
+
 	for (i = 0; i < serial->num_ports; ++i) {
 		port = serial->port[i];
 		if (port)
 			kill_traffic(port);
 	}
 
-	if (serial->type->suspend)
-		r = serial->type->suspend(serial, message);
-
+err_out:
 	return r;
 }
 EXPORT_SYMBOL(usb_serial_suspend);
-- 
cgit v0.10.2


From 25118084ef03f4fc314ab33ef6a9d9271d0e616a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 22 Jul 2009 14:41:18 -0400
Subject: USB: check for hub driver not bound to root hub device

This patch (as1267) changes usb_kick_khubd() and hdev_to_hub() to make
them more resilient against situations where a hub device isn't bound
to the hub driver.  The code assumes that if a root hub was
successfully registered then it must be bound to the hub driver.

But this assumption can fail if the user manually unbinds the hub
driver, or more importantly, if the host controller dies causing
usb_set_configuration to fail.

To protect against these possibilities, make hdev_to_hub() check that
the hub device is configured before dereferencing the active
configuration, and make usb_kick_khubd() check that the pointer to the
hub's private data structure isn't NULL.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 69e3a96..645686a 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -163,8 +163,10 @@ static inline char *portspeed(int portstatus)
 }
 
 /* Note that hdev or one of its children must be locked! */
-static inline struct usb_hub *hdev_to_hub(struct usb_device *hdev)
+static struct usb_hub *hdev_to_hub(struct usb_device *hdev)
 {
+	if (!hdev || !hdev->actconfig)
+		return NULL;
 	return usb_get_intfdata(hdev->actconfig->interface[0]);
 }
 
@@ -385,8 +387,10 @@ static void kick_khubd(struct usb_hub *hub)
 
 void usb_kick_khubd(struct usb_device *hdev)
 {
-	/* FIXME: What if hdev isn't bound to the hub driver? */
-	kick_khubd(hdev_to_hub(hdev));
+	struct usb_hub *hub = hdev_to_hub(hdev);
+
+	if (hub)
+		kick_khubd(hub);
 }
 
 
-- 
cgit v0.10.2


From 527101ce6a96c037a2555aa43222faa6fdd21e97 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 22 Jul 2009 14:42:54 -0400
Subject: USB: don't lose mode switch events on suspended devices

This patch (as1268) changes the way usbcore handles child devices that
undergo a disconnection and reconnection while the parent hub is
suspended.  Currently, if the child isn't enabled for remote wakeup we
leave it alone, figuring that it will go through a reset-resume when
somebody tries to use it.

However this isn't a good approach if the reason for the disconnection
is that the child decided to switch modes or in some other way alter
its descriptors.  In that case we want to re-enumerate it as soon as
possible, not wait until somebody forces a reset-resume.

To resolve the issue, this patch treats reconnected suspended child
devices as though they had requested a remote wakeup, even if they
weren't enabled for it.  The mode switch or descriptor change will be
detected during the reset part of the reset-resume, and the device
will be re-enumerated immediately.

The disadvantage of this change is that it will cause autosuspended
devices to be resumed when the computer wakes up from a system sleep
during which the root hub was reset or lost power.  This shouldn't
matter much; some people would even argue that autosuspended devices
should _always_ be resumed when the system wakes up!

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: "Yang Fei-AFY095" <fei.yang@motorola.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 645686a..a880516 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2932,14 +2932,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 			/* For a suspended device, treat this as a
 			 * remote wakeup event.
 			 */
-			if (udev->do_remote_wakeup)
-				status = remote_wakeup(udev);
-
-			/* Otherwise leave it be; devices can't tell the
-			 * difference between suspended and disabled.
-			 */
-			else
-				status = 0;
+			status = remote_wakeup(udev);
 #endif
 
 		} else {
-- 
cgit v0.10.2


From e9238221d3fef990e2fd01702ebe5af90dda52a2 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 22 Jul 2009 14:44:17 -0400
Subject: USB: dummy-hcd: accept mismatch between wLength and transfer length

This patch (as1269) fixes a bug in the way dummy_hcd handles control
URBs.  Currently it returns a -EOVERFLOW error if the wLength value in
the setup packet is different from the URB's transfer_buffer_length.

Other host controller drivers don't do this.  There's no reason the
two length values have to be equal, and in fact they sometimes aren't
-- a driver might set the transfer length to the maxpacket value in
order to handle buggy devices that don't respect wLength.

This patch simply removes the unnecessary check and error return.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index a56b24d..5e09664 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -1306,11 +1306,6 @@ restart:
 			setup = *(struct usb_ctrlrequest*) urb->setup_packet;
 			w_index = le16_to_cpu(setup.wIndex);
 			w_value = le16_to_cpu(setup.wValue);
-			if (le16_to_cpu(setup.wLength) !=
-					urb->transfer_buffer_length) {
-				status = -EOVERFLOW;
-				goto return_urb;
-			}
 
 			/* paranoia, in case of stale queued data */
 			list_for_each_entry (req, &ep->queue, queue) {
-- 
cgit v0.10.2


From a9d43091c5be1e7a60d5abe84be4f3050236b26a Mon Sep 17 00:00:00 2001
From: Lothar Wassmann <LW@KARO-electronics.de>
Date: Thu, 16 Jul 2009 20:51:21 -0400
Subject: USB: NXP ISP1362 USB host driver

Signed-off-by: Lothar Wassmann <LW@KARO-electronics.de>
Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index 19cb7d5..dbe4fb6 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_USB_UHCI_HCD)	+= host/
 obj-$(CONFIG_USB_FHCI_HCD)	+= host/
 obj-$(CONFIG_USB_XHCI_HCD)	+= host/
 obj-$(CONFIG_USB_SL811_HCD)	+= host/
+obj-$(CONFIG_USB_ISP1362_HCD)	+= host/
 obj-$(CONFIG_USB_U132_HCD)	+= host/
 obj-$(CONFIG_USB_R8A66597_HCD)	+= host/
 obj-$(CONFIG_USB_HWA_HCD)	+= host/
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 41b8d7d..9b43b22 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -159,6 +159,18 @@ config USB_ISP1760_HCD
 	  To compile this driver as a module, choose M here: the
 	  module will be called isp1760.
 
+config USB_ISP1362_HCD
+	tristate "ISP1362 HCD support"
+	depends on USB
+	default N
+	---help---
+	  Supports the Philips ISP1362 chip as a host controller
+
+	  This driver does not support isochronous transfers.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called isp1362-hcd.
+
 config USB_OHCI_HCD
 	tristate "OHCI HCD support"
 	depends on USB && USB_ARCH_HAS_OHCI
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 289d748..f58b249 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_PCI)		+= pci-quirks.o
 obj-$(CONFIG_USB_EHCI_HCD)	+= ehci-hcd.o
 obj-$(CONFIG_USB_OXU210HP_HCD)	+= oxu210hp-hcd.o
 obj-$(CONFIG_USB_ISP116X_HCD)	+= isp116x-hcd.o
+obj-$(CONFIG_USB_ISP1362_HCD)	+= isp1362-hcd.o
 obj-$(CONFIG_USB_OHCI_HCD)	+= ohci-hcd.o
 obj-$(CONFIG_USB_UHCI_HCD)	+= uhci-hcd.o
 obj-$(CONFIG_USB_FHCI_HCD)	+= fhci.o
diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
new file mode 100644
index 0000000..5819e10
--- /dev/null
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -0,0 +1,2905 @@
+/*
+ * ISP1362 HCD (Host Controller Driver) for USB.
+ *
+ * Copyright (C) 2005 Lothar Wassmann <LW@KARO-electronics.de>
+ *
+ * Derived from the SL811 HCD, rewritten for ISP116x.
+ * Copyright (C) 2005 Olav Kongas <ok@artecdesign.ee>
+ *
+ * Portions:
+ * Copyright (C) 2004 Psion Teklogix (for NetBook PRO)
+ * Copyright (C) 2004 David Brownell
+ */
+
+/*
+ * The ISP1362 chip requires a large delay (300ns and 462ns) between
+ * accesses to the address and data register.
+ * The following timing options exist:
+ *
+ * 1. Configure your memory controller to add such delays if it can (the best)
+ * 2. Implement platform-specific delay function possibly
+ *    combined with configuring the memory controller; see
+ *    include/linux/usb_isp1362.h for more info.
+ * 3. Use ndelay (easiest, poorest).
+ *
+ * Use the corresponding macros USE_PLATFORM_DELAY and USE_NDELAY in the
+ * platform specific section of isp1362.h to select the appropriate variant.
+ *
+ * Also note that according to the Philips "ISP1362 Errata" document
+ * Rev 1.00 from 27 May data corruption may occur when the #WR signal
+ * is reasserted (even with #CS deasserted) within 132ns after a
+ * write cycle to any controller register. If the hardware doesn't
+ * implement the recommended fix (gating the #WR with #CS) software
+ * must ensure that no further write cycle (not necessarily to the chip!)
+ * is issued by the CPU within this interval.
+
+ * For PXA25x this can be ensured by using VLIO with the maximum
+ * recovery time (MSCx = 0x7f8c) with a memory clock of 99.53 MHz.
+ */
+
+#ifdef CONFIG_USB_DEBUG
+# define ISP1362_DEBUG
+#else
+# undef ISP1362_DEBUG
+#endif
+
+/*
+ * The PXA255 UDC apparently doesn't handle GET_STATUS, GET_CONFIG and
+ * GET_INTERFACE requests correctly when the SETUP and DATA stages of the
+ * requests are carried out in separate frames. This will delay any SETUP
+ * packets until the start of the next frame so that this situation is
+ * unlikely to occur (and makes usbtest happy running with a PXA255 target
+ * device).
+ */
+#undef BUGGY_PXA2XX_UDC_USBTEST
+
+#undef PTD_TRACE
+#undef URB_TRACE
+#undef VERBOSE
+#undef REGISTERS
+
+/* This enables a memory test on the ISP1362 chip memory to make sure the
+ * chip access timing is correct.
+ */
+#undef CHIP_BUFFER_TEST
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/usb.h>
+#include <linux/usb/isp1362.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+static int dbg_level;
+#ifdef ISP1362_DEBUG
+module_param(dbg_level, int, 0644);
+#else
+module_param(dbg_level, int, 0);
+#define	STUB_DEBUG_FILE
+#endif
+
+#include "../core/hcd.h"
+#include "../core/usb.h"
+#include "isp1362.h"
+
+
+#define DRIVER_VERSION	"2005-04-04"
+#define DRIVER_DESC	"ISP1362 USB Host Controller Driver"
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+static const char hcd_name[] = "isp1362-hcd";
+
+static void isp1362_hc_stop(struct usb_hcd *hcd);
+static int isp1362_hc_start(struct usb_hcd *hcd);
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * When called from the interrupthandler only isp1362_hcd->irqenb is modified,
+ * since the interrupt handler will write isp1362_hcd->irqenb to HCuPINT upon
+ * completion.
+ * We don't need a 'disable' counterpart, since interrupts will be disabled
+ * only by the interrupt handler.
+ */
+static inline void isp1362_enable_int(struct isp1362_hcd *isp1362_hcd, u16 mask)
+{
+	if ((isp1362_hcd->irqenb | mask) == isp1362_hcd->irqenb)
+		return;
+	if (mask & ~isp1362_hcd->irqenb)
+		isp1362_write_reg16(isp1362_hcd, HCuPINT, mask & ~isp1362_hcd->irqenb);
+	isp1362_hcd->irqenb |= mask;
+	if (isp1362_hcd->irq_active)
+		return;
+	isp1362_write_reg16(isp1362_hcd, HCuPINTENB, isp1362_hcd->irqenb);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static inline struct isp1362_ep_queue *get_ptd_queue(struct isp1362_hcd *isp1362_hcd,
+						     u16 offset)
+{
+	struct isp1362_ep_queue *epq = NULL;
+
+	if (offset < isp1362_hcd->istl_queue[1].buf_start)
+		epq = &isp1362_hcd->istl_queue[0];
+	else if (offset < isp1362_hcd->intl_queue.buf_start)
+		epq = &isp1362_hcd->istl_queue[1];
+	else if (offset < isp1362_hcd->atl_queue.buf_start)
+		epq = &isp1362_hcd->intl_queue;
+	else if (offset < isp1362_hcd->atl_queue.buf_start +
+		   isp1362_hcd->atl_queue.buf_size)
+		epq = &isp1362_hcd->atl_queue;
+
+	if (epq)
+		DBG(1, "%s: PTD $%04x is on %s queue\n", __func__, offset, epq->name);
+	else
+		pr_warning("%s: invalid PTD $%04x\n", __func__, offset);
+
+	return epq;
+}
+
+static inline int get_ptd_offset(struct isp1362_ep_queue *epq, u8 index)
+{
+	int offset;
+
+	if (index * epq->blk_size > epq->buf_size) {
+		pr_warning("%s: Bad %s index %d(%d)\n", __func__, epq->name, index,
+		     epq->buf_size / epq->blk_size);
+		return -EINVAL;
+	}
+	offset = epq->buf_start + index * epq->blk_size;
+	DBG(3, "%s: %s PTD[%02x] # %04x\n", __func__, epq->name, index, offset);
+
+	return offset;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static inline u16 max_transfer_size(struct isp1362_ep_queue *epq, size_t size,
+				    int mps)
+{
+	u16 xfer_size = min_t(size_t, MAX_XFER_SIZE, size);
+
+	xfer_size = min_t(size_t, xfer_size, epq->buf_avail * epq->blk_size - PTD_HEADER_SIZE);
+	if (xfer_size < size && xfer_size % mps)
+		xfer_size -= xfer_size % mps;
+
+	return xfer_size;
+}
+
+static int claim_ptd_buffers(struct isp1362_ep_queue *epq,
+			     struct isp1362_ep *ep, u16 len)
+{
+	int ptd_offset = -EINVAL;
+	int index;
+	int num_ptds = ((len + PTD_HEADER_SIZE - 1) / epq->blk_size) + 1;
+	int found = -1;
+	int last = -1;
+
+	BUG_ON(len > epq->buf_size);
+
+	if (!epq->buf_avail)
+		return -ENOMEM;
+
+	if (ep->num_ptds)
+		pr_err("%s: %s len %d/%d num_ptds %d buf_map %08lx skip_map %08lx\n", __func__,
+		    epq->name, len, epq->blk_size, num_ptds, epq->buf_map, epq->skip_map);
+	BUG_ON(ep->num_ptds != 0);
+
+	for (index = 0; index <= epq->buf_count - num_ptds; index++) {
+		if (test_bit(index, &epq->buf_map))
+			continue;
+		found = index;
+		for (last = index + 1; last < index + num_ptds; last++) {
+			if (test_bit(last, &epq->buf_map)) {
+				found = -1;
+				break;
+			}
+		}
+		if (found >= 0)
+			break;
+	}
+	if (found < 0)
+		return -EOVERFLOW;
+
+	DBG(1, "%s: Found %d PTDs[%d] for %d/%d byte\n", __func__,
+	    num_ptds, found, len, (int)(epq->blk_size - PTD_HEADER_SIZE));
+	ptd_offset = get_ptd_offset(epq, found);
+	WARN_ON(ptd_offset < 0);
+	ep->ptd_offset = ptd_offset;
+	ep->num_ptds += num_ptds;
+	epq->buf_avail -= num_ptds;
+	BUG_ON(epq->buf_avail > epq->buf_count);
+	ep->ptd_index = found;
+	for (index = found; index < last; index++)
+		__set_bit(index, &epq->buf_map);
+	DBG(1, "%s: Done %s PTD[%d] $%04x, avail %d count %d claimed %d %08lx:%08lx\n",
+	    __func__, epq->name, ep->ptd_index, ep->ptd_offset,
+	    epq->buf_avail, epq->buf_count, num_ptds, epq->buf_map, epq->skip_map);
+
+	return found;
+}
+
+static inline void release_ptd_buffers(struct isp1362_ep_queue *epq, struct isp1362_ep *ep)
+{
+	int index = ep->ptd_index;
+	int last = ep->ptd_index + ep->num_ptds;
+
+	if (last > epq->buf_count)
+		pr_err("%s: ep %p req %d len %d %s PTD[%d] $%04x num_ptds %d buf_count %d buf_avail %d buf_map %08lx skip_map %08lx\n",
+		    __func__, ep, ep->num_req, ep->length, epq->name, ep->ptd_index,
+		    ep->ptd_offset, ep->num_ptds, epq->buf_count, epq->buf_avail,
+		    epq->buf_map, epq->skip_map);
+	BUG_ON(last > epq->buf_count);
+
+	for (; index < last; index++) {
+		__clear_bit(index, &epq->buf_map);
+		__set_bit(index, &epq->skip_map);
+	}
+	epq->buf_avail += ep->num_ptds;
+	epq->ptd_count--;
+
+	BUG_ON(epq->buf_avail > epq->buf_count);
+	BUG_ON(epq->ptd_count > epq->buf_count);
+
+	DBG(1, "%s: Done %s PTDs $%04x released %d avail %d count %d\n",
+	    __func__, epq->name,
+	    ep->ptd_offset, ep->num_ptds, epq->buf_avail, epq->buf_count);
+	DBG(1, "%s: buf_map %08lx skip_map %08lx\n", __func__,
+	    epq->buf_map, epq->skip_map);
+
+	ep->num_ptds = 0;
+	ep->ptd_offset = -EINVAL;
+	ep->ptd_index = -EINVAL;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/*
+  Set up PTD's.
+*/
+static void prepare_ptd(struct isp1362_hcd *isp1362_hcd, struct urb *urb,
+			struct isp1362_ep *ep, struct isp1362_ep_queue *epq,
+			u16 fno)
+{
+	struct ptd *ptd;
+	int toggle;
+	int dir;
+	u16 len;
+	size_t buf_len = urb->transfer_buffer_length - urb->actual_length;
+
+	DBG(3, "%s: %s ep %p\n", __func__, epq->name, ep);
+
+	ptd = &ep->ptd;
+
+	ep->data = (unsigned char *)urb->transfer_buffer + urb->actual_length;
+
+	switch (ep->nextpid) {
+	case USB_PID_IN:
+		toggle = usb_gettoggle(urb->dev, ep->epnum, 0);
+		dir = PTD_DIR_IN;
+		if (usb_pipecontrol(urb->pipe)) {
+			len = min_t(size_t, ep->maxpacket, buf_len);
+		} else if (usb_pipeisoc(urb->pipe)) {
+			len = min_t(size_t, urb->iso_frame_desc[fno].length, MAX_XFER_SIZE);
+			ep->data = urb->transfer_buffer + urb->iso_frame_desc[fno].offset;
+		} else
+			len = max_transfer_size(epq, buf_len, ep->maxpacket);
+		DBG(1, "%s: IN    len %d/%d/%d from URB\n", __func__, len, ep->maxpacket,
+		    (int)buf_len);
+		break;
+	case USB_PID_OUT:
+		toggle = usb_gettoggle(urb->dev, ep->epnum, 1);
+		dir = PTD_DIR_OUT;
+		if (usb_pipecontrol(urb->pipe))
+			len = min_t(size_t, ep->maxpacket, buf_len);
+		else if (usb_pipeisoc(urb->pipe))
+			len = min_t(size_t, urb->iso_frame_desc[0].length, MAX_XFER_SIZE);
+		else
+			len = max_transfer_size(epq, buf_len, ep->maxpacket);
+		if (len == 0)
+			pr_info("%s: Sending ZERO packet: %d\n", __func__,
+			     urb->transfer_flags & URB_ZERO_PACKET);
+		DBG(1, "%s: OUT   len %d/%d/%d from URB\n", __func__, len, ep->maxpacket,
+		    (int)buf_len);
+		break;
+	case USB_PID_SETUP:
+		toggle = 0;
+		dir = PTD_DIR_SETUP;
+		len = sizeof(struct usb_ctrlrequest);
+		DBG(1, "%s: SETUP len %d\n", __func__, len);
+		ep->data = urb->setup_packet;
+		break;
+	case USB_PID_ACK:
+		toggle = 1;
+		len = 0;
+		dir = (urb->transfer_buffer_length && usb_pipein(urb->pipe)) ?
+			PTD_DIR_OUT : PTD_DIR_IN;
+		DBG(1, "%s: ACK   len %d\n", __func__, len);
+		break;
+	default:
+		toggle = dir = len = 0;
+		pr_err("%s@%d: ep->nextpid %02x\n", __func__, __LINE__, ep->nextpid);
+		BUG_ON(1);
+	}
+
+	ep->length = len;
+	if (!len)
+		ep->data = NULL;
+
+	ptd->count = PTD_CC_MSK | PTD_ACTIVE_MSK | PTD_TOGGLE(toggle);
+	ptd->mps = PTD_MPS(ep->maxpacket) | PTD_SPD(urb->dev->speed == USB_SPEED_LOW) |
+		PTD_EP(ep->epnum);
+	ptd->len = PTD_LEN(len) | PTD_DIR(dir);
+	ptd->faddr = PTD_FA(usb_pipedevice(urb->pipe));
+
+	if (usb_pipeint(urb->pipe)) {
+		ptd->faddr |= PTD_SF_INT(ep->branch);
+		ptd->faddr |= PTD_PR(ep->interval ? __ffs(ep->interval) : 0);
+	}
+	if (usb_pipeisoc(urb->pipe))
+		ptd->faddr |= PTD_SF_ISO(fno);
+
+	DBG(1, "%s: Finished\n", __func__);
+}
+
+static void isp1362_write_ptd(struct isp1362_hcd *isp1362_hcd, struct isp1362_ep *ep,
+			      struct isp1362_ep_queue *epq)
+{
+	struct ptd *ptd = &ep->ptd;
+	int len = PTD_GET_DIR(ptd) == PTD_DIR_IN ? 0 : ep->length;
+
+	_BUG_ON(ep->ptd_offset < 0);
+
+	prefetch(ptd);
+	isp1362_write_buffer(isp1362_hcd, ptd, ep->ptd_offset, PTD_HEADER_SIZE);
+	if (len)
+		isp1362_write_buffer(isp1362_hcd, ep->data,
+				     ep->ptd_offset + PTD_HEADER_SIZE, len);
+
+	dump_ptd(ptd);
+	dump_ptd_out_data(ptd, ep->data);
+}
+
+static void isp1362_read_ptd(struct isp1362_hcd *isp1362_hcd, struct isp1362_ep *ep,
+			     struct isp1362_ep_queue *epq)
+{
+	struct ptd *ptd = &ep->ptd;
+	int act_len;
+
+	WARN_ON(list_empty(&ep->active));
+	BUG_ON(ep->ptd_offset < 0);
+
+	list_del_init(&ep->active);
+	DBG(1, "%s: ep %p removed from active list %p\n", __func__, ep, &epq->active);
+
+	prefetchw(ptd);
+	isp1362_read_buffer(isp1362_hcd, ptd, ep->ptd_offset, PTD_HEADER_SIZE);
+	dump_ptd(ptd);
+	act_len = PTD_GET_COUNT(ptd);
+	if (PTD_GET_DIR(ptd) != PTD_DIR_IN || act_len == 0)
+		return;
+	if (act_len > ep->length)
+		pr_err("%s: ep %p PTD $%04x act_len %d ep->length %d\n", __func__, ep,
+			 ep->ptd_offset, act_len, ep->length);
+	BUG_ON(act_len > ep->length);
+	/* Only transfer the amount of data that has actually been overwritten
+	 * in the chip buffer. We don't want any data that doesn't belong to the
+	 * transfer to leak out of the chip to the callers transfer buffer!
+	 */
+	prefetchw(ep->data);
+	isp1362_read_buffer(isp1362_hcd, ep->data,
+			    ep->ptd_offset + PTD_HEADER_SIZE, act_len);
+	dump_ptd_in_data(ptd, ep->data);
+}
+
+/*
+ * INT PTDs will stay in the chip until data is available.
+ * This function will remove a PTD from the chip when the URB is dequeued.
+ * Must be called with the spinlock held and IRQs disabled
+ */
+static void remove_ptd(struct isp1362_hcd *isp1362_hcd, struct isp1362_ep *ep)
+
+{
+	int index;
+	struct isp1362_ep_queue *epq;
+
+	DBG(1, "%s: ep %p PTD[%d] $%04x\n", __func__, ep, ep->ptd_index, ep->ptd_offset);
+	BUG_ON(ep->ptd_offset < 0);
+
+	epq = get_ptd_queue(isp1362_hcd, ep->ptd_offset);
+	BUG_ON(!epq);
+
+	/* put ep in remove_list for cleanup */
+	WARN_ON(!list_empty(&ep->remove_list));
+	list_add_tail(&ep->remove_list, &isp1362_hcd->remove_list);
+	/* let SOF interrupt handle the cleanup */
+	isp1362_enable_int(isp1362_hcd, HCuPINT_SOF);
+
+	index = ep->ptd_index;
+	if (index < 0)
+		/* ISO queues don't have SKIP registers */
+		return;
+
+	DBG(1, "%s: Disabling PTD[%02x] $%04x %08lx|%08x\n", __func__,
+	    index, ep->ptd_offset, epq->skip_map, 1 << index);
+
+	/* prevent further processing of PTD (will be effective after next SOF) */
+	epq->skip_map |= 1 << index;
+	if (epq == &isp1362_hcd->atl_queue) {
+		DBG(2, "%s: ATLSKIP = %08x -> %08lx\n", __func__,
+		    isp1362_read_reg32(isp1362_hcd, HCATLSKIP), epq->skip_map);
+		isp1362_write_reg32(isp1362_hcd, HCATLSKIP, epq->skip_map);
+		if (~epq->skip_map == 0)
+			isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ATL_ACTIVE);
+	} else if (epq == &isp1362_hcd->intl_queue) {
+		DBG(2, "%s: INTLSKIP = %08x -> %08lx\n", __func__,
+		    isp1362_read_reg32(isp1362_hcd, HCINTLSKIP), epq->skip_map);
+		isp1362_write_reg32(isp1362_hcd, HCINTLSKIP, epq->skip_map);
+		if (~epq->skip_map == 0)
+			isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_INTL_ACTIVE);
+	}
+}
+
+/*
+  Take done or failed requests out of schedule. Give back
+  processed urbs.
+*/
+static void finish_request(struct isp1362_hcd *isp1362_hcd, struct isp1362_ep *ep,
+			   struct urb *urb, int status)
+     __releases(isp1362_hcd->lock)
+     __acquires(isp1362_hcd->lock)
+{
+	urb->hcpriv = NULL;
+	ep->error_count = 0;
+
+	if (usb_pipecontrol(urb->pipe))
+		ep->nextpid = USB_PID_SETUP;
+
+	URB_DBG("%s: req %d FA %d ep%d%s %s: len %d/%d %s stat %d\n", __func__,
+		ep->num_req, usb_pipedevice(urb->pipe),
+		usb_pipeendpoint(urb->pipe),
+		!usb_pipein(urb->pipe) ? "out" : "in",
+		usb_pipecontrol(urb->pipe) ? "ctrl" :
+			usb_pipeint(urb->pipe) ? "int" :
+			usb_pipebulk(urb->pipe) ? "bulk" :
+			"iso",
+		urb->actual_length, urb->transfer_buffer_length,
+		!(urb->transfer_flags & URB_SHORT_NOT_OK) ?
+		"short_ok" : "", urb->status);
+
+
+	usb_hcd_unlink_urb_from_ep(isp1362_hcd_to_hcd(isp1362_hcd), urb);
+	spin_unlock(&isp1362_hcd->lock);
+	usb_hcd_giveback_urb(isp1362_hcd_to_hcd(isp1362_hcd), urb, status);
+	spin_lock(&isp1362_hcd->lock);
+
+	/* take idle endpoints out of the schedule right away */
+	if (!list_empty(&ep->hep->urb_list))
+		return;
+
+	/* async deschedule */
+	if (!list_empty(&ep->schedule)) {
+		list_del_init(&ep->schedule);
+		return;
+	}
+
+
+	if (ep->interval) {
+		/* periodic deschedule */
+		DBG(1, "deschedule qh%d/%p branch %d load %d bandwidth %d -> %d\n", ep->interval,
+		    ep, ep->branch, ep->load,
+		    isp1362_hcd->load[ep->branch],
+		    isp1362_hcd->load[ep->branch] - ep->load);
+		isp1362_hcd->load[ep->branch] -= ep->load;
+		ep->branch = PERIODIC_SIZE;
+	}
+}
+
+/*
+ * Analyze transfer results, handle partial transfers and errors
+*/
+static void postproc_ep(struct isp1362_hcd *isp1362_hcd, struct isp1362_ep *ep)
+{
+	struct urb *urb = get_urb(ep);
+	struct usb_device *udev;
+	struct ptd *ptd;
+	int short_ok;
+	u16 len;
+	int urbstat = -EINPROGRESS;
+	u8 cc;
+
+	DBG(2, "%s: ep %p req %d\n", __func__, ep, ep->num_req);
+
+	udev = urb->dev;
+	ptd = &ep->ptd;
+	cc = PTD_GET_CC(ptd);
+	if (cc == PTD_NOTACCESSED) {
+		pr_err("%s: req %d PTD %p Untouched by ISP1362\n", __func__,
+		    ep->num_req, ptd);
+		cc = PTD_DEVNOTRESP;
+	}
+
+	short_ok = !(urb->transfer_flags & URB_SHORT_NOT_OK);
+	len = urb->transfer_buffer_length - urb->actual_length;
+
+	/* Data underrun is special. For allowed underrun
+	   we clear the error and continue as normal. For
+	   forbidden underrun we finish the DATA stage
+	   immediately while for control transfer,
+	   we do a STATUS stage.
+	*/
+	if (cc == PTD_DATAUNDERRUN) {
+		if (short_ok) {
+			DBG(1, "%s: req %d Allowed data underrun short_%sok %d/%d/%d byte\n",
+			    __func__, ep->num_req, short_ok ? "" : "not_",
+			    PTD_GET_COUNT(ptd), ep->maxpacket, len);
+			cc = PTD_CC_NOERROR;
+			urbstat = 0;
+		} else {
+			DBG(1, "%s: req %d Data Underrun %s nextpid %02x short_%sok %d/%d/%d byte\n",
+			    __func__, ep->num_req,
+			    usb_pipein(urb->pipe) ? "IN" : "OUT", ep->nextpid,
+			    short_ok ? "" : "not_",
+			    PTD_GET_COUNT(ptd), ep->maxpacket, len);
+			if (usb_pipecontrol(urb->pipe)) {
+				ep->nextpid = USB_PID_ACK;
+				/* save the data underrun error code for later and
+				 * procede with the status stage
+				 */
+				urb->actual_length += PTD_GET_COUNT(ptd);
+				BUG_ON(urb->actual_length > urb->transfer_buffer_length);
+
+				if (urb->status == -EINPROGRESS)
+					urb->status = cc_to_error[PTD_DATAUNDERRUN];
+			} else {
+				usb_settoggle(udev, ep->epnum, ep->nextpid == USB_PID_OUT,
+					      PTD_GET_TOGGLE(ptd));
+				urbstat = cc_to_error[PTD_DATAUNDERRUN];
+			}
+			goto out;
+		}
+	}
+
+	if (cc != PTD_CC_NOERROR) {
+		if (++ep->error_count >= 3 || cc == PTD_CC_STALL || cc == PTD_DATAOVERRUN) {
+			urbstat = cc_to_error[cc];
+			DBG(1, "%s: req %d nextpid %02x, status %d, error %d, error_count %d\n",
+			    __func__, ep->num_req, ep->nextpid, urbstat, cc,
+			    ep->error_count);
+		}
+		goto out;
+	}
+
+	switch (ep->nextpid) {
+	case USB_PID_OUT:
+		if (PTD_GET_COUNT(ptd) != ep->length)
+			pr_err("%s: count=%d len=%d\n", __func__,
+			   PTD_GET_COUNT(ptd), ep->length);
+		BUG_ON(PTD_GET_COUNT(ptd) != ep->length);
+		urb->actual_length += ep->length;
+		BUG_ON(urb->actual_length > urb->transfer_buffer_length);
+		usb_settoggle(udev, ep->epnum, 1, PTD_GET_TOGGLE(ptd));
+		if (urb->actual_length == urb->transfer_buffer_length) {
+			DBG(3, "%s: req %d xfer complete %d/%d status %d -> 0\n", __func__,
+			    ep->num_req, len, ep->maxpacket, urbstat);
+			if (usb_pipecontrol(urb->pipe)) {
+				DBG(3, "%s: req %d %s Wait for ACK\n", __func__,
+				    ep->num_req,
+				    usb_pipein(urb->pipe) ? "IN" : "OUT");
+				ep->nextpid = USB_PID_ACK;
+			} else {
+				if (len % ep->maxpacket ||
+				    !(urb->transfer_flags & URB_ZERO_PACKET)) {
+					urbstat = 0;
+					DBG(3, "%s: req %d URB %s status %d count %d/%d/%d\n",
+					    __func__, ep->num_req, usb_pipein(urb->pipe) ? "IN" : "OUT",
+					    urbstat, len, ep->maxpacket, urb->actual_length);
+				}
+			}
+		}
+		break;
+	case USB_PID_IN:
+		len = PTD_GET_COUNT(ptd);
+		BUG_ON(len > ep->length);
+		urb->actual_length += len;
+		BUG_ON(urb->actual_length > urb->transfer_buffer_length);
+		usb_settoggle(udev, ep->epnum, 0, PTD_GET_TOGGLE(ptd));
+		/* if transfer completed or (allowed) data underrun */
+		if ((urb->transfer_buffer_length == urb->actual_length) ||
+		    len % ep->maxpacket) {
+			DBG(3, "%s: req %d xfer complete %d/%d status %d -> 0\n", __func__,
+			    ep->num_req, len, ep->maxpacket, urbstat);
+			if (usb_pipecontrol(urb->pipe)) {
+				DBG(3, "%s: req %d %s Wait for ACK\n", __func__,
+				    ep->num_req,
+				    usb_pipein(urb->pipe) ? "IN" : "OUT");
+				ep->nextpid = USB_PID_ACK;
+			} else {
+				urbstat = 0;
+				DBG(3, "%s: req %d URB %s status %d count %d/%d/%d\n",
+				    __func__, ep->num_req, usb_pipein(urb->pipe) ? "IN" : "OUT",
+				    urbstat, len, ep->maxpacket, urb->actual_length);
+			}
+		}
+		break;
+	case USB_PID_SETUP:
+		if (urb->transfer_buffer_length == urb->actual_length) {
+			ep->nextpid = USB_PID_ACK;
+		} else if (usb_pipeout(urb->pipe)) {
+			usb_settoggle(udev, 0, 1, 1);
+			ep->nextpid = USB_PID_OUT;
+		} else {
+			usb_settoggle(udev, 0, 0, 1);
+			ep->nextpid = USB_PID_IN;
+		}
+		break;
+	case USB_PID_ACK:
+		DBG(3, "%s: req %d got ACK %d -> 0\n", __func__, ep->num_req,
+		    urbstat);
+		WARN_ON(urbstat != -EINPROGRESS);
+		urbstat = 0;
+		ep->nextpid = 0;
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+ out:
+	if (urbstat != -EINPROGRESS) {
+		DBG(2, "%s: Finishing ep %p req %d urb %p status %d\n", __func__,
+		    ep, ep->num_req, urb, urbstat);
+		finish_request(isp1362_hcd, ep, urb, urbstat);
+	}
+}
+
+static void finish_unlinks(struct isp1362_hcd *isp1362_hcd)
+{
+	struct isp1362_ep *ep;
+	struct isp1362_ep *tmp;
+
+	list_for_each_entry_safe(ep, tmp, &isp1362_hcd->remove_list, remove_list) {
+		struct isp1362_ep_queue *epq =
+			get_ptd_queue(isp1362_hcd, ep->ptd_offset);
+		int index = ep->ptd_index;
+
+		BUG_ON(epq == NULL);
+		if (index >= 0) {
+			DBG(1, "%s: remove PTD[%d] $%04x\n", __func__, index, ep->ptd_offset);
+			BUG_ON(ep->num_ptds == 0);
+			release_ptd_buffers(epq, ep);
+		}
+		if (!list_empty(&ep->hep->urb_list)) {
+			struct urb *urb = get_urb(ep);
+
+			DBG(1, "%s: Finishing req %d ep %p from remove_list\n", __func__,
+			    ep->num_req, ep);
+			finish_request(isp1362_hcd, ep, urb, -ESHUTDOWN);
+		}
+		WARN_ON(list_empty(&ep->active));
+		if (!list_empty(&ep->active)) {
+			list_del_init(&ep->active);
+			DBG(1, "%s: ep %p removed from active list\n", __func__, ep);
+		}
+		list_del_init(&ep->remove_list);
+		DBG(1, "%s: ep %p removed from remove_list\n", __func__, ep);
+	}
+	DBG(1, "%s: Done\n", __func__);
+}
+
+static inline void enable_atl_transfers(struct isp1362_hcd *isp1362_hcd, int count)
+{
+	if (count > 0) {
+		if (count < isp1362_hcd->atl_queue.ptd_count)
+			isp1362_write_reg16(isp1362_hcd, HCATLDTC, count);
+		isp1362_enable_int(isp1362_hcd, HCuPINT_ATL);
+		isp1362_write_reg32(isp1362_hcd, HCATLSKIP, isp1362_hcd->atl_queue.skip_map);
+		isp1362_set_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ATL_ACTIVE);
+	} else
+		isp1362_enable_int(isp1362_hcd, HCuPINT_SOF);
+}
+
+static inline void enable_intl_transfers(struct isp1362_hcd *isp1362_hcd)
+{
+	isp1362_enable_int(isp1362_hcd, HCuPINT_INTL);
+	isp1362_set_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_INTL_ACTIVE);
+	isp1362_write_reg32(isp1362_hcd, HCINTLSKIP, isp1362_hcd->intl_queue.skip_map);
+}
+
+static inline void enable_istl_transfers(struct isp1362_hcd *isp1362_hcd, int flip)
+{
+	isp1362_enable_int(isp1362_hcd, flip ? HCuPINT_ISTL1 : HCuPINT_ISTL0);
+	isp1362_set_mask16(isp1362_hcd, HCBUFSTAT, flip ?
+			   HCBUFSTAT_ISTL1_FULL : HCBUFSTAT_ISTL0_FULL);
+}
+
+static int submit_req(struct isp1362_hcd *isp1362_hcd, struct urb *urb,
+		      struct isp1362_ep *ep, struct isp1362_ep_queue *epq)
+{
+	int index = epq->free_ptd;
+
+	prepare_ptd(isp1362_hcd, urb, ep, epq, 0);
+	index = claim_ptd_buffers(epq, ep, ep->length);
+	if (index == -ENOMEM) {
+		DBG(1, "%s: req %d No free %s PTD available: %d, %08lx:%08lx\n", __func__,
+		    ep->num_req, epq->name, ep->num_ptds, epq->buf_map, epq->skip_map);
+		return index;
+	} else if (index == -EOVERFLOW) {
+		DBG(1, "%s: req %d Not enough space for %d byte %s PTD %d %08lx:%08lx\n",
+		    __func__, ep->num_req, ep->length, epq->name, ep->num_ptds,
+		    epq->buf_map, epq->skip_map);
+		return index;
+	} else
+		BUG_ON(index < 0);
+	list_add_tail(&ep->active, &epq->active);
+	DBG(1, "%s: ep %p req %d len %d added to active list %p\n", __func__,
+	    ep, ep->num_req, ep->length, &epq->active);
+	DBG(1, "%s: Submitting %s PTD $%04x for ep %p req %d\n", __func__, epq->name,
+	    ep->ptd_offset, ep, ep->num_req);
+	isp1362_write_ptd(isp1362_hcd, ep, epq);
+	__clear_bit(ep->ptd_index, &epq->skip_map);
+
+	return 0;
+}
+
+static void start_atl_transfers(struct isp1362_hcd *isp1362_hcd)
+{
+	int ptd_count = 0;
+	struct isp1362_ep_queue *epq = &isp1362_hcd->atl_queue;
+	struct isp1362_ep *ep;
+	int defer = 0;
+
+	if (atomic_read(&epq->finishing)) {
+		DBG(1, "%s: finish_transfers is active for %s\n", __func__, epq->name);
+		return;
+	}
+
+	list_for_each_entry(ep, &isp1362_hcd->async, schedule) {
+		struct urb *urb = get_urb(ep);
+		int ret;
+
+		if (!list_empty(&ep->active)) {
+			DBG(2, "%s: Skipping active %s ep %p\n", __func__, epq->name, ep);
+			continue;
+		}
+
+		DBG(1, "%s: Processing %s ep %p req %d\n", __func__, epq->name,
+		    ep, ep->num_req);
+
+		ret = submit_req(isp1362_hcd, urb, ep, epq);
+		if (ret == -ENOMEM) {
+			defer = 1;
+			break;
+		} else if (ret == -EOVERFLOW) {
+			defer = 1;
+			continue;
+		}
+#ifdef BUGGY_PXA2XX_UDC_USBTEST
+		defer = ep->nextpid == USB_PID_SETUP;
+#endif
+		ptd_count++;
+	}
+
+	/* Avoid starving of endpoints */
+	if (isp1362_hcd->async.next != isp1362_hcd->async.prev) {
+		DBG(2, "%s: Cycling ASYNC schedule %d\n", __func__, ptd_count);
+		list_move(&isp1362_hcd->async, isp1362_hcd->async.next);
+	}
+	if (ptd_count || defer)
+		enable_atl_transfers(isp1362_hcd, defer ? 0 : ptd_count);
+
+	epq->ptd_count += ptd_count;
+	if (epq->ptd_count > epq->stat_maxptds) {
+		epq->stat_maxptds = epq->ptd_count;
+		DBG(0, "%s: max_ptds: %d\n", __func__, epq->stat_maxptds);
+	}
+}
+
+static void start_intl_transfers(struct isp1362_hcd *isp1362_hcd)
+{
+	int ptd_count = 0;
+	struct isp1362_ep_queue *epq = &isp1362_hcd->intl_queue;
+	struct isp1362_ep *ep;
+
+	if (atomic_read(&epq->finishing)) {
+		DBG(1, "%s: finish_transfers is active for %s\n", __func__, epq->name);
+		return;
+	}
+
+	list_for_each_entry(ep, &isp1362_hcd->periodic, schedule) {
+		struct urb *urb = get_urb(ep);
+		int ret;
+
+		if (!list_empty(&ep->active)) {
+			DBG(1, "%s: Skipping active %s ep %p\n", __func__,
+			    epq->name, ep);
+			continue;
+		}
+
+		DBG(1, "%s: Processing %s ep %p req %d\n", __func__,
+		    epq->name, ep, ep->num_req);
+		ret = submit_req(isp1362_hcd, urb, ep, epq);
+		if (ret == -ENOMEM)
+			break;
+		else if (ret == -EOVERFLOW)
+			continue;
+		ptd_count++;
+	}
+
+	if (ptd_count) {
+		static int last_count;
+
+		if (ptd_count != last_count) {
+			DBG(0, "%s: ptd_count: %d\n", __func__, ptd_count);
+			last_count = ptd_count;
+		}
+		enable_intl_transfers(isp1362_hcd);
+	}
+
+	epq->ptd_count += ptd_count;
+	if (epq->ptd_count > epq->stat_maxptds)
+		epq->stat_maxptds = epq->ptd_count;
+}
+
+static inline int next_ptd(struct isp1362_ep_queue *epq, struct isp1362_ep *ep)
+{
+	u16 ptd_offset = ep->ptd_offset;
+	int num_ptds = (ep->length + PTD_HEADER_SIZE + (epq->blk_size - 1)) / epq->blk_size;
+
+	DBG(2, "%s: PTD offset $%04x + %04x => %d * %04x -> $%04x\n", __func__, ptd_offset,
+	    ep->length, num_ptds, epq->blk_size, ptd_offset + num_ptds * epq->blk_size);
+
+	ptd_offset += num_ptds * epq->blk_size;
+	if (ptd_offset < epq->buf_start + epq->buf_size)
+		return ptd_offset;
+	else
+		return -ENOMEM;
+}
+
+static void start_iso_transfers(struct isp1362_hcd *isp1362_hcd)
+{
+	int ptd_count = 0;
+	int flip = isp1362_hcd->istl_flip;
+	struct isp1362_ep_queue *epq;
+	int ptd_offset;
+	struct isp1362_ep *ep;
+	struct isp1362_ep *tmp;
+	u16 fno = isp1362_read_reg32(isp1362_hcd, HCFMNUM);
+
+ fill2:
+	epq = &isp1362_hcd->istl_queue[flip];
+	if (atomic_read(&epq->finishing)) {
+		DBG(1, "%s: finish_transfers is active for %s\n", __func__, epq->name);
+		return;
+	}
+
+	if (!list_empty(&epq->active))
+		return;
+
+	ptd_offset = epq->buf_start;
+	list_for_each_entry_safe(ep, tmp, &isp1362_hcd->isoc, schedule) {
+		struct urb *urb = get_urb(ep);
+		s16 diff = fno - (u16)urb->start_frame;
+
+		DBG(1, "%s: Processing %s ep %p\n", __func__, epq->name, ep);
+
+		if (diff > urb->number_of_packets) {
+			/* time frame for this URB has elapsed */
+			finish_request(isp1362_hcd, ep, urb, -EOVERFLOW);
+			continue;
+		} else if (diff < -1) {
+			/* URB is not due in this frame or the next one.
+			 * Comparing with '-1' instead of '0' accounts for double
+			 * buffering in the ISP1362 which enables us to queue the PTD
+			 * one frame ahead of time
+			 */
+		} else if (diff == -1) {
+			/* submit PTD's that are due in the next frame */
+			prepare_ptd(isp1362_hcd, urb, ep, epq, fno);
+			if (ptd_offset + PTD_HEADER_SIZE + ep->length >
+			    epq->buf_start + epq->buf_size) {
+				pr_err("%s: Not enough ISO buffer space for %d byte PTD\n",
+				    __func__, ep->length);
+				continue;
+			}
+			ep->ptd_offset = ptd_offset;
+			list_add_tail(&ep->active, &epq->active);
+
+			ptd_offset = next_ptd(epq, ep);
+			if (ptd_offset < 0) {
+				pr_warning("%s: req %d No more %s PTD buffers available\n", __func__,
+				     ep->num_req, epq->name);
+				break;
+			}
+		}
+	}
+	list_for_each_entry(ep, &epq->active, active) {
+		if (epq->active.next == &ep->active)
+			ep->ptd.mps |= PTD_LAST_MSK;
+		isp1362_write_ptd(isp1362_hcd, ep, epq);
+		ptd_count++;
+	}
+
+	if (ptd_count)
+		enable_istl_transfers(isp1362_hcd, flip);
+
+	epq->ptd_count += ptd_count;
+	if (epq->ptd_count > epq->stat_maxptds)
+		epq->stat_maxptds = epq->ptd_count;
+
+	/* check, whether the second ISTL buffer may also be filled */
+	if (!(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) &
+	      (flip ? HCBUFSTAT_ISTL0_FULL : HCBUFSTAT_ISTL1_FULL))) {
+		fno++;
+		ptd_count = 0;
+		flip = 1 - flip;
+		goto fill2;
+	}
+}
+
+static void finish_transfers(struct isp1362_hcd *isp1362_hcd, unsigned long done_map,
+			     struct isp1362_ep_queue *epq)
+{
+	struct isp1362_ep *ep;
+	struct isp1362_ep *tmp;
+
+	if (list_empty(&epq->active)) {
+		DBG(1, "%s: Nothing to do for %s queue\n", __func__, epq->name);
+		return;
+	}
+
+	DBG(1, "%s: Finishing %s transfers %08lx\n", __func__, epq->name, done_map);
+
+	atomic_inc(&epq->finishing);
+	list_for_each_entry_safe(ep, tmp, &epq->active, active) {
+		int index = ep->ptd_index;
+
+		DBG(1, "%s: Checking %s PTD[%02x] $%04x\n", __func__, epq->name,
+		    index, ep->ptd_offset);
+
+		BUG_ON(index < 0);
+		if (__test_and_clear_bit(index, &done_map)) {
+			isp1362_read_ptd(isp1362_hcd, ep, epq);
+			epq->free_ptd = index;
+			BUG_ON(ep->num_ptds == 0);
+			release_ptd_buffers(epq, ep);
+
+			DBG(1, "%s: ep %p req %d removed from active list\n", __func__,
+			    ep, ep->num_req);
+			if (!list_empty(&ep->remove_list)) {
+				list_del_init(&ep->remove_list);
+				DBG(1, "%s: ep %p removed from remove list\n", __func__, ep);
+			}
+			DBG(1, "%s: Postprocessing %s ep %p req %d\n", __func__, epq->name,
+			    ep, ep->num_req);
+			postproc_ep(isp1362_hcd, ep);
+		}
+		if (!done_map)
+			break;
+	}
+	if (done_map)
+		pr_warning("%s: done_map not clear: %08lx:%08lx\n", __func__, done_map,
+		     epq->skip_map);
+	atomic_dec(&epq->finishing);
+}
+
+static void finish_iso_transfers(struct isp1362_hcd *isp1362_hcd, struct isp1362_ep_queue *epq)
+{
+	struct isp1362_ep *ep;
+	struct isp1362_ep *tmp;
+
+	if (list_empty(&epq->active)) {
+		DBG(1, "%s: Nothing to do for %s queue\n", __func__, epq->name);
+		return;
+	}
+
+	DBG(1, "%s: Finishing %s transfers\n", __func__, epq->name);
+
+	atomic_inc(&epq->finishing);
+	list_for_each_entry_safe(ep, tmp, &epq->active, active) {
+		DBG(1, "%s: Checking PTD $%04x\n", __func__, ep->ptd_offset);
+
+		isp1362_read_ptd(isp1362_hcd, ep, epq);
+		DBG(1, "%s: Postprocessing %s ep %p\n", __func__, epq->name, ep);
+		postproc_ep(isp1362_hcd, ep);
+	}
+	WARN_ON(epq->blk_size != 0);
+	atomic_dec(&epq->finishing);
+}
+
+static irqreturn_t isp1362_irq(struct usb_hcd *hcd)
+{
+	int handled = 0;
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	u16 irqstat;
+	u16 svc_mask;
+
+	spin_lock(&isp1362_hcd->lock);
+
+	BUG_ON(isp1362_hcd->irq_active++);
+
+	isp1362_write_reg16(isp1362_hcd, HCuPINTENB, 0);
+
+	irqstat = isp1362_read_reg16(isp1362_hcd, HCuPINT);
+	DBG(3, "%s: got IRQ %04x:%04x\n", __func__, irqstat, isp1362_hcd->irqenb);
+
+	/* only handle interrupts that are currently enabled */
+	irqstat &= isp1362_hcd->irqenb;
+	isp1362_write_reg16(isp1362_hcd, HCuPINT, irqstat);
+	svc_mask = irqstat;
+
+	if (irqstat & HCuPINT_SOF) {
+		isp1362_hcd->irqenb &= ~HCuPINT_SOF;
+		isp1362_hcd->irq_stat[ISP1362_INT_SOF]++;
+		handled = 1;
+		svc_mask &= ~HCuPINT_SOF;
+		DBG(3, "%s: SOF\n", __func__);
+		isp1362_hcd->fmindex = isp1362_read_reg32(isp1362_hcd, HCFMNUM);
+		if (!list_empty(&isp1362_hcd->remove_list))
+			finish_unlinks(isp1362_hcd);
+		if (!list_empty(&isp1362_hcd->async) && !(irqstat & HCuPINT_ATL)) {
+			if (list_empty(&isp1362_hcd->atl_queue.active)) {
+				start_atl_transfers(isp1362_hcd);
+			} else {
+				isp1362_enable_int(isp1362_hcd, HCuPINT_ATL);
+				isp1362_write_reg32(isp1362_hcd, HCATLSKIP,
+						    isp1362_hcd->atl_queue.skip_map);
+				isp1362_set_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ATL_ACTIVE);
+			}
+		}
+	}
+
+	if (irqstat & HCuPINT_ISTL0) {
+		isp1362_hcd->irq_stat[ISP1362_INT_ISTL0]++;
+		handled = 1;
+		svc_mask &= ~HCuPINT_ISTL0;
+		isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ISTL0_FULL);
+		DBG(1, "%s: ISTL0\n", __func__);
+		WARN_ON((int)!!isp1362_hcd->istl_flip);
+		WARN_ON(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL0_ACTIVE);
+		WARN_ON(!isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL0_DONE);
+		isp1362_hcd->irqenb &= ~HCuPINT_ISTL0;
+	}
+
+	if (irqstat & HCuPINT_ISTL1) {
+		isp1362_hcd->irq_stat[ISP1362_INT_ISTL1]++;
+		handled = 1;
+		svc_mask &= ~HCuPINT_ISTL1;
+		isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ISTL1_FULL);
+		DBG(1, "%s: ISTL1\n", __func__);
+		WARN_ON(!(int)isp1362_hcd->istl_flip);
+		WARN_ON(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL1_ACTIVE);
+		WARN_ON(!isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL1_DONE);
+		isp1362_hcd->irqenb &= ~HCuPINT_ISTL1;
+	}
+
+	if (irqstat & (HCuPINT_ISTL0 | HCuPINT_ISTL1)) {
+		WARN_ON((irqstat & (HCuPINT_ISTL0 | HCuPINT_ISTL1)) ==
+			(HCuPINT_ISTL0 | HCuPINT_ISTL1));
+		finish_iso_transfers(isp1362_hcd,
+				     &isp1362_hcd->istl_queue[isp1362_hcd->istl_flip]);
+		start_iso_transfers(isp1362_hcd);
+		isp1362_hcd->istl_flip = 1 - isp1362_hcd->istl_flip;
+	}
+
+	if (irqstat & HCuPINT_INTL) {
+		u32 done_map = isp1362_read_reg32(isp1362_hcd, HCINTLDONE);
+		u32 skip_map = isp1362_read_reg32(isp1362_hcd, HCINTLSKIP);
+		isp1362_hcd->irq_stat[ISP1362_INT_INTL]++;
+
+		DBG(2, "%s: INTL\n", __func__);
+
+		svc_mask &= ~HCuPINT_INTL;
+
+		isp1362_write_reg32(isp1362_hcd, HCINTLSKIP, skip_map | done_map);
+		if (~(done_map | skip_map) == 0)
+			/* All PTDs are finished, disable INTL processing entirely */
+			isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_INTL_ACTIVE);
+
+		handled = 1;
+		WARN_ON(!done_map);
+		if (done_map) {
+			DBG(3, "%s: INTL done_map %08x\n", __func__, done_map);
+			finish_transfers(isp1362_hcd, done_map, &isp1362_hcd->intl_queue);
+			start_intl_transfers(isp1362_hcd);
+		}
+	}
+
+	if (irqstat & HCuPINT_ATL) {
+		u32 done_map = isp1362_read_reg32(isp1362_hcd, HCATLDONE);
+		u32 skip_map = isp1362_read_reg32(isp1362_hcd, HCATLSKIP);
+		isp1362_hcd->irq_stat[ISP1362_INT_ATL]++;
+
+		DBG(2, "%s: ATL\n", __func__);
+
+		svc_mask &= ~HCuPINT_ATL;
+
+		isp1362_write_reg32(isp1362_hcd, HCATLSKIP, skip_map | done_map);
+		if (~(done_map | skip_map) == 0)
+			isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ATL_ACTIVE);
+		if (done_map) {
+			DBG(3, "%s: ATL done_map %08x\n", __func__, done_map);
+			finish_transfers(isp1362_hcd, done_map, &isp1362_hcd->atl_queue);
+			start_atl_transfers(isp1362_hcd);
+		}
+		handled = 1;
+	}
+
+	if (irqstat & HCuPINT_OPR) {
+		u32 intstat = isp1362_read_reg32(isp1362_hcd, HCINTSTAT);
+		isp1362_hcd->irq_stat[ISP1362_INT_OPR]++;
+
+		svc_mask &= ~HCuPINT_OPR;
+		DBG(2, "%s: OPR %08x:%08x\n", __func__, intstat, isp1362_hcd->intenb);
+		intstat &= isp1362_hcd->intenb;
+		if (intstat & OHCI_INTR_UE) {
+			pr_err("Unrecoverable error\n");
+			/* FIXME: do here reset or cleanup or whatever */
+		}
+		if (intstat & OHCI_INTR_RHSC) {
+			isp1362_hcd->rhstatus = isp1362_read_reg32(isp1362_hcd, HCRHSTATUS);
+			isp1362_hcd->rhport[0] = isp1362_read_reg32(isp1362_hcd, HCRHPORT1);
+			isp1362_hcd->rhport[1] = isp1362_read_reg32(isp1362_hcd, HCRHPORT2);
+		}
+		if (intstat & OHCI_INTR_RD) {
+			pr_info("%s: RESUME DETECTED\n", __func__);
+			isp1362_show_reg(isp1362_hcd, HCCONTROL);
+			usb_hcd_resume_root_hub(hcd);
+		}
+		isp1362_write_reg32(isp1362_hcd, HCINTSTAT, intstat);
+		irqstat &= ~HCuPINT_OPR;
+		handled = 1;
+	}
+
+	if (irqstat & HCuPINT_SUSP) {
+		isp1362_hcd->irq_stat[ISP1362_INT_SUSP]++;
+		handled = 1;
+		svc_mask &= ~HCuPINT_SUSP;
+
+		pr_info("%s: SUSPEND IRQ\n", __func__);
+	}
+
+	if (irqstat & HCuPINT_CLKRDY) {
+		isp1362_hcd->irq_stat[ISP1362_INT_CLKRDY]++;
+		handled = 1;
+		isp1362_hcd->irqenb &= ~HCuPINT_CLKRDY;
+		svc_mask &= ~HCuPINT_CLKRDY;
+		pr_info("%s: CLKRDY IRQ\n", __func__);
+	}
+
+	if (svc_mask)
+		pr_err("%s: Unserviced interrupt(s) %04x\n", __func__, svc_mask);
+
+	isp1362_write_reg16(isp1362_hcd, HCuPINTENB, isp1362_hcd->irqenb);
+	isp1362_hcd->irq_active--;
+	spin_unlock(&isp1362_hcd->lock);
+
+	return IRQ_RETVAL(handled);
+}
+
+/*-------------------------------------------------------------------------*/
+
+#define	MAX_PERIODIC_LOAD	900	/* out of 1000 usec */
+static int balance(struct isp1362_hcd *isp1362_hcd, u16 interval, u16 load)
+{
+	int i, branch = -ENOSPC;
+
+	/* search for the least loaded schedule branch of that interval
+	 * which has enough bandwidth left unreserved.
+	 */
+	for (i = 0; i < interval; i++) {
+		if (branch < 0 || isp1362_hcd->load[branch] > isp1362_hcd->load[i]) {
+			int j;
+
+			for (j = i; j < PERIODIC_SIZE; j += interval) {
+				if ((isp1362_hcd->load[j] + load) > MAX_PERIODIC_LOAD) {
+					pr_err("%s: new load %d load[%02x] %d max %d\n", __func__,
+					    load, j, isp1362_hcd->load[j], MAX_PERIODIC_LOAD);
+					break;
+				}
+			}
+			if (j < PERIODIC_SIZE)
+				continue;
+			branch = i;
+		}
+	}
+	return branch;
+}
+
+/* NB! ALL the code above this point runs with isp1362_hcd->lock
+   held, irqs off
+*/
+
+/*-------------------------------------------------------------------------*/
+
+static int isp1362_urb_enqueue(struct usb_hcd *hcd,
+			       struct urb *urb,
+			       gfp_t mem_flags)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	struct usb_device *udev = urb->dev;
+	unsigned int pipe = urb->pipe;
+	int is_out = !usb_pipein(pipe);
+	int type = usb_pipetype(pipe);
+	int epnum = usb_pipeendpoint(pipe);
+	struct usb_host_endpoint *hep = urb->ep;
+	struct isp1362_ep *ep = NULL;
+	unsigned long flags;
+	int retval = 0;
+
+	DBG(3, "%s: urb %p\n", __func__, urb);
+
+	if (type == PIPE_ISOCHRONOUS) {
+		pr_err("Isochronous transfers not supported\n");
+		return -ENOSPC;
+	}
+
+	URB_DBG("%s: FA %d ep%d%s %s: len %d %s%s\n", __func__,
+		usb_pipedevice(pipe), epnum,
+		is_out ? "out" : "in",
+		usb_pipecontrol(pipe) ? "ctrl" :
+			usb_pipeint(pipe) ? "int" :
+			usb_pipebulk(pipe) ? "bulk" :
+			"iso",
+		urb->transfer_buffer_length,
+		(urb->transfer_flags & URB_ZERO_PACKET) ? "ZERO_PACKET " : "",
+		!(urb->transfer_flags & URB_SHORT_NOT_OK) ?
+		"short_ok" : "");
+
+	/* avoid all allocations within spinlocks: request or endpoint */
+	if (!hep->hcpriv) {
+		ep = kcalloc(1, sizeof *ep, mem_flags);
+		if (!ep)
+			return -ENOMEM;
+	}
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+
+	/* don't submit to a dead or disabled port */
+	if (!((isp1362_hcd->rhport[0] | isp1362_hcd->rhport[1]) &
+	      (1 << USB_PORT_FEAT_ENABLE)) ||
+	    !HC_IS_RUNNING(hcd->state)) {
+		kfree(ep);
+		retval = -ENODEV;
+		goto fail_not_linked;
+	}
+
+	retval = usb_hcd_link_urb_to_ep(hcd, urb);
+	if (retval) {
+		kfree(ep);
+		goto fail_not_linked;
+	}
+
+	if (hep->hcpriv) {
+		ep = hep->hcpriv;
+	} else {
+		INIT_LIST_HEAD(&ep->schedule);
+		INIT_LIST_HEAD(&ep->active);
+		INIT_LIST_HEAD(&ep->remove_list);
+		ep->udev = usb_get_dev(udev);
+		ep->hep = hep;
+		ep->epnum = epnum;
+		ep->maxpacket = usb_maxpacket(udev, urb->pipe, is_out);
+		ep->ptd_offset = -EINVAL;
+		ep->ptd_index = -EINVAL;
+		usb_settoggle(udev, epnum, is_out, 0);
+
+		if (type == PIPE_CONTROL)
+			ep->nextpid = USB_PID_SETUP;
+		else if (is_out)
+			ep->nextpid = USB_PID_OUT;
+		else
+			ep->nextpid = USB_PID_IN;
+
+		switch (type) {
+		case PIPE_ISOCHRONOUS:
+		case PIPE_INTERRUPT:
+			if (urb->interval > PERIODIC_SIZE)
+				urb->interval = PERIODIC_SIZE;
+			ep->interval = urb->interval;
+			ep->branch = PERIODIC_SIZE;
+			ep->load = usb_calc_bus_time(udev->speed, !is_out,
+						     (type == PIPE_ISOCHRONOUS),
+						     usb_maxpacket(udev, pipe, is_out)) / 1000;
+			break;
+		}
+		hep->hcpriv = ep;
+	}
+	ep->num_req = isp1362_hcd->req_serial++;
+
+	/* maybe put endpoint into schedule */
+	switch (type) {
+	case PIPE_CONTROL:
+	case PIPE_BULK:
+		if (list_empty(&ep->schedule)) {
+			DBG(1, "%s: Adding ep %p req %d to async schedule\n",
+				__func__, ep, ep->num_req);
+			list_add_tail(&ep->schedule, &isp1362_hcd->async);
+		}
+		break;
+	case PIPE_ISOCHRONOUS:
+	case PIPE_INTERRUPT:
+		urb->interval = ep->interval;
+
+		/* urb submitted for already existing EP */
+		if (ep->branch < PERIODIC_SIZE)
+			break;
+
+		retval = balance(isp1362_hcd, ep->interval, ep->load);
+		if (retval < 0) {
+			pr_err("%s: balance returned %d\n", __func__, retval);
+			goto fail;
+		}
+		ep->branch = retval;
+		retval = 0;
+		isp1362_hcd->fmindex = isp1362_read_reg32(isp1362_hcd, HCFMNUM);
+		DBG(1, "%s: Current frame %04x branch %02x start_frame %04x(%04x)\n",
+		    __func__, isp1362_hcd->fmindex, ep->branch,
+		    ((isp1362_hcd->fmindex + PERIODIC_SIZE - 1) &
+		     ~(PERIODIC_SIZE - 1)) + ep->branch,
+		    (isp1362_hcd->fmindex & (PERIODIC_SIZE - 1)) + ep->branch);
+
+		if (list_empty(&ep->schedule)) {
+			if (type == PIPE_ISOCHRONOUS) {
+				u16 frame = isp1362_hcd->fmindex;
+
+				frame += max_t(u16, 8, ep->interval);
+				frame &= ~(ep->interval - 1);
+				frame |= ep->branch;
+				if (frame_before(frame, isp1362_hcd->fmindex))
+					frame += ep->interval;
+				urb->start_frame = frame;
+
+				DBG(1, "%s: Adding ep %p to isoc schedule\n", __func__, ep);
+				list_add_tail(&ep->schedule, &isp1362_hcd->isoc);
+			} else {
+				DBG(1, "%s: Adding ep %p to periodic schedule\n", __func__, ep);
+				list_add_tail(&ep->schedule, &isp1362_hcd->periodic);
+			}
+		} else
+			DBG(1, "%s: ep %p already scheduled\n", __func__, ep);
+
+		DBG(2, "%s: load %d bandwidth %d -> %d\n", __func__,
+		    ep->load / ep->interval, isp1362_hcd->load[ep->branch],
+		    isp1362_hcd->load[ep->branch] + ep->load);
+		isp1362_hcd->load[ep->branch] += ep->load;
+	}
+
+	urb->hcpriv = hep;
+	ALIGNSTAT(isp1362_hcd, urb->transfer_buffer);
+
+	switch (type) {
+	case PIPE_CONTROL:
+	case PIPE_BULK:
+		start_atl_transfers(isp1362_hcd);
+		break;
+	case PIPE_INTERRUPT:
+		start_intl_transfers(isp1362_hcd);
+		break;
+	case PIPE_ISOCHRONOUS:
+		start_iso_transfers(isp1362_hcd);
+		break;
+	default:
+		BUG();
+	}
+ fail:
+	if (retval)
+		usb_hcd_unlink_urb_from_ep(hcd, urb);
+
+
+ fail_not_linked:
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	if (retval)
+		DBG(0, "%s: urb %p failed with %d\n", __func__, urb, retval);
+	return retval;
+}
+
+static int isp1362_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	struct usb_host_endpoint *hep;
+	unsigned long flags;
+	struct isp1362_ep *ep;
+	int retval = 0;
+
+	DBG(3, "%s: urb %p\n", __func__, urb);
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	retval = usb_hcd_check_unlink_urb(hcd, urb, status);
+	if (retval)
+		goto done;
+
+	hep = urb->hcpriv;
+
+	if (!hep) {
+		spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+		return -EIDRM;
+	}
+
+	ep = hep->hcpriv;
+	if (ep) {
+		/* In front of queue? */
+		if (ep->hep->urb_list.next == &urb->urb_list) {
+			if (!list_empty(&ep->active)) {
+				DBG(1, "%s: urb %p ep %p req %d active PTD[%d] $%04x\n", __func__,
+				    urb, ep, ep->num_req, ep->ptd_index, ep->ptd_offset);
+				/* disable processing and queue PTD for removal */
+				remove_ptd(isp1362_hcd, ep);
+				urb = NULL;
+			}
+		}
+		if (urb) {
+			DBG(1, "%s: Finishing ep %p req %d\n", __func__, ep,
+			    ep->num_req);
+			finish_request(isp1362_hcd, ep, urb, status);
+		} else
+			DBG(1, "%s: urb %p active; wait4irq\n", __func__, urb);
+	} else {
+		pr_warning("%s: No EP in URB %p\n", __func__, urb);
+		retval = -EINVAL;
+	}
+done:
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	DBG(3, "%s: exit\n", __func__);
+
+	return retval;
+}
+
+static void isp1362_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep)
+{
+	struct isp1362_ep *ep = hep->hcpriv;
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	unsigned long flags;
+
+	DBG(1, "%s: ep %p\n", __func__, ep);
+	if (!ep)
+		return;
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	if (!list_empty(&hep->urb_list)) {
+		if (!list_empty(&ep->active) && list_empty(&ep->remove_list)) {
+			DBG(1, "%s: Removing ep %p req %d PTD[%d] $%04x\n", __func__,
+			    ep, ep->num_req, ep->ptd_index, ep->ptd_offset);
+			remove_ptd(isp1362_hcd, ep);
+			pr_info("%s: Waiting for Interrupt to clean up\n", __func__);
+		}
+	}
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	/* Wait for interrupt to clear out active list */
+	while (!list_empty(&ep->active))
+		msleep(1);
+
+	DBG(1, "%s: Freeing EP %p\n", __func__, ep);
+
+	usb_put_dev(ep->udev);
+	kfree(ep);
+	hep->hcpriv = NULL;
+}
+
+static int isp1362_get_frame(struct usb_hcd *hcd)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	u32 fmnum;
+	unsigned long flags;
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	fmnum = isp1362_read_reg32(isp1362_hcd, HCFMNUM);
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	return (int)fmnum;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* Adapted from ohci-hub.c */
+static int isp1362_hub_status_data(struct usb_hcd *hcd, char *buf)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	int ports, i, changed = 0;
+	unsigned long flags;
+
+	if (!HC_IS_RUNNING(hcd->state))
+		return -ESHUTDOWN;
+
+	/* Report no status change now, if we are scheduled to be
+	   called later */
+	if (timer_pending(&hcd->rh_timer))
+		return 0;
+
+	ports = isp1362_hcd->rhdesca & RH_A_NDP;
+	BUG_ON(ports > 2);
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	/* init status */
+	if (isp1362_hcd->rhstatus & (RH_HS_LPSC | RH_HS_OCIC))
+		buf[0] = changed = 1;
+	else
+		buf[0] = 0;
+
+	for (i = 0; i < ports; i++) {
+		u32 status = isp1362_hcd->rhport[i];
+
+		if (status & (RH_PS_CSC | RH_PS_PESC | RH_PS_PSSC |
+			      RH_PS_OCIC | RH_PS_PRSC)) {
+			changed = 1;
+			buf[0] |= 1 << (i + 1);
+			continue;
+		}
+
+		if (!(status & RH_PS_CCS))
+			continue;
+	}
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	return changed;
+}
+
+static void isp1362_hub_descriptor(struct isp1362_hcd *isp1362_hcd,
+				   struct usb_hub_descriptor *desc)
+{
+	u32 reg = isp1362_hcd->rhdesca;
+
+	DBG(3, "%s: enter\n", __func__);
+
+	desc->bDescriptorType = 0x29;
+	desc->bDescLength = 9;
+	desc->bHubContrCurrent = 0;
+	desc->bNbrPorts = reg & 0x3;
+	/* Power switching, device type, overcurrent. */
+	desc->wHubCharacteristics = cpu_to_le16((reg >> 8) & 0x1f);
+	DBG(0, "%s: hubcharacteristics = %02x\n", __func__, cpu_to_le16((reg >> 8) & 0x1f));
+	desc->bPwrOn2PwrGood = (reg >> 24) & 0xff;
+	/* two bitmaps:  ports removable, and legacy PortPwrCtrlMask */
+	desc->bitmap[0] = desc->bNbrPorts == 1 ? 1 << 1 : 3 << 1;
+	desc->bitmap[1] = ~0;
+
+	DBG(3, "%s: exit\n", __func__);
+}
+
+/* Adapted from ohci-hub.c */
+static int isp1362_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
+			       u16 wIndex, char *buf, u16 wLength)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	int retval = 0;
+	unsigned long flags;
+	unsigned long t1;
+	int ports = isp1362_hcd->rhdesca & RH_A_NDP;
+	u32 tmp = 0;
+
+	switch (typeReq) {
+	case ClearHubFeature:
+		DBG(0, "ClearHubFeature: ");
+		switch (wValue) {
+		case C_HUB_OVER_CURRENT:
+			_DBG(0, "C_HUB_OVER_CURRENT\n");
+			spin_lock_irqsave(&isp1362_hcd->lock, flags);
+			isp1362_write_reg32(isp1362_hcd, HCRHSTATUS, RH_HS_OCIC);
+			spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+		case C_HUB_LOCAL_POWER:
+			_DBG(0, "C_HUB_LOCAL_POWER\n");
+			break;
+		default:
+			goto error;
+		}
+		break;
+	case SetHubFeature:
+		DBG(0, "SetHubFeature: ");
+		switch (wValue) {
+		case C_HUB_OVER_CURRENT:
+		case C_HUB_LOCAL_POWER:
+			_DBG(0, "C_HUB_OVER_CURRENT or C_HUB_LOCAL_POWER\n");
+			break;
+		default:
+			goto error;
+		}
+		break;
+	case GetHubDescriptor:
+		DBG(0, "GetHubDescriptor\n");
+		isp1362_hub_descriptor(isp1362_hcd, (struct usb_hub_descriptor *)buf);
+		break;
+	case GetHubStatus:
+		DBG(0, "GetHubStatus\n");
+		put_unaligned(cpu_to_le32(0), (__le32 *) buf);
+		break;
+	case GetPortStatus:
+#ifndef VERBOSE
+		DBG(0, "GetPortStatus\n");
+#endif
+		if (!wIndex || wIndex > ports)
+			goto error;
+		tmp = isp1362_hcd->rhport[--wIndex];
+		put_unaligned(cpu_to_le32(tmp), (__le32 *) buf);
+		break;
+	case ClearPortFeature:
+		DBG(0, "ClearPortFeature: ");
+		if (!wIndex || wIndex > ports)
+			goto error;
+		wIndex--;
+
+		switch (wValue) {
+		case USB_PORT_FEAT_ENABLE:
+			_DBG(0, "USB_PORT_FEAT_ENABLE\n");
+			tmp = RH_PS_CCS;
+			break;
+		case USB_PORT_FEAT_C_ENABLE:
+			_DBG(0, "USB_PORT_FEAT_C_ENABLE\n");
+			tmp = RH_PS_PESC;
+			break;
+		case USB_PORT_FEAT_SUSPEND:
+			_DBG(0, "USB_PORT_FEAT_SUSPEND\n");
+			tmp = RH_PS_POCI;
+			break;
+		case USB_PORT_FEAT_C_SUSPEND:
+			_DBG(0, "USB_PORT_FEAT_C_SUSPEND\n");
+			tmp = RH_PS_PSSC;
+			break;
+		case USB_PORT_FEAT_POWER:
+			_DBG(0, "USB_PORT_FEAT_POWER\n");
+			tmp = RH_PS_LSDA;
+
+			break;
+		case USB_PORT_FEAT_C_CONNECTION:
+			_DBG(0, "USB_PORT_FEAT_C_CONNECTION\n");
+			tmp = RH_PS_CSC;
+			break;
+		case USB_PORT_FEAT_C_OVER_CURRENT:
+			_DBG(0, "USB_PORT_FEAT_C_OVER_CURRENT\n");
+			tmp = RH_PS_OCIC;
+			break;
+		case USB_PORT_FEAT_C_RESET:
+			_DBG(0, "USB_PORT_FEAT_C_RESET\n");
+			tmp = RH_PS_PRSC;
+			break;
+		default:
+			goto error;
+		}
+
+		spin_lock_irqsave(&isp1362_hcd->lock, flags);
+		isp1362_write_reg32(isp1362_hcd, HCRHPORT1 + wIndex, tmp);
+		isp1362_hcd->rhport[wIndex] =
+			isp1362_read_reg32(isp1362_hcd, HCRHPORT1 + wIndex);
+		spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+		break;
+	case SetPortFeature:
+		DBG(0, "SetPortFeature: ");
+		if (!wIndex || wIndex > ports)
+			goto error;
+		wIndex--;
+		switch (wValue) {
+		case USB_PORT_FEAT_SUSPEND:
+			_DBG(0, "USB_PORT_FEAT_SUSPEND\n");
+#ifdef	CONFIG_USB_OTG
+			if (ohci->hcd.self.otg_port == (wIndex + 1) &&
+			    ohci->hcd.self.b_hnp_enable) {
+				start_hnp(ohci);
+				break;
+			}
+#endif
+			spin_lock_irqsave(&isp1362_hcd->lock, flags);
+			isp1362_write_reg32(isp1362_hcd, HCRHPORT1 + wIndex, RH_PS_PSS);
+			isp1362_hcd->rhport[wIndex] =
+				isp1362_read_reg32(isp1362_hcd, HCRHPORT1 + wIndex);
+			spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+			break;
+		case USB_PORT_FEAT_POWER:
+			_DBG(0, "USB_PORT_FEAT_POWER\n");
+			spin_lock_irqsave(&isp1362_hcd->lock, flags);
+			isp1362_write_reg32(isp1362_hcd, HCRHPORT1 + wIndex, RH_PS_PPS);
+			isp1362_hcd->rhport[wIndex] =
+				isp1362_read_reg32(isp1362_hcd, HCRHPORT1 + wIndex);
+			spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+			break;
+		case USB_PORT_FEAT_RESET:
+			_DBG(0, "USB_PORT_FEAT_RESET\n");
+			spin_lock_irqsave(&isp1362_hcd->lock, flags);
+
+			t1 = jiffies + msecs_to_jiffies(USB_RESET_WIDTH);
+			while (time_before(jiffies, t1)) {
+				/* spin until any current reset finishes */
+				for (;;) {
+					tmp = isp1362_read_reg32(isp1362_hcd, HCRHPORT1 + wIndex);
+					if (!(tmp & RH_PS_PRS))
+						break;
+					udelay(500);
+				}
+				if (!(tmp & RH_PS_CCS))
+					break;
+				/* Reset lasts 10ms (claims datasheet) */
+				isp1362_write_reg32(isp1362_hcd, HCRHPORT1 + wIndex, (RH_PS_PRS));
+
+				spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+				msleep(10);
+				spin_lock_irqsave(&isp1362_hcd->lock, flags);
+			}
+
+			isp1362_hcd->rhport[wIndex] = isp1362_read_reg32(isp1362_hcd,
+									 HCRHPORT1 + wIndex);
+			spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+			break;
+		default:
+			goto error;
+		}
+		break;
+
+	default:
+ error:
+		/* "protocol stall" on error */
+		_DBG(0, "PROTOCOL STALL\n");
+		retval = -EPIPE;
+	}
+
+	return retval;
+}
+
+#ifdef	CONFIG_PM
+static int isp1362_bus_suspend(struct usb_hcd *hcd)
+{
+	int status = 0;
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	unsigned long flags;
+
+	if (time_before(jiffies, isp1362_hcd->next_statechange))
+		msleep(5);
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+
+	isp1362_hcd->hc_control = isp1362_read_reg32(isp1362_hcd, HCCONTROL);
+	switch (isp1362_hcd->hc_control & OHCI_CTRL_HCFS) {
+	case OHCI_USB_RESUME:
+		DBG(0, "%s: resume/suspend?\n", __func__);
+		isp1362_hcd->hc_control &= ~OHCI_CTRL_HCFS;
+		isp1362_hcd->hc_control |= OHCI_USB_RESET;
+		isp1362_write_reg32(isp1362_hcd, HCCONTROL, isp1362_hcd->hc_control);
+		/* FALL THROUGH */
+	case OHCI_USB_RESET:
+		status = -EBUSY;
+		pr_warning("%s: needs reinit!\n", __func__);
+		goto done;
+	case OHCI_USB_SUSPEND:
+		pr_warning("%s: already suspended?\n", __func__);
+		goto done;
+	}
+	DBG(0, "%s: suspend root hub\n", __func__);
+
+	/* First stop any processing */
+	hcd->state = HC_STATE_QUIESCING;
+	if (!list_empty(&isp1362_hcd->atl_queue.active) ||
+	    !list_empty(&isp1362_hcd->intl_queue.active) ||
+	    !list_empty(&isp1362_hcd->istl_queue[0] .active) ||
+	    !list_empty(&isp1362_hcd->istl_queue[1] .active)) {
+		int limit;
+
+		isp1362_write_reg32(isp1362_hcd, HCATLSKIP, ~0);
+		isp1362_write_reg32(isp1362_hcd, HCINTLSKIP, ~0);
+		isp1362_write_reg16(isp1362_hcd, HCBUFSTAT, 0);
+		isp1362_write_reg16(isp1362_hcd, HCuPINTENB, 0);
+		isp1362_write_reg32(isp1362_hcd, HCINTSTAT, OHCI_INTR_SF);
+
+		DBG(0, "%s: stopping schedules ...\n", __func__);
+		limit = 2000;
+		while (limit > 0) {
+			udelay(250);
+			limit -= 250;
+			if (isp1362_read_reg32(isp1362_hcd, HCINTSTAT) & OHCI_INTR_SF)
+				break;
+		}
+		mdelay(7);
+		if (isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_ATL) {
+			u32 done_map = isp1362_read_reg32(isp1362_hcd, HCATLDONE);
+			finish_transfers(isp1362_hcd, done_map, &isp1362_hcd->atl_queue);
+		}
+		if (isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_INTL) {
+			u32 done_map = isp1362_read_reg32(isp1362_hcd, HCINTLDONE);
+			finish_transfers(isp1362_hcd, done_map, &isp1362_hcd->intl_queue);
+		}
+		if (isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_ISTL0)
+			finish_iso_transfers(isp1362_hcd, &isp1362_hcd->istl_queue[0]);
+		if (isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_ISTL1)
+			finish_iso_transfers(isp1362_hcd, &isp1362_hcd->istl_queue[1]);
+	}
+	DBG(0, "%s: HCINTSTAT: %08x\n", __func__,
+		    isp1362_read_reg32(isp1362_hcd, HCINTSTAT));
+	isp1362_write_reg32(isp1362_hcd, HCINTSTAT,
+			    isp1362_read_reg32(isp1362_hcd, HCINTSTAT));
+
+	/* Suspend hub */
+	isp1362_hcd->hc_control = OHCI_USB_SUSPEND;
+	isp1362_show_reg(isp1362_hcd, HCCONTROL);
+	isp1362_write_reg32(isp1362_hcd, HCCONTROL, isp1362_hcd->hc_control);
+	isp1362_show_reg(isp1362_hcd, HCCONTROL);
+
+#if 1
+	isp1362_hcd->hc_control = isp1362_read_reg32(isp1362_hcd, HCCONTROL);
+	if ((isp1362_hcd->hc_control & OHCI_CTRL_HCFS) != OHCI_USB_SUSPEND) {
+		pr_err("%s: controller won't suspend %08x\n", __func__,
+		    isp1362_hcd->hc_control);
+		status = -EBUSY;
+	} else
+#endif
+	{
+		/* no resumes until devices finish suspending */
+		isp1362_hcd->next_statechange = jiffies + msecs_to_jiffies(5);
+	}
+done:
+	if (status == 0) {
+		hcd->state = HC_STATE_SUSPENDED;
+		DBG(0, "%s: HCD suspended: %08x\n", __func__,
+		    isp1362_read_reg32(isp1362_hcd, HCCONTROL));
+	}
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	return status;
+}
+
+static int isp1362_bus_resume(struct usb_hcd *hcd)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	u32 port;
+	unsigned long flags;
+	int status = -EINPROGRESS;
+
+	if (time_before(jiffies, isp1362_hcd->next_statechange))
+		msleep(5);
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	isp1362_hcd->hc_control = isp1362_read_reg32(isp1362_hcd, HCCONTROL);
+	pr_info("%s: HCCONTROL: %08x\n", __func__, isp1362_hcd->hc_control);
+	if (hcd->state == HC_STATE_RESUMING) {
+		pr_warning("%s: duplicate resume\n", __func__);
+		status = 0;
+	} else
+		switch (isp1362_hcd->hc_control & OHCI_CTRL_HCFS) {
+		case OHCI_USB_SUSPEND:
+			DBG(0, "%s: resume root hub\n", __func__);
+			isp1362_hcd->hc_control &= ~OHCI_CTRL_HCFS;
+			isp1362_hcd->hc_control |= OHCI_USB_RESUME;
+			isp1362_write_reg32(isp1362_hcd, HCCONTROL, isp1362_hcd->hc_control);
+			break;
+		case OHCI_USB_RESUME:
+			/* HCFS changes sometime after INTR_RD */
+			DBG(0, "%s: remote wakeup\n", __func__);
+			break;
+		case OHCI_USB_OPER:
+			DBG(0, "%s: odd resume\n", __func__);
+			status = 0;
+			hcd->self.root_hub->dev.power.power_state = PMSG_ON;
+			break;
+		default:		/* RESET, we lost power */
+			DBG(0, "%s: root hub hardware reset\n", __func__);
+			status = -EBUSY;
+		}
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	if (status == -EBUSY) {
+		DBG(0, "%s: Restarting HC\n", __func__);
+		isp1362_hc_stop(hcd);
+		return isp1362_hc_start(hcd);
+	}
+	if (status != -EINPROGRESS)
+		return status;
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	port = isp1362_read_reg32(isp1362_hcd, HCRHDESCA) & RH_A_NDP;
+	while (port--) {
+		u32 stat = isp1362_read_reg32(isp1362_hcd, HCRHPORT1 + port);
+
+		/* force global, not selective, resume */
+		if (!(stat & RH_PS_PSS)) {
+			DBG(0, "%s: Not Resuming RH port %d\n", __func__, port);
+			continue;
+		}
+		DBG(0, "%s: Resuming RH port %d\n", __func__, port);
+		isp1362_write_reg32(isp1362_hcd, HCRHPORT1 + port, RH_PS_POCI);
+	}
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	/* Some controllers (lucent) need extra-long delays */
+	hcd->state = HC_STATE_RESUMING;
+	mdelay(20 /* usb 11.5.1.10 */ + 15);
+
+	isp1362_hcd->hc_control = OHCI_USB_OPER;
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	isp1362_show_reg(isp1362_hcd, HCCONTROL);
+	isp1362_write_reg32(isp1362_hcd, HCCONTROL, isp1362_hcd->hc_control);
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	/* TRSMRCY */
+	msleep(10);
+
+	/* keep it alive for ~5x suspend + resume costs */
+	isp1362_hcd->next_statechange = jiffies + msecs_to_jiffies(250);
+
+	hcd->self.root_hub->dev.power.power_state = PMSG_ON;
+	hcd->state = HC_STATE_RUNNING;
+	return 0;
+}
+#else
+#define	isp1362_bus_suspend	NULL
+#define	isp1362_bus_resume	NULL
+#endif
+
+/*-------------------------------------------------------------------------*/
+
+#ifdef STUB_DEBUG_FILE
+
+static inline void create_debug_file(struct isp1362_hcd *isp1362_hcd)
+{
+}
+static inline void remove_debug_file(struct isp1362_hcd *isp1362_hcd)
+{
+}
+
+#else
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+static void dump_irq(struct seq_file *s, char *label, u16 mask)
+{
+	seq_printf(s, "%-15s %04x%s%s%s%s%s%s\n", label, mask,
+		   mask & HCuPINT_CLKRDY ? " clkrdy" : "",
+		   mask & HCuPINT_SUSP ? " susp" : "",
+		   mask & HCuPINT_OPR ? " opr" : "",
+		   mask & HCuPINT_EOT ? " eot" : "",
+		   mask & HCuPINT_ATL ? " atl" : "",
+		   mask & HCuPINT_SOF ? " sof" : "");
+}
+
+static void dump_int(struct seq_file *s, char *label, u32 mask)
+{
+	seq_printf(s, "%-15s %08x%s%s%s%s%s%s%s\n", label, mask,
+		   mask & OHCI_INTR_MIE ? " MIE" : "",
+		   mask & OHCI_INTR_RHSC ? " rhsc" : "",
+		   mask & OHCI_INTR_FNO ? " fno" : "",
+		   mask & OHCI_INTR_UE ? " ue" : "",
+		   mask & OHCI_INTR_RD ? " rd" : "",
+		   mask & OHCI_INTR_SF ? " sof" : "",
+		   mask & OHCI_INTR_SO ? " so" : "");
+}
+
+static void dump_ctrl(struct seq_file *s, char *label, u32 mask)
+{
+	seq_printf(s, "%-15s %08x%s%s%s\n", label, mask,
+		   mask & OHCI_CTRL_RWC ? " rwc" : "",
+		   mask & OHCI_CTRL_RWE ? " rwe" : "",
+		   ({
+			   char *hcfs;
+			   switch (mask & OHCI_CTRL_HCFS) {
+			   case OHCI_USB_OPER:
+				   hcfs = " oper";
+				   break;
+			   case OHCI_USB_RESET:
+				   hcfs = " reset";
+				   break;
+			   case OHCI_USB_RESUME:
+				   hcfs = " resume";
+				   break;
+			   case OHCI_USB_SUSPEND:
+				   hcfs = " suspend";
+				   break;
+			   default:
+				   hcfs = " ?";
+			   }
+			   hcfs;
+		   }));
+}
+
+static void dump_regs(struct seq_file *s, struct isp1362_hcd *isp1362_hcd)
+{
+	seq_printf(s, "HCREVISION [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCREVISION),
+		   isp1362_read_reg32(isp1362_hcd, HCREVISION));
+	seq_printf(s, "HCCONTROL  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCCONTROL),
+		   isp1362_read_reg32(isp1362_hcd, HCCONTROL));
+	seq_printf(s, "HCCMDSTAT  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCCMDSTAT),
+		   isp1362_read_reg32(isp1362_hcd, HCCMDSTAT));
+	seq_printf(s, "HCINTSTAT  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCINTSTAT),
+		   isp1362_read_reg32(isp1362_hcd, HCINTSTAT));
+	seq_printf(s, "HCINTENB   [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCINTENB),
+		   isp1362_read_reg32(isp1362_hcd, HCINTENB));
+	seq_printf(s, "HCFMINTVL  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCFMINTVL),
+		   isp1362_read_reg32(isp1362_hcd, HCFMINTVL));
+	seq_printf(s, "HCFMREM    [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCFMREM),
+		   isp1362_read_reg32(isp1362_hcd, HCFMREM));
+	seq_printf(s, "HCFMNUM    [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCFMNUM),
+		   isp1362_read_reg32(isp1362_hcd, HCFMNUM));
+	seq_printf(s, "HCLSTHRESH [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCLSTHRESH),
+		   isp1362_read_reg32(isp1362_hcd, HCLSTHRESH));
+	seq_printf(s, "HCRHDESCA  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCRHDESCA),
+		   isp1362_read_reg32(isp1362_hcd, HCRHDESCA));
+	seq_printf(s, "HCRHDESCB  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCRHDESCB),
+		   isp1362_read_reg32(isp1362_hcd, HCRHDESCB));
+	seq_printf(s, "HCRHSTATUS [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCRHSTATUS),
+		   isp1362_read_reg32(isp1362_hcd, HCRHSTATUS));
+	seq_printf(s, "HCRHPORT1  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCRHPORT1),
+		   isp1362_read_reg32(isp1362_hcd, HCRHPORT1));
+	seq_printf(s, "HCRHPORT2  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCRHPORT2),
+		   isp1362_read_reg32(isp1362_hcd, HCRHPORT2));
+	seq_printf(s, "\n");
+	seq_printf(s, "HCHWCFG    [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCHWCFG),
+		   isp1362_read_reg16(isp1362_hcd, HCHWCFG));
+	seq_printf(s, "HCDMACFG   [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCDMACFG),
+		   isp1362_read_reg16(isp1362_hcd, HCDMACFG));
+	seq_printf(s, "HCXFERCTR  [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCXFERCTR),
+		   isp1362_read_reg16(isp1362_hcd, HCXFERCTR));
+	seq_printf(s, "HCuPINT    [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCuPINT),
+		   isp1362_read_reg16(isp1362_hcd, HCuPINT));
+	seq_printf(s, "HCuPINTENB [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCuPINTENB),
+		   isp1362_read_reg16(isp1362_hcd, HCuPINTENB));
+	seq_printf(s, "HCCHIPID   [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCCHIPID),
+		   isp1362_read_reg16(isp1362_hcd, HCCHIPID));
+	seq_printf(s, "HCSCRATCH  [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCSCRATCH),
+		   isp1362_read_reg16(isp1362_hcd, HCSCRATCH));
+	seq_printf(s, "HCBUFSTAT  [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCBUFSTAT),
+		   isp1362_read_reg16(isp1362_hcd, HCBUFSTAT));
+	seq_printf(s, "HCDIRADDR  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCDIRADDR),
+		   isp1362_read_reg32(isp1362_hcd, HCDIRADDR));
+#if 0
+	seq_printf(s, "HCDIRDATA  [%02x]     %04x\n", ISP1362_REG_NO(HCDIRDATA),
+		   isp1362_read_reg16(isp1362_hcd, HCDIRDATA));
+#endif
+	seq_printf(s, "HCISTLBUFSZ[%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCISTLBUFSZ),
+		   isp1362_read_reg16(isp1362_hcd, HCISTLBUFSZ));
+	seq_printf(s, "HCISTLRATE [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCISTLRATE),
+		   isp1362_read_reg16(isp1362_hcd, HCISTLRATE));
+	seq_printf(s, "\n");
+	seq_printf(s, "HCINTLBUFSZ[%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCINTLBUFSZ),
+		   isp1362_read_reg16(isp1362_hcd, HCINTLBUFSZ));
+	seq_printf(s, "HCINTLBLKSZ[%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCINTLBLKSZ),
+		   isp1362_read_reg16(isp1362_hcd, HCINTLBLKSZ));
+	seq_printf(s, "HCINTLDONE [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCINTLDONE),
+		   isp1362_read_reg32(isp1362_hcd, HCINTLDONE));
+	seq_printf(s, "HCINTLSKIP [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCINTLSKIP),
+		   isp1362_read_reg32(isp1362_hcd, HCINTLSKIP));
+	seq_printf(s, "HCINTLLAST [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCINTLLAST),
+		   isp1362_read_reg32(isp1362_hcd, HCINTLLAST));
+	seq_printf(s, "HCINTLCURR [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCINTLCURR),
+		   isp1362_read_reg16(isp1362_hcd, HCINTLCURR));
+	seq_printf(s, "\n");
+	seq_printf(s, "HCATLBUFSZ [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCATLBUFSZ),
+		   isp1362_read_reg16(isp1362_hcd, HCATLBUFSZ));
+	seq_printf(s, "HCATLBLKSZ [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCATLBLKSZ),
+		   isp1362_read_reg16(isp1362_hcd, HCATLBLKSZ));
+#if 0
+	seq_printf(s, "HCATLDONE  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCATLDONE),
+		   isp1362_read_reg32(isp1362_hcd, HCATLDONE));
+#endif
+	seq_printf(s, "HCATLSKIP  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCATLSKIP),
+		   isp1362_read_reg32(isp1362_hcd, HCATLSKIP));
+	seq_printf(s, "HCATLLAST  [%02x] %08x\n", ISP1362_REG_NO(ISP1362_REG_HCATLLAST),
+		   isp1362_read_reg32(isp1362_hcd, HCATLLAST));
+	seq_printf(s, "HCATLCURR  [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCATLCURR),
+		   isp1362_read_reg16(isp1362_hcd, HCATLCURR));
+	seq_printf(s, "\n");
+	seq_printf(s, "HCATLDTC   [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCATLDTC),
+		   isp1362_read_reg16(isp1362_hcd, HCATLDTC));
+	seq_printf(s, "HCATLDTCTO [%02x]     %04x\n", ISP1362_REG_NO(ISP1362_REG_HCATLDTCTO),
+		   isp1362_read_reg16(isp1362_hcd, HCATLDTCTO));
+}
+
+static int proc_isp1362_show(struct seq_file *s, void *unused)
+{
+	struct isp1362_hcd *isp1362_hcd = s->private;
+	struct isp1362_ep *ep;
+	int i;
+
+	seq_printf(s, "%s\n%s version %s\n",
+		   isp1362_hcd_to_hcd(isp1362_hcd)->product_desc, hcd_name, DRIVER_VERSION);
+
+	/* collect statistics to help estimate potential win for
+	 * DMA engines that care about alignment (PXA)
+	 */
+	seq_printf(s, "alignment:  16b/%ld 8b/%ld 4b/%ld 2b/%ld 1b/%ld\n",
+		   isp1362_hcd->stat16, isp1362_hcd->stat8, isp1362_hcd->stat4,
+		   isp1362_hcd->stat2, isp1362_hcd->stat1);
+	seq_printf(s, "max # ptds in ATL  fifo: %d\n", isp1362_hcd->atl_queue.stat_maxptds);
+	seq_printf(s, "max # ptds in INTL fifo: %d\n", isp1362_hcd->intl_queue.stat_maxptds);
+	seq_printf(s, "max # ptds in ISTL fifo: %d\n",
+		   max(isp1362_hcd->istl_queue[0] .stat_maxptds,
+		       isp1362_hcd->istl_queue[1] .stat_maxptds));
+
+	/* FIXME: don't show the following in suspended state */
+	spin_lock_irq(&isp1362_hcd->lock);
+
+	dump_irq(s, "hc_irq_enable", isp1362_read_reg16(isp1362_hcd, HCuPINTENB));
+	dump_irq(s, "hc_irq_status", isp1362_read_reg16(isp1362_hcd, HCuPINT));
+	dump_int(s, "ohci_int_enable", isp1362_read_reg32(isp1362_hcd, HCINTENB));
+	dump_int(s, "ohci_int_status", isp1362_read_reg32(isp1362_hcd, HCINTSTAT));
+	dump_ctrl(s, "ohci_control", isp1362_read_reg32(isp1362_hcd, HCCONTROL));
+
+	for (i = 0; i < NUM_ISP1362_IRQS; i++)
+		if (isp1362_hcd->irq_stat[i])
+			seq_printf(s, "%-15s: %d\n",
+				   ISP1362_INT_NAME(i), isp1362_hcd->irq_stat[i]);
+
+	dump_regs(s, isp1362_hcd);
+	list_for_each_entry(ep, &isp1362_hcd->async, schedule) {
+		struct urb *urb;
+
+		seq_printf(s, "%p, ep%d%s, maxpacket %d:\n", ep, ep->epnum,
+			   ({
+				   char *s;
+				   switch (ep->nextpid) {
+				   case USB_PID_IN:
+					   s = "in";
+					   break;
+				   case USB_PID_OUT:
+					   s = "out";
+					   break;
+				   case USB_PID_SETUP:
+					   s = "setup";
+					   break;
+				   case USB_PID_ACK:
+					   s = "status";
+					   break;
+				   default:
+					   s = "?";
+					   break;
+				   };
+				   s;}), ep->maxpacket) ;
+		list_for_each_entry(urb, &ep->hep->urb_list, urb_list) {
+			seq_printf(s, "  urb%p, %d/%d\n", urb,
+				   urb->actual_length,
+				   urb->transfer_buffer_length);
+		}
+	}
+	if (!list_empty(&isp1362_hcd->async))
+		seq_printf(s, "\n");
+	dump_ptd_queue(&isp1362_hcd->atl_queue);
+
+	seq_printf(s, "periodic size= %d\n", PERIODIC_SIZE);
+
+	list_for_each_entry(ep, &isp1362_hcd->periodic, schedule) {
+		seq_printf(s, "branch:%2d load:%3d PTD[%d] $%04x:\n", ep->branch,
+			   isp1362_hcd->load[ep->branch], ep->ptd_index, ep->ptd_offset);
+
+		seq_printf(s, "   %d/%p (%sdev%d ep%d%s max %d)\n",
+			   ep->interval, ep,
+			   (ep->udev->speed == USB_SPEED_FULL) ? "" : "ls ",
+			   ep->udev->devnum, ep->epnum,
+			   (ep->epnum == 0) ? "" :
+			   ((ep->nextpid == USB_PID_IN) ?
+			    "in" : "out"), ep->maxpacket);
+	}
+	dump_ptd_queue(&isp1362_hcd->intl_queue);
+
+	seq_printf(s, "ISO:\n");
+
+	list_for_each_entry(ep, &isp1362_hcd->isoc, schedule) {
+		seq_printf(s, "   %d/%p (%sdev%d ep%d%s max %d)\n",
+			   ep->interval, ep,
+			   (ep->udev->speed == USB_SPEED_FULL) ? "" : "ls ",
+			   ep->udev->devnum, ep->epnum,
+			   (ep->epnum == 0) ? "" :
+			   ((ep->nextpid == USB_PID_IN) ?
+			    "in" : "out"), ep->maxpacket);
+	}
+
+	spin_unlock_irq(&isp1362_hcd->lock);
+	seq_printf(s, "\n");
+
+	return 0;
+}
+
+static int proc_isp1362_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_isp1362_show, PDE(inode)->data);
+}
+
+static const struct file_operations proc_ops = {
+	.open = proc_isp1362_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/* expect just one isp1362_hcd per system */
+static const char proc_filename[] = "driver/isp1362";
+
+static void create_debug_file(struct isp1362_hcd *isp1362_hcd)
+{
+	struct proc_dir_entry *pde;
+
+	pde = create_proc_entry(proc_filename, 0, NULL);
+	if (pde == NULL) {
+		pr_warning("%s: Failed to create debug file '%s'\n", __func__, proc_filename);
+		return;
+	}
+
+	pde->proc_fops = &proc_ops;
+	pde->data = isp1362_hcd;
+	isp1362_hcd->pde = pde;
+}
+
+static void remove_debug_file(struct isp1362_hcd *isp1362_hcd)
+{
+	if (isp1362_hcd->pde)
+		remove_proc_entry(proc_filename, 0);
+}
+
+#endif
+
+/*-------------------------------------------------------------------------*/
+
+static void isp1362_sw_reset(struct isp1362_hcd *isp1362_hcd)
+{
+	int tmp = 20;
+	unsigned long flags;
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+
+	isp1362_write_reg16(isp1362_hcd, HCSWRES, HCSWRES_MAGIC);
+	isp1362_write_reg32(isp1362_hcd, HCCMDSTAT, OHCI_HCR);
+	while (--tmp) {
+		mdelay(1);
+		if (!(isp1362_read_reg32(isp1362_hcd, HCCMDSTAT) & OHCI_HCR))
+			break;
+	}
+	if (!tmp)
+		pr_err("Software reset timeout\n");
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+}
+
+static int isp1362_mem_config(struct usb_hcd *hcd)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	unsigned long flags;
+	u32 total;
+	u16 istl_size = ISP1362_ISTL_BUFSIZE;
+	u16 intl_blksize = ISP1362_INTL_BLKSIZE + PTD_HEADER_SIZE;
+	u16 intl_size = ISP1362_INTL_BUFFERS * intl_blksize;
+	u16 atl_blksize = ISP1362_ATL_BLKSIZE + PTD_HEADER_SIZE;
+	u16 atl_buffers = (ISP1362_BUF_SIZE - (istl_size + intl_size)) / atl_blksize;
+	u16 atl_size;
+	int i;
+
+	WARN_ON(istl_size & 3);
+	WARN_ON(atl_blksize & 3);
+	WARN_ON(intl_blksize & 3);
+	WARN_ON(atl_blksize < PTD_HEADER_SIZE);
+	WARN_ON(intl_blksize < PTD_HEADER_SIZE);
+
+	BUG_ON((unsigned)ISP1362_INTL_BUFFERS > 32);
+	if (atl_buffers > 32)
+		atl_buffers = 32;
+	atl_size = atl_buffers * atl_blksize;
+	total = atl_size + intl_size + istl_size;
+	dev_info(hcd->self.controller, "ISP1362 Memory usage:\n");
+	dev_info(hcd->self.controller, "  ISTL:    2 * %4d:     %4d @ $%04x:$%04x\n",
+		 istl_size / 2, istl_size, 0, istl_size / 2);
+	dev_info(hcd->self.controller, "  INTL: %4d * (%3u+8):  %4d @ $%04x\n",
+		 ISP1362_INTL_BUFFERS, intl_blksize - PTD_HEADER_SIZE,
+		 intl_size, istl_size);
+	dev_info(hcd->self.controller, "  ATL : %4d * (%3u+8):  %4d @ $%04x\n",
+		 atl_buffers, atl_blksize - PTD_HEADER_SIZE,
+		 atl_size, istl_size + intl_size);
+	dev_info(hcd->self.controller, "  USED/FREE:   %4d      %4d\n", total,
+		 ISP1362_BUF_SIZE - total);
+
+	if (total > ISP1362_BUF_SIZE) {
+		dev_err(hcd->self.controller, "%s: Memory requested: %d, available %d\n",
+			__func__, total, ISP1362_BUF_SIZE);
+		return -ENOMEM;
+	}
+
+	total = istl_size + intl_size + atl_size;
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+
+	for (i = 0; i < 2; i++) {
+		isp1362_hcd->istl_queue[i].buf_start = i * istl_size / 2,
+		isp1362_hcd->istl_queue[i].buf_size = istl_size / 2;
+		isp1362_hcd->istl_queue[i].blk_size = 4;
+		INIT_LIST_HEAD(&isp1362_hcd->istl_queue[i].active);
+		snprintf(isp1362_hcd->istl_queue[i].name,
+			 sizeof(isp1362_hcd->istl_queue[i].name), "ISTL%d", i);
+		DBG(3, "%s: %5s buf $%04x %d\n", __func__,
+		     isp1362_hcd->istl_queue[i].name,
+		     isp1362_hcd->istl_queue[i].buf_start,
+		     isp1362_hcd->istl_queue[i].buf_size);
+	}
+	isp1362_write_reg16(isp1362_hcd, HCISTLBUFSZ, istl_size / 2);
+
+	isp1362_hcd->intl_queue.buf_start = istl_size;
+	isp1362_hcd->intl_queue.buf_size = intl_size;
+	isp1362_hcd->intl_queue.buf_count = ISP1362_INTL_BUFFERS;
+	isp1362_hcd->intl_queue.blk_size = intl_blksize;
+	isp1362_hcd->intl_queue.buf_avail = isp1362_hcd->intl_queue.buf_count;
+	isp1362_hcd->intl_queue.skip_map = ~0;
+	INIT_LIST_HEAD(&isp1362_hcd->intl_queue.active);
+
+	isp1362_write_reg16(isp1362_hcd, HCINTLBUFSZ,
+			    isp1362_hcd->intl_queue.buf_size);
+	isp1362_write_reg16(isp1362_hcd, HCINTLBLKSZ,
+			    isp1362_hcd->intl_queue.blk_size - PTD_HEADER_SIZE);
+	isp1362_write_reg32(isp1362_hcd, HCINTLSKIP, ~0);
+	isp1362_write_reg32(isp1362_hcd, HCINTLLAST,
+			    1 << (ISP1362_INTL_BUFFERS - 1));
+
+	isp1362_hcd->atl_queue.buf_start = istl_size + intl_size;
+	isp1362_hcd->atl_queue.buf_size = atl_size;
+	isp1362_hcd->atl_queue.buf_count = atl_buffers;
+	isp1362_hcd->atl_queue.blk_size = atl_blksize;
+	isp1362_hcd->atl_queue.buf_avail = isp1362_hcd->atl_queue.buf_count;
+	isp1362_hcd->atl_queue.skip_map = ~0;
+	INIT_LIST_HEAD(&isp1362_hcd->atl_queue.active);
+
+	isp1362_write_reg16(isp1362_hcd, HCATLBUFSZ,
+			    isp1362_hcd->atl_queue.buf_size);
+	isp1362_write_reg16(isp1362_hcd, HCATLBLKSZ,
+			    isp1362_hcd->atl_queue.blk_size - PTD_HEADER_SIZE);
+	isp1362_write_reg32(isp1362_hcd, HCATLSKIP, ~0);
+	isp1362_write_reg32(isp1362_hcd, HCATLLAST,
+			    1 << (atl_buffers - 1));
+
+	snprintf(isp1362_hcd->atl_queue.name,
+		 sizeof(isp1362_hcd->atl_queue.name), "ATL");
+	snprintf(isp1362_hcd->intl_queue.name,
+		 sizeof(isp1362_hcd->intl_queue.name), "INTL");
+	DBG(3, "%s: %5s buf $%04x %2d * %4d = %4d\n", __func__,
+	     isp1362_hcd->intl_queue.name,
+	     isp1362_hcd->intl_queue.buf_start,
+	     ISP1362_INTL_BUFFERS, isp1362_hcd->intl_queue.blk_size,
+	     isp1362_hcd->intl_queue.buf_size);
+	DBG(3, "%s: %5s buf $%04x %2d * %4d = %4d\n", __func__,
+	     isp1362_hcd->atl_queue.name,
+	     isp1362_hcd->atl_queue.buf_start,
+	     atl_buffers, isp1362_hcd->atl_queue.blk_size,
+	     isp1362_hcd->atl_queue.buf_size);
+
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	return 0;
+}
+
+static int isp1362_hc_reset(struct usb_hcd *hcd)
+{
+	int ret = 0;
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	unsigned long t;
+	unsigned long timeout = 100;
+	unsigned long flags;
+	int clkrdy = 0;
+
+	pr_info("%s:\n", __func__);
+
+	if (isp1362_hcd->board && isp1362_hcd->board->reset) {
+		isp1362_hcd->board->reset(hcd->self.controller, 1);
+		msleep(20);
+		if (isp1362_hcd->board->clock)
+			isp1362_hcd->board->clock(hcd->self.controller, 1);
+		isp1362_hcd->board->reset(hcd->self.controller, 0);
+	} else
+		isp1362_sw_reset(isp1362_hcd);
+
+	/* chip has been reset. First we need to see a clock */
+	t = jiffies + msecs_to_jiffies(timeout);
+	while (!clkrdy && time_before_eq(jiffies, t)) {
+		spin_lock_irqsave(&isp1362_hcd->lock, flags);
+		clkrdy = isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_CLKRDY;
+		spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+		if (!clkrdy)
+			msleep(4);
+	}
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	isp1362_write_reg16(isp1362_hcd, HCuPINT, HCuPINT_CLKRDY);
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	if (!clkrdy) {
+		pr_err("Clock not ready after %lums\n", timeout);
+		ret = -ENODEV;
+	}
+	return ret;
+}
+
+static void isp1362_hc_stop(struct usb_hcd *hcd)
+{
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	unsigned long flags;
+	u32 tmp;
+
+	pr_info("%s:\n", __func__);
+
+	del_timer_sync(&hcd->rh_timer);
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+
+	isp1362_write_reg16(isp1362_hcd, HCuPINTENB, 0);
+
+	/* Switch off power for all ports */
+	tmp = isp1362_read_reg32(isp1362_hcd, HCRHDESCA);
+	tmp &= ~(RH_A_NPS | RH_A_PSM);
+	isp1362_write_reg32(isp1362_hcd, HCRHDESCA, tmp);
+	isp1362_write_reg32(isp1362_hcd, HCRHSTATUS, RH_HS_LPS);
+
+	/* Reset the chip */
+	if (isp1362_hcd->board && isp1362_hcd->board->reset)
+		isp1362_hcd->board->reset(hcd->self.controller, 1);
+	else
+		isp1362_sw_reset(isp1362_hcd);
+
+	if (isp1362_hcd->board && isp1362_hcd->board->clock)
+		isp1362_hcd->board->clock(hcd->self.controller, 0);
+
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+}
+
+#ifdef CHIP_BUFFER_TEST
+static int isp1362_chip_test(struct isp1362_hcd *isp1362_hcd)
+{
+	int ret = 0;
+	u16 *ref;
+	unsigned long flags;
+
+	ref = kmalloc(2 * ISP1362_BUF_SIZE, GFP_KERNEL);
+	if (ref) {
+		int offset;
+		u16 *tst = &ref[ISP1362_BUF_SIZE / 2];
+
+		for (offset = 0; offset < ISP1362_BUF_SIZE / 2; offset++) {
+			ref[offset] = ~offset;
+			tst[offset] = offset;
+		}
+
+		for (offset = 0; offset < 4; offset++) {
+			int j;
+
+			for (j = 0; j < 8; j++) {
+				spin_lock_irqsave(&isp1362_hcd->lock, flags);
+				isp1362_write_buffer(isp1362_hcd, (u8 *)ref + offset, 0, j);
+				isp1362_read_buffer(isp1362_hcd, (u8 *)tst + offset, 0, j);
+				spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+				if (memcmp(ref, tst, j)) {
+					ret = -ENODEV;
+					pr_err("%s: memory check with %d byte offset %d failed\n",
+					    __func__, j, offset);
+					dump_data((u8 *)ref + offset, j);
+					dump_data((u8 *)tst + offset, j);
+				}
+			}
+		}
+
+		spin_lock_irqsave(&isp1362_hcd->lock, flags);
+		isp1362_write_buffer(isp1362_hcd, ref, 0, ISP1362_BUF_SIZE);
+		isp1362_read_buffer(isp1362_hcd, tst, 0, ISP1362_BUF_SIZE);
+		spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+		if (memcmp(ref, tst, ISP1362_BUF_SIZE)) {
+			ret = -ENODEV;
+			pr_err("%s: memory check failed\n", __func__);
+			dump_data((u8 *)tst, ISP1362_BUF_SIZE / 2);
+		}
+
+		for (offset = 0; offset < 256; offset++) {
+			int test_size = 0;
+
+			yield();
+
+			memset(tst, 0, ISP1362_BUF_SIZE);
+			spin_lock_irqsave(&isp1362_hcd->lock, flags);
+			isp1362_write_buffer(isp1362_hcd, tst, 0, ISP1362_BUF_SIZE);
+			isp1362_read_buffer(isp1362_hcd, tst, 0, ISP1362_BUF_SIZE);
+			spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+			if (memcmp(tst, tst + (ISP1362_BUF_SIZE / (2 * sizeof(*tst))),
+				   ISP1362_BUF_SIZE / 2)) {
+				pr_err("%s: Failed to clear buffer\n", __func__);
+				dump_data((u8 *)tst, ISP1362_BUF_SIZE);
+				break;
+			}
+			spin_lock_irqsave(&isp1362_hcd->lock, flags);
+			isp1362_write_buffer(isp1362_hcd, ref, offset * 2, PTD_HEADER_SIZE);
+			isp1362_write_buffer(isp1362_hcd, ref + PTD_HEADER_SIZE / sizeof(*ref),
+					     offset * 2 + PTD_HEADER_SIZE, test_size);
+			isp1362_read_buffer(isp1362_hcd, tst, offset * 2,
+					    PTD_HEADER_SIZE + test_size);
+			spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+			if (memcmp(ref, tst, PTD_HEADER_SIZE + test_size)) {
+				dump_data(((u8 *)ref) + offset, PTD_HEADER_SIZE + test_size);
+				dump_data((u8 *)tst, PTD_HEADER_SIZE + test_size);
+				spin_lock_irqsave(&isp1362_hcd->lock, flags);
+				isp1362_read_buffer(isp1362_hcd, tst, offset * 2,
+						    PTD_HEADER_SIZE + test_size);
+				spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+				if (memcmp(ref, tst, PTD_HEADER_SIZE + test_size)) {
+					ret = -ENODEV;
+					pr_err("%s: memory check with offset %02x failed\n",
+					    __func__, offset);
+					break;
+				}
+				pr_warning("%s: memory check with offset %02x ok after second read\n",
+				     __func__, offset);
+			}
+		}
+		kfree(ref);
+	}
+	return ret;
+}
+#endif
+
+static int isp1362_hc_start(struct usb_hcd *hcd)
+{
+	int ret;
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	struct isp1362_platform_data *board = isp1362_hcd->board;
+	u16 hwcfg;
+	u16 chipid;
+	unsigned long flags;
+
+	pr_info("%s:\n", __func__);
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	chipid = isp1362_read_reg16(isp1362_hcd, HCCHIPID);
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	if ((chipid & HCCHIPID_MASK) != HCCHIPID_MAGIC) {
+		pr_err("%s: Invalid chip ID %04x\n", __func__, chipid);
+		return -ENODEV;
+	}
+
+#ifdef CHIP_BUFFER_TEST
+	ret = isp1362_chip_test(isp1362_hcd);
+	if (ret)
+		return -ENODEV;
+#endif
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	/* clear interrupt status and disable all interrupt sources */
+	isp1362_write_reg16(isp1362_hcd, HCuPINT, 0xff);
+	isp1362_write_reg16(isp1362_hcd, HCuPINTENB, 0);
+
+	/* HW conf */
+	hwcfg = HCHWCFG_INT_ENABLE | HCHWCFG_DBWIDTH(1);
+	if (board->sel15Kres)
+		hwcfg |= HCHWCFG_PULLDOWN_DS2 |
+			(MAX_ROOT_PORTS > 1) ? HCHWCFG_PULLDOWN_DS1 : 0;
+	if (board->clknotstop)
+		hwcfg |= HCHWCFG_CLKNOTSTOP;
+	if (board->oc_enable)
+		hwcfg |= HCHWCFG_ANALOG_OC;
+	if (board->int_act_high)
+		hwcfg |= HCHWCFG_INT_POL;
+	if (board->int_edge_triggered)
+		hwcfg |= HCHWCFG_INT_TRIGGER;
+	if (board->dreq_act_high)
+		hwcfg |= HCHWCFG_DREQ_POL;
+	if (board->dack_act_high)
+		hwcfg |= HCHWCFG_DACK_POL;
+	isp1362_write_reg16(isp1362_hcd, HCHWCFG, hwcfg);
+	isp1362_show_reg(isp1362_hcd, HCHWCFG);
+	isp1362_write_reg16(isp1362_hcd, HCDMACFG, 0);
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	ret = isp1362_mem_config(hcd);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+
+	/* Root hub conf */
+	isp1362_hcd->rhdesca = 0;
+	if (board->no_power_switching)
+		isp1362_hcd->rhdesca |= RH_A_NPS;
+	if (board->power_switching_mode)
+		isp1362_hcd->rhdesca |= RH_A_PSM;
+	if (board->potpg)
+		isp1362_hcd->rhdesca |= (board->potpg << 24) & RH_A_POTPGT;
+	else
+		isp1362_hcd->rhdesca |= (25 << 24) & RH_A_POTPGT;
+
+	isp1362_write_reg32(isp1362_hcd, HCRHDESCA, isp1362_hcd->rhdesca & ~RH_A_OCPM);
+	isp1362_write_reg32(isp1362_hcd, HCRHDESCA, isp1362_hcd->rhdesca | RH_A_OCPM);
+	isp1362_hcd->rhdesca = isp1362_read_reg32(isp1362_hcd, HCRHDESCA);
+
+	isp1362_hcd->rhdescb = RH_B_PPCM;
+	isp1362_write_reg32(isp1362_hcd, HCRHDESCB, isp1362_hcd->rhdescb);
+	isp1362_hcd->rhdescb = isp1362_read_reg32(isp1362_hcd, HCRHDESCB);
+
+	isp1362_read_reg32(isp1362_hcd, HCFMINTVL);
+	isp1362_write_reg32(isp1362_hcd, HCFMINTVL, (FSMP(FI) << 16) | FI);
+	isp1362_write_reg32(isp1362_hcd, HCLSTHRESH, LSTHRESH);
+
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	isp1362_hcd->hc_control = OHCI_USB_OPER;
+	hcd->state = HC_STATE_RUNNING;
+
+	spin_lock_irqsave(&isp1362_hcd->lock, flags);
+	/* Set up interrupts */
+	isp1362_hcd->intenb = OHCI_INTR_MIE | OHCI_INTR_RHSC | OHCI_INTR_UE;
+	isp1362_hcd->intenb |= OHCI_INTR_RD;
+	isp1362_hcd->irqenb = HCuPINT_OPR | HCuPINT_SUSP;
+	isp1362_write_reg32(isp1362_hcd, HCINTENB, isp1362_hcd->intenb);
+	isp1362_write_reg16(isp1362_hcd, HCuPINTENB, isp1362_hcd->irqenb);
+
+	/* Go operational */
+	isp1362_write_reg32(isp1362_hcd, HCCONTROL, isp1362_hcd->hc_control);
+	/* enable global power */
+	isp1362_write_reg32(isp1362_hcd, HCRHSTATUS, RH_HS_LPSC | RH_HS_DRWE);
+
+	spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static struct hc_driver isp1362_hc_driver = {
+	.description =		hcd_name,
+	.product_desc =		"ISP1362 Host Controller",
+	.hcd_priv_size =	sizeof(struct isp1362_hcd),
+
+	.irq =			isp1362_irq,
+	.flags =		HCD_USB11 | HCD_MEMORY,
+
+	.reset =		isp1362_hc_reset,
+	.start =		isp1362_hc_start,
+	.stop =			isp1362_hc_stop,
+
+	.urb_enqueue =		isp1362_urb_enqueue,
+	.urb_dequeue =		isp1362_urb_dequeue,
+	.endpoint_disable =	isp1362_endpoint_disable,
+
+	.get_frame_number =	isp1362_get_frame,
+
+	.hub_status_data =	isp1362_hub_status_data,
+	.hub_control =		isp1362_hub_control,
+	.bus_suspend =		isp1362_bus_suspend,
+	.bus_resume =		isp1362_bus_resume,
+};
+
+/*-------------------------------------------------------------------------*/
+
+#define resource_len(r) (((r)->end - (r)->start) + 1)
+
+static int __devexit isp1362_remove(struct platform_device *pdev)
+{
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	struct resource *res;
+
+	remove_debug_file(isp1362_hcd);
+	DBG(0, "%s: Removing HCD\n", __func__);
+	usb_remove_hcd(hcd);
+
+	DBG(0, "%s: Unmapping data_reg @ %08x\n", __func__,
+	    (u32)isp1362_hcd->data_reg);
+	iounmap(isp1362_hcd->data_reg);
+
+	DBG(0, "%s: Unmapping addr_reg @ %08x\n", __func__,
+	    (u32)isp1362_hcd->addr_reg);
+	iounmap(isp1362_hcd->addr_reg);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	DBG(0, "%s: release mem_region: %08lx\n", __func__, (long unsigned int)res->start);
+	if (res)
+		release_mem_region(res->start, resource_len(res));
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	DBG(0, "%s: release mem_region: %08lx\n", __func__, (long unsigned int)res->start);
+	if (res)
+		release_mem_region(res->start, resource_len(res));
+
+	DBG(0, "%s: put_hcd\n", __func__);
+	usb_put_hcd(hcd);
+	DBG(0, "%s: Done\n", __func__);
+
+	return 0;
+}
+
+static int __init isp1362_probe(struct platform_device *pdev)
+{
+	struct usb_hcd *hcd;
+	struct isp1362_hcd *isp1362_hcd;
+	struct resource *addr, *data;
+	void __iomem *addr_reg;
+	void __iomem *data_reg;
+	int irq;
+	int retval = 0;
+
+	/* basic sanity checks first.  board-specific init logic should
+	 * have initialized this the three resources and probably board
+	 * specific platform_data.  we don't probe for IRQs, and do only
+	 * minimal sanity checking.
+	 */
+	if (pdev->num_resources < 3) {
+		retval = -ENODEV;
+		goto err1;
+	}
+
+	data = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	addr = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	irq = platform_get_irq(pdev, 0);
+	if (!addr || !data || irq < 0) {
+		retval = -ENODEV;
+		goto err1;
+	}
+
+#ifdef CONFIG_USB_HCD_DMA
+	if (pdev->dev.dma_mask) {
+		struct resource *dma_res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+
+		if (!dma_res) {
+			retval = -ENODEV;
+			goto err1;
+		}
+		isp1362_hcd->data_dma = dma_res->start;
+		isp1362_hcd->max_dma_size = resource_len(dma_res);
+	}
+#else
+	if (pdev->dev.dma_mask) {
+		DBG(1, "won't do DMA");
+		retval = -ENODEV;
+		goto err1;
+	}
+#endif
+
+	if (!request_mem_region(addr->start, resource_len(addr), hcd_name)) {
+		retval = -EBUSY;
+		goto err1;
+	}
+	addr_reg = ioremap(addr->start, resource_len(addr));
+	if (addr_reg == NULL) {
+		retval = -ENOMEM;
+		goto err2;
+	}
+
+	if (!request_mem_region(data->start, resource_len(data), hcd_name)) {
+		retval = -EBUSY;
+		goto err3;
+	}
+	data_reg = ioremap(data->start, resource_len(data));
+	if (data_reg == NULL) {
+		retval = -ENOMEM;
+		goto err4;
+	}
+
+	/* allocate and initialize hcd */
+	hcd = usb_create_hcd(&isp1362_hc_driver, &pdev->dev, dev_name(&pdev->dev));
+	if (!hcd) {
+		retval = -ENOMEM;
+		goto err5;
+	}
+	hcd->rsrc_start = data->start;
+	isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	isp1362_hcd->data_reg = data_reg;
+	isp1362_hcd->addr_reg = addr_reg;
+
+	isp1362_hcd->next_statechange = jiffies;
+	spin_lock_init(&isp1362_hcd->lock);
+	INIT_LIST_HEAD(&isp1362_hcd->async);
+	INIT_LIST_HEAD(&isp1362_hcd->periodic);
+	INIT_LIST_HEAD(&isp1362_hcd->isoc);
+	INIT_LIST_HEAD(&isp1362_hcd->remove_list);
+	isp1362_hcd->board = pdev->dev.platform_data;
+#if USE_PLATFORM_DELAY
+	if (!isp1362_hcd->board->delay) {
+		dev_err(hcd->self.controller, "No platform delay function given\n");
+		retval = -ENODEV;
+		goto err6;
+	}
+#endif
+
+#ifdef CONFIG_ARM
+	if (isp1362_hcd->board)
+		set_irq_type(irq, isp1362_hcd->board->int_act_high ? IRQT_RISING : IRQT_FALLING);
+#endif
+
+	retval = usb_add_hcd(hcd, irq, IRQF_TRIGGER_LOW | IRQF_DISABLED | IRQF_SHARED);
+	if (retval != 0)
+		goto err6;
+	pr_info("%s, irq %d\n", hcd->product_desc, irq);
+
+	create_debug_file(isp1362_hcd);
+
+	return 0;
+
+ err6:
+	DBG(0, "%s: Freeing dev %08x\n", __func__, (u32)isp1362_hcd);
+	usb_put_hcd(hcd);
+ err5:
+	DBG(0, "%s: Unmapping data_reg @ %08x\n", __func__, (u32)data_reg);
+	iounmap(data_reg);
+ err4:
+	DBG(0, "%s: Releasing mem region %08lx\n", __func__, (long unsigned int)data->start);
+	release_mem_region(data->start, resource_len(data));
+ err3:
+	DBG(0, "%s: Unmapping addr_reg @ %08x\n", __func__, (u32)addr_reg);
+	iounmap(addr_reg);
+ err2:
+	DBG(0, "%s: Releasing mem region %08lx\n", __func__, (long unsigned int)addr->start);
+	release_mem_region(addr->start, resource_len(addr));
+ err1:
+	pr_err("%s: init error, %d\n", __func__, retval);
+
+	return retval;
+}
+
+#ifdef	CONFIG_PM
+static int isp1362_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	unsigned long flags;
+	int retval = 0;
+
+	DBG(0, "%s: Suspending device\n", __func__);
+
+	if (state.event == PM_EVENT_FREEZE) {
+		DBG(0, "%s: Suspending root hub\n", __func__);
+		retval = isp1362_bus_suspend(hcd);
+	} else {
+		DBG(0, "%s: Suspending RH ports\n", __func__);
+		spin_lock_irqsave(&isp1362_hcd->lock, flags);
+		isp1362_write_reg32(isp1362_hcd, HCRHSTATUS, RH_HS_LPS);
+		spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+	}
+	if (retval == 0)
+		pdev->dev.power.power_state = state;
+	return retval;
+}
+
+static int isp1362_resume(struct platform_device *pdev)
+{
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+	struct isp1362_hcd *isp1362_hcd = hcd_to_isp1362_hcd(hcd);
+	unsigned long flags;
+
+	DBG(0, "%s: Resuming\n", __func__);
+
+	if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) {
+		DBG(0, "%s: Resume RH ports\n", __func__);
+		spin_lock_irqsave(&isp1362_hcd->lock, flags);
+		isp1362_write_reg32(isp1362_hcd, HCRHSTATUS, RH_HS_LPSC);
+		spin_unlock_irqrestore(&isp1362_hcd->lock, flags);
+		return 0;
+	}
+
+	pdev->dev.power.power_state = PMSG_ON;
+
+	return isp1362_bus_resume(isp1362_hcd_to_hcd(isp1362_hcd));
+}
+#else
+#define	isp1362_suspend	NULL
+#define	isp1362_resume	NULL
+#endif
+
+static struct platform_driver isp1362_driver = {
+	.probe = isp1362_probe,
+	.remove = __devexit_p(isp1362_remove),
+
+	.suspend = isp1362_suspend,
+	.resume = isp1362_resume,
+	.driver = {
+		.name = (char *)hcd_name,
+		.owner = THIS_MODULE,
+	},
+};
+
+/*-------------------------------------------------------------------------*/
+
+static int __init isp1362_init(void)
+{
+	if (usb_disabled())
+		return -ENODEV;
+	pr_info("driver %s, %s\n", hcd_name, DRIVER_VERSION);
+	return platform_driver_register(&isp1362_driver);
+}
+module_init(isp1362_init);
+
+static void __exit isp1362_cleanup(void)
+{
+	platform_driver_unregister(&isp1362_driver);
+}
+module_exit(isp1362_cleanup);
diff --git a/drivers/usb/host/isp1362.h b/drivers/usb/host/isp1362.h
new file mode 100644
index 0000000..26e44fc
--- /dev/null
+++ b/drivers/usb/host/isp1362.h
@@ -0,0 +1,1079 @@
+/*
+ * ISP1362 HCD (Host Controller Driver) for USB.
+ *
+ * COPYRIGHT (C) by L. Wassmann <LW@KARO-electronics.de>
+ */
+
+/* ------------------------------------------------------------------------- */
+/*
+ * Platform specific compile time options
+ */
+#if defined(CONFIG_ARCH_KARO)
+#include <asm/arch/hardware.h>
+#include <asm/arch/pxa-regs.h>
+#include <asm/arch/karo.h>
+
+#define USE_32BIT		1
+
+
+/* These options are mutually eclusive */
+#define USE_PLATFORM_DELAY	1
+#define USE_NDELAY		0
+/*
+ * MAX_ROOT_PORTS: Number of downstream ports
+ *
+ * The chip has two USB ports, one of which can be configured as
+ * an USB device port, so the value of this constant is implementation
+ * specific.
+ */
+#define MAX_ROOT_PORTS		2
+#define DUMMY_DELAY_ACCESS do {} while (0)
+
+/* insert platform specific definitions for other machines here */
+#elif defined(CONFIG_BLACKFIN)
+
+#include <linux/io.h>
+#define USE_32BIT		0
+#define MAX_ROOT_PORTS		2
+#define USE_PLATFORM_DELAY	0
+#define USE_NDELAY		1
+
+#define DUMMY_DELAY_ACCESS \
+	do { \
+		bfin_read16(ASYNC_BANK0_BASE); \
+		bfin_read16(ASYNC_BANK0_BASE); \
+		bfin_read16(ASYNC_BANK0_BASE); \
+	} while (0)
+
+#undef insw
+#undef outsw
+
+#define insw  delayed_insw
+#define outsw  delayed_outsw
+
+static inline void delayed_outsw(unsigned int addr, void *buf, int len)
+{
+	unsigned short *bp = (unsigned short *)buf;
+	while (len--) {
+		DUMMY_DELAY_ACCESS;
+		outw(*bp++, addr);
+	}
+}
+
+static inline void delayed_insw(unsigned int addr, void *buf, int len)
+{
+	unsigned short *bp = (unsigned short *)buf;
+	while (len--) {
+		DUMMY_DELAY_ACCESS;
+		*bp++ = inw((void *)addr);
+	}
+}
+
+#else
+
+#define MAX_ROOT_PORTS		2
+
+#define USE_32BIT		0
+
+/* These options are mutually eclusive */
+#define USE_PLATFORM_DELAY	0
+#define USE_NDELAY		0
+
+#define DUMMY_DELAY_ACCESS do {} while (0)
+
+#endif
+
+
+/* ------------------------------------------------------------------------- */
+
+#define USB_RESET_WIDTH			50
+#define MAX_XFER_SIZE			1023
+
+/* Buffer sizes */
+#define ISP1362_BUF_SIZE		4096
+#define ISP1362_ISTL_BUFSIZE		512
+#define ISP1362_INTL_BLKSIZE		64
+#define ISP1362_INTL_BUFFERS		16
+#define ISP1362_ATL_BLKSIZE		64
+
+#define ISP1362_REG_WRITE_OFFSET	0x80
+
+#ifdef ISP1362_DEBUG
+typedef const unsigned int isp1362_reg_t;
+
+#define REG_WIDTH_16			0x000
+#define REG_WIDTH_32			0x100
+#define REG_WIDTH_MASK			0x100
+#define REG_NO_MASK			0x0ff
+
+#define REG_ACCESS_R			0x200
+#define REG_ACCESS_W			0x400
+#define REG_ACCESS_RW			0x600
+#define REG_ACCESS_MASK			0x600
+
+#define ISP1362_REG_NO(r)		((r) & REG_NO_MASK)
+
+#define _BUG_ON(x)	BUG_ON(x)
+#define _WARN_ON(x)	WARN_ON(x)
+
+#define ISP1362_REG(name, addr, width, rw) \
+static isp1362_reg_t ISP1362_REG_##name = ((addr) | (width) | (rw))
+
+#define REG_ACCESS_TEST(r)   BUG_ON(((r) & ISP1362_REG_WRITE_OFFSET) && !((r) & REG_ACCESS_W))
+#define REG_WIDTH_TEST(r, w) BUG_ON(((r) & REG_WIDTH_MASK) != (w))
+#else
+typedef const unsigned char isp1362_reg_t;
+#define ISP1362_REG_NO(r)		(r)
+#define _BUG_ON(x)			do {} while (0)
+#define _WARN_ON(x)			do {} while (0)
+
+#define ISP1362_REG(name, addr, width, rw) \
+static isp1362_reg_t ISP1362_REG_##name = addr
+
+#define REG_ACCESS_TEST(r)		do {} while (0)
+#define REG_WIDTH_TEST(r, w)		do {} while (0)
+#endif
+
+/* OHCI compatible registers */
+/*
+ * Note: Some of the ISP1362 'OHCI' registers implement only
+ * a subset of the bits defined in the OHCI spec.
+ *
+ * Bitmasks for the individual bits of these registers are defined in "ohci.h"
+ */
+ISP1362_REG(HCREVISION,	0x00,	REG_WIDTH_32,	REG_ACCESS_R);
+ISP1362_REG(HCCONTROL,	0x01,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCCMDSTAT,	0x02,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCINTSTAT,	0x03,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCINTENB,	0x04,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCINTDIS,	0x05,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCFMINTVL,	0x0d,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCFMREM,	0x0e,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCFMNUM,	0x0f,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCLSTHRESH,	0x11,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCRHDESCA,	0x12,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCRHDESCB,	0x13,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCRHSTATUS,	0x14,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCRHPORT1,	0x15,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCRHPORT2,	0x16,	REG_WIDTH_32,	REG_ACCESS_RW);
+
+/* Philips ISP1362 specific registers */
+ISP1362_REG(HCHWCFG,	0x20,	REG_WIDTH_16,	REG_ACCESS_RW);
+#define HCHWCFG_DISABLE_SUSPEND	(1 << 15)
+#define HCHWCFG_GLOBAL_PWRDOWN	(1 << 14)
+#define HCHWCFG_PULLDOWN_DS1	(1 << 13)
+#define HCHWCFG_PULLDOWN_DS2	(1 << 12)
+#define HCHWCFG_CLKNOTSTOP	(1 << 11)
+#define HCHWCFG_ANALOG_OC	(1 << 10)
+#define HCHWCFG_ONEINT		(1 << 9)
+#define HCHWCFG_DACK_MODE	(1 << 8)
+#define HCHWCFG_ONEDMA		(1 << 7)
+#define HCHWCFG_DACK_POL	(1 << 6)
+#define HCHWCFG_DREQ_POL	(1 << 5)
+#define HCHWCFG_DBWIDTH_MASK	(0x03 << 3)
+#define HCHWCFG_DBWIDTH(n)	(((n) << 3) & HCHWCFG_DBWIDTH_MASK)
+#define HCHWCFG_INT_POL		(1 << 2)
+#define HCHWCFG_INT_TRIGGER	(1 << 1)
+#define HCHWCFG_INT_ENABLE	(1 << 0)
+
+ISP1362_REG(HCDMACFG,	0x21,	REG_WIDTH_16,	REG_ACCESS_RW);
+#define HCDMACFG_CTR_ENABLE	(1 << 7)
+#define HCDMACFG_BURST_LEN_MASK	(0x03 << 5)
+#define HCDMACFG_BURST_LEN(n)	(((n) << 5) & HCDMACFG_BURST_LEN_MASK)
+#define HCDMACFG_BURST_LEN_1	HCDMACFG_BURST_LEN(0)
+#define HCDMACFG_BURST_LEN_4	HCDMACFG_BURST_LEN(1)
+#define HCDMACFG_BURST_LEN_8	HCDMACFG_BURST_LEN(2)
+#define HCDMACFG_DMA_ENABLE	(1 << 4)
+#define HCDMACFG_BUF_TYPE_MASK	(0x07 << 1)
+#define HCDMACFG_BUF_TYPE(n)	(((n) << 1) & HCDMACFG_BUF_TYPE_MASK)
+#define HCDMACFG_BUF_ISTL0	HCDMACFG_BUF_TYPE(0)
+#define HCDMACFG_BUF_ISTL1	HCDMACFG_BUF_TYPE(1)
+#define HCDMACFG_BUF_INTL	HCDMACFG_BUF_TYPE(2)
+#define HCDMACFG_BUF_ATL	HCDMACFG_BUF_TYPE(3)
+#define HCDMACFG_BUF_DIRECT	HCDMACFG_BUF_TYPE(4)
+#define HCDMACFG_DMA_RW_SELECT	(1 << 0)
+
+ISP1362_REG(HCXFERCTR,	0x22,	REG_WIDTH_16,	REG_ACCESS_RW);
+
+ISP1362_REG(HCuPINT,	0x24,	REG_WIDTH_16,	REG_ACCESS_RW);
+#define HCuPINT_SOF		(1 << 0)
+#define HCuPINT_ISTL0		(1 << 1)
+#define HCuPINT_ISTL1		(1 << 2)
+#define HCuPINT_EOT		(1 << 3)
+#define HCuPINT_OPR		(1 << 4)
+#define HCuPINT_SUSP		(1 << 5)
+#define HCuPINT_CLKRDY		(1 << 6)
+#define HCuPINT_INTL		(1 << 7)
+#define HCuPINT_ATL		(1 << 8)
+#define HCuPINT_OTG		(1 << 9)
+
+ISP1362_REG(HCuPINTENB,	0x25,	REG_WIDTH_16,	REG_ACCESS_RW);
+/* same bit definitions apply as for HCuPINT */
+
+ISP1362_REG(HCCHIPID,	0x27,	REG_WIDTH_16,	REG_ACCESS_R);
+#define HCCHIPID_MASK		0xff00
+#define HCCHIPID_MAGIC		0x3600
+
+ISP1362_REG(HCSCRATCH,	0x28,	REG_WIDTH_16,	REG_ACCESS_RW);
+
+ISP1362_REG(HCSWRES,	0x29,	REG_WIDTH_16,	REG_ACCESS_W);
+#define HCSWRES_MAGIC		0x00f6
+
+ISP1362_REG(HCBUFSTAT,	0x2c,	REG_WIDTH_16,	REG_ACCESS_RW);
+#define HCBUFSTAT_ISTL0_FULL	(1 << 0)
+#define HCBUFSTAT_ISTL1_FULL	(1 << 1)
+#define HCBUFSTAT_INTL_ACTIVE	(1 << 2)
+#define HCBUFSTAT_ATL_ACTIVE	(1 << 3)
+#define HCBUFSTAT_RESET_HWPP	(1 << 4)
+#define HCBUFSTAT_ISTL0_ACTIVE	(1 << 5)
+#define HCBUFSTAT_ISTL1_ACTIVE	(1 << 6)
+#define HCBUFSTAT_ISTL0_DONE	(1 << 8)
+#define HCBUFSTAT_ISTL1_DONE	(1 << 9)
+#define HCBUFSTAT_PAIRED_PTDPP	(1 << 10)
+
+ISP1362_REG(HCDIRADDR,	0x32,	REG_WIDTH_32,	REG_ACCESS_RW);
+#define HCDIRADDR_ADDR_MASK	0x0000ffff
+#define HCDIRADDR_ADDR(n)	(((n) << 0) & HCDIRADDR_ADDR_MASK)
+#define HCDIRADDR_COUNT_MASK	0xffff0000
+#define HCDIRADDR_COUNT(n)	(((n) << 16) & HCDIRADDR_COUNT_MASK)
+ISP1362_REG(HCDIRDATA,	0x45,	REG_WIDTH_16,	REG_ACCESS_RW);
+
+ISP1362_REG(HCISTLBUFSZ, 0x30,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCISTL0PORT, 0x40,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCISTL1PORT, 0x42,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCISTLRATE,	0x47,	REG_WIDTH_16,	REG_ACCESS_RW);
+
+ISP1362_REG(HCINTLBUFSZ, 0x33,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCINTLPORT,	0x43,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCINTLBLKSZ, 0x53,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCINTLDONE,	0x17,	REG_WIDTH_32,	REG_ACCESS_R);
+ISP1362_REG(HCINTLSKIP,	0x18,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCINTLLAST,	0x19,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCINTLCURR,	0x1a,	REG_WIDTH_16,	REG_ACCESS_R);
+
+ISP1362_REG(HCATLBUFSZ, 0x34,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCATLPORT,	0x44,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCATLBLKSZ, 0x54,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCATLDONE,	0x1b,	REG_WIDTH_32,	REG_ACCESS_R);
+ISP1362_REG(HCATLSKIP,	0x1c,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCATLLAST,	0x1d,	REG_WIDTH_32,	REG_ACCESS_RW);
+ISP1362_REG(HCATLCURR,	0x1e,	REG_WIDTH_16,	REG_ACCESS_R);
+
+ISP1362_REG(HCATLDTC,	0x51,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(HCATLDTCTO,	0x52,	REG_WIDTH_16,	REG_ACCESS_RW);
+
+
+ISP1362_REG(OTGCONTROL,	0x62,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(OTGSTATUS,	0x67,	REG_WIDTH_16,	REG_ACCESS_R);
+ISP1362_REG(OTGINT,	0x68,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(OTGINTENB,	0x69,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(OTGTIMER,	0x6A,	REG_WIDTH_16,	REG_ACCESS_RW);
+ISP1362_REG(OTGALTTMR,	0x6C,	REG_WIDTH_16,	REG_ACCESS_RW);
+
+/* Philips transfer descriptor, cpu-endian */
+struct ptd {
+	u16 count;
+#define	PTD_COUNT_MSK	(0x3ff << 0)
+#define	PTD_TOGGLE_MSK	(1 << 10)
+#define	PTD_ACTIVE_MSK	(1 << 11)
+#define	PTD_CC_MSK	(0xf << 12)
+	u16 mps;
+#define	PTD_MPS_MSK	(0x3ff << 0)
+#define	PTD_SPD_MSK	(1 << 10)
+#define	PTD_LAST_MSK	(1 << 11)
+#define	PTD_EP_MSK	(0xf << 12)
+	u16 len;
+#define	PTD_LEN_MSK	(0x3ff << 0)
+#define	PTD_DIR_MSK	(3 << 10)
+#define	PTD_DIR_SETUP	(0)
+#define	PTD_DIR_OUT	(1)
+#define	PTD_DIR_IN	(2)
+	u16 faddr;
+#define	PTD_FA_MSK	(0x7f << 0)
+/* PTD Byte 7: [StartingFrame (if ISO PTD) | StartingFrame[0..4], PollingRate[0..2] (if INT PTD)] */
+#define PTD_SF_ISO_MSK	(0xff << 8)
+#define PTD_SF_INT_MSK	(0x1f << 8)
+#define PTD_PR_MSK	(0x07 << 13)
+} __attribute__ ((packed, aligned(2)));
+#define PTD_HEADER_SIZE sizeof(struct ptd)
+
+/* ------------------------------------------------------------------------- */
+/* Copied from ohci.h: */
+/*
+ * Hardware transfer status codes -- CC from PTD
+ */
+#define PTD_CC_NOERROR      0x00
+#define PTD_CC_CRC          0x01
+#define PTD_CC_BITSTUFFING  0x02
+#define PTD_CC_DATATOGGLEM  0x03
+#define PTD_CC_STALL        0x04
+#define PTD_DEVNOTRESP      0x05
+#define PTD_PIDCHECKFAIL    0x06
+#define PTD_UNEXPECTEDPID   0x07
+#define PTD_DATAOVERRUN     0x08
+#define PTD_DATAUNDERRUN    0x09
+    /* 0x0A, 0x0B reserved for hardware */
+#define PTD_BUFFEROVERRUN   0x0C
+#define PTD_BUFFERUNDERRUN  0x0D
+    /* 0x0E, 0x0F reserved for HCD */
+#define PTD_NOTACCESSED     0x0F
+
+
+/* map OHCI TD status codes (CC) to errno values */
+static const int cc_to_error[16] = {
+	/* No  Error  */               0,
+	/* CRC Error  */               -EILSEQ,
+	/* Bit Stuff  */               -EPROTO,
+	/* Data Togg  */               -EILSEQ,
+	/* Stall      */               -EPIPE,
+	/* DevNotResp */               -ETIMEDOUT,
+	/* PIDCheck   */               -EPROTO,
+	/* UnExpPID   */               -EPROTO,
+	/* DataOver   */               -EOVERFLOW,
+	/* DataUnder  */               -EREMOTEIO,
+	/* (for hw)   */               -EIO,
+	/* (for hw)   */               -EIO,
+	/* BufferOver */               -ECOMM,
+	/* BuffUnder  */               -ENOSR,
+	/* (for HCD)  */               -EALREADY,
+	/* (for HCD)  */               -EALREADY
+};
+
+
+/*
+ * HcControl (control) register masks
+ */
+#define OHCI_CTRL_HCFS	(3 << 6)	/* host controller functional state */
+#define OHCI_CTRL_RWC	(1 << 9)	/* remote wakeup connected */
+#define OHCI_CTRL_RWE	(1 << 10)	/* remote wakeup enable */
+
+/* pre-shifted values for HCFS */
+#	define OHCI_USB_RESET	(0 << 6)
+#	define OHCI_USB_RESUME	(1 << 6)
+#	define OHCI_USB_OPER	(2 << 6)
+#	define OHCI_USB_SUSPEND	(3 << 6)
+
+/*
+ * HcCommandStatus (cmdstatus) register masks
+ */
+#define OHCI_HCR	(1 << 0)	/* host controller reset */
+#define OHCI_SOC  	(3 << 16)	/* scheduling overrun count */
+
+/*
+ * masks used with interrupt registers:
+ * HcInterruptStatus (intrstatus)
+ * HcInterruptEnable (intrenable)
+ * HcInterruptDisable (intrdisable)
+ */
+#define OHCI_INTR_SO	(1 << 0)	/* scheduling overrun */
+#define OHCI_INTR_WDH	(1 << 1)	/* writeback of done_head */
+#define OHCI_INTR_SF	(1 << 2)	/* start frame */
+#define OHCI_INTR_RD	(1 << 3)	/* resume detect */
+#define OHCI_INTR_UE	(1 << 4)	/* unrecoverable error */
+#define OHCI_INTR_FNO	(1 << 5)	/* frame number overflow */
+#define OHCI_INTR_RHSC	(1 << 6)	/* root hub status change */
+#define OHCI_INTR_OC	(1 << 30)	/* ownership change */
+#define OHCI_INTR_MIE	(1 << 31)	/* master interrupt enable */
+
+/* roothub.portstatus [i] bits */
+#define RH_PS_CCS            0x00000001   	/* current connect status */
+#define RH_PS_PES            0x00000002   	/* port enable status*/
+#define RH_PS_PSS            0x00000004   	/* port suspend status */
+#define RH_PS_POCI           0x00000008   	/* port over current indicator */
+#define RH_PS_PRS            0x00000010  	/* port reset status */
+#define RH_PS_PPS            0x00000100   	/* port power status */
+#define RH_PS_LSDA           0x00000200    	/* low speed device attached */
+#define RH_PS_CSC            0x00010000 	/* connect status change */
+#define RH_PS_PESC           0x00020000   	/* port enable status change */
+#define RH_PS_PSSC           0x00040000    	/* port suspend status change */
+#define RH_PS_OCIC           0x00080000    	/* over current indicator change */
+#define RH_PS_PRSC           0x00100000   	/* port reset status change */
+
+/* roothub.status bits */
+#define RH_HS_LPS	     0x00000001		/* local power status */
+#define RH_HS_OCI	     0x00000002		/* over current indicator */
+#define RH_HS_DRWE	     0x00008000		/* device remote wakeup enable */
+#define RH_HS_LPSC	     0x00010000		/* local power status change */
+#define RH_HS_OCIC	     0x00020000		/* over current indicator change */
+#define RH_HS_CRWE	     0x80000000		/* clear remote wakeup enable */
+
+/* roothub.b masks */
+#define RH_B_DR		0x0000ffff		/* device removable flags */
+#define RH_B_PPCM	0xffff0000		/* port power control mask */
+
+/* roothub.a masks */
+#define	RH_A_NDP	(0xff << 0)		/* number of downstream ports */
+#define	RH_A_PSM	(1 << 8)		/* power switching mode */
+#define	RH_A_NPS	(1 << 9)		/* no power switching */
+#define	RH_A_DT		(1 << 10)		/* device type (mbz) */
+#define	RH_A_OCPM	(1 << 11)		/* over current protection mode */
+#define	RH_A_NOCP	(1 << 12)		/* no over current protection */
+#define	RH_A_POTPGT	(0xff << 24)		/* power on to power good time */
+
+#define	FI			0x2edf		/* 12000 bits per frame (-1) */
+#define	FSMP(fi) 		(0x7fff & ((6 * ((fi) - 210)) / 7))
+#define LSTHRESH		0x628		/* lowspeed bit threshold */
+
+/* ------------------------------------------------------------------------- */
+
+/* PTD accessor macros. */
+#define PTD_GET_COUNT(p)	(((p)->count & PTD_COUNT_MSK) >> 0)
+#define PTD_COUNT(v)		(((v) << 0) & PTD_COUNT_MSK)
+#define PTD_GET_TOGGLE(p)	(((p)->count & PTD_TOGGLE_MSK) >> 10)
+#define PTD_TOGGLE(v)		(((v) << 10) & PTD_TOGGLE_MSK)
+#define PTD_GET_ACTIVE(p)	(((p)->count & PTD_ACTIVE_MSK) >> 11)
+#define PTD_ACTIVE(v)		(((v) << 11) & PTD_ACTIVE_MSK)
+#define PTD_GET_CC(p)		(((p)->count & PTD_CC_MSK) >> 12)
+#define PTD_CC(v)		(((v) << 12) & PTD_CC_MSK)
+#define PTD_GET_MPS(p)		(((p)->mps & PTD_MPS_MSK) >> 0)
+#define PTD_MPS(v)		(((v) << 0) & PTD_MPS_MSK)
+#define PTD_GET_SPD(p)		(((p)->mps & PTD_SPD_MSK) >> 10)
+#define PTD_SPD(v)		(((v) << 10) & PTD_SPD_MSK)
+#define PTD_GET_LAST(p)		(((p)->mps & PTD_LAST_MSK) >> 11)
+#define PTD_LAST(v)		(((v) << 11) & PTD_LAST_MSK)
+#define PTD_GET_EP(p)		(((p)->mps & PTD_EP_MSK) >> 12)
+#define PTD_EP(v)		(((v) << 12) & PTD_EP_MSK)
+#define PTD_GET_LEN(p)		(((p)->len & PTD_LEN_MSK) >> 0)
+#define PTD_LEN(v)		(((v) << 0) & PTD_LEN_MSK)
+#define PTD_GET_DIR(p)		(((p)->len & PTD_DIR_MSK) >> 10)
+#define PTD_DIR(v)		(((v) << 10) & PTD_DIR_MSK)
+#define PTD_GET_FA(p)		(((p)->faddr & PTD_FA_MSK) >> 0)
+#define PTD_FA(v)		(((v) << 0) & PTD_FA_MSK)
+#define PTD_GET_SF_INT(p)	(((p)->faddr & PTD_SF_INT_MSK) >> 8)
+#define PTD_SF_INT(v)		(((v) << 8) & PTD_SF_INT_MSK)
+#define PTD_GET_SF_ISO(p)	(((p)->faddr & PTD_SF_ISO_MSK) >> 8)
+#define PTD_SF_ISO(v)		(((v) << 8) & PTD_SF_ISO_MSK)
+#define PTD_GET_PR(p)		(((p)->faddr & PTD_PR_MSK) >> 13)
+#define PTD_PR(v)		(((v) << 13) & PTD_PR_MSK)
+
+#define	LOG2_PERIODIC_SIZE	5	/* arbitrary; this matches OHCI */
+#define	PERIODIC_SIZE		(1 << LOG2_PERIODIC_SIZE)
+
+struct isp1362_ep {
+	struct usb_host_endpoint *hep;
+	struct usb_device	*udev;
+
+	/* philips transfer descriptor */
+	struct ptd		ptd;
+
+	u8			maxpacket;
+	u8			epnum;
+	u8			nextpid;
+	u16			error_count;
+	u16			length;		/* of current packet */
+	s16			ptd_offset;	/* buffer offset in ISP1362 where
+						   PTD has been stored
+						   (for access thru HCDIRDATA) */
+	int			ptd_index;
+	int num_ptds;
+	void 			*data;		/* to databuf */
+	/* queue of active EPs (the ones transmitted to the chip) */
+	struct list_head	active;
+
+	/* periodic schedule */
+	u8			branch;
+	u16			interval;
+	u16			load;
+	u16			last_iso;
+
+	/* async schedule */
+	struct list_head	schedule;	/* list of all EPs that need processing */
+	struct list_head	remove_list;
+	int			num_req;
+};
+
+struct isp1362_ep_queue {
+	struct list_head	active;		/* list of PTDs currently processed by HC */
+	atomic_t		finishing;
+	unsigned long		buf_map;
+	unsigned long		skip_map;
+	int			free_ptd;
+	u16			buf_start;
+	u16			buf_size;
+	u16			blk_size;	/* PTD buffer block size for ATL and INTL */
+	u8			buf_count;
+	u8			buf_avail;
+	char			name[16];
+
+	/* for statistical tracking */
+	u8			stat_maxptds;	/* Max # of ptds seen simultaneously in fifo */
+	u8			ptd_count;	/* number of ptds submitted to this queue */
+};
+
+struct isp1362_hcd {
+	spinlock_t		lock;
+	void __iomem		*addr_reg;
+	void __iomem		*data_reg;
+
+	struct isp1362_platform_data *board;
+
+	struct proc_dir_entry	*pde;
+	unsigned long		stat1, stat2, stat4, stat8, stat16;
+
+	/* HC registers */
+	u32			intenb;		/* "OHCI" interrupts */
+	u16			irqenb;		/* uP interrupts */
+
+	/* Root hub registers */
+	u32			rhdesca;
+	u32			rhdescb;
+	u32			rhstatus;
+	u32			rhport[MAX_ROOT_PORTS];
+	unsigned long		next_statechange;
+
+	/* HC control reg shadow copy */
+	u32			hc_control;
+
+	/* async schedule: control, bulk */
+	struct list_head	async;
+
+	/* periodic schedule: int */
+	u16			load[PERIODIC_SIZE];
+	struct list_head	periodic;
+	u16			fmindex;
+
+	/* periodic schedule: isochronous */
+	struct list_head	isoc;
+	int			istl_flip:1;
+	int			irq_active:1;
+
+	/* Schedules for the current frame */
+	struct isp1362_ep_queue atl_queue;
+	struct isp1362_ep_queue intl_queue;
+	struct isp1362_ep_queue istl_queue[2];
+
+	/* list of PTDs retrieved from HC */
+	struct list_head	remove_list;
+	enum {
+		ISP1362_INT_SOF,
+		ISP1362_INT_ISTL0,
+		ISP1362_INT_ISTL1,
+		ISP1362_INT_EOT,
+		ISP1362_INT_OPR,
+		ISP1362_INT_SUSP,
+		ISP1362_INT_CLKRDY,
+		ISP1362_INT_INTL,
+		ISP1362_INT_ATL,
+		ISP1362_INT_OTG,
+		NUM_ISP1362_IRQS
+	} IRQ_NAMES;
+	unsigned int		irq_stat[NUM_ISP1362_IRQS];
+	int			req_serial;
+};
+
+static inline const char *ISP1362_INT_NAME(int n)
+{
+	switch (n) {
+	case ISP1362_INT_SOF:    return "SOF";
+	case ISP1362_INT_ISTL0:  return "ISTL0";
+	case ISP1362_INT_ISTL1:  return "ISTL1";
+	case ISP1362_INT_EOT:    return "EOT";
+	case ISP1362_INT_OPR:    return "OPR";
+	case ISP1362_INT_SUSP:   return "SUSP";
+	case ISP1362_INT_CLKRDY: return "CLKRDY";
+	case ISP1362_INT_INTL:   return "INTL";
+	case ISP1362_INT_ATL:    return "ATL";
+	case ISP1362_INT_OTG:    return "OTG";
+	default:                 return "unknown";
+	}
+}
+
+static inline void ALIGNSTAT(struct isp1362_hcd *isp1362_hcd, void *ptr)
+{
+	unsigned p = (unsigned)ptr;
+	if (!(p & 0xf))
+		isp1362_hcd->stat16++;
+	else if (!(p & 0x7))
+		isp1362_hcd->stat8++;
+	else if (!(p & 0x3))
+		isp1362_hcd->stat4++;
+	else if (!(p & 0x1))
+		isp1362_hcd->stat2++;
+	else
+		isp1362_hcd->stat1++;
+}
+
+static inline struct isp1362_hcd *hcd_to_isp1362_hcd(struct usb_hcd *hcd)
+{
+	return (struct isp1362_hcd *) (hcd->hcd_priv);
+}
+
+static inline struct usb_hcd *isp1362_hcd_to_hcd(struct isp1362_hcd *isp1362_hcd)
+{
+	return container_of((void *)isp1362_hcd, struct usb_hcd, hcd_priv);
+}
+
+#define frame_before(f1, f2)	((s16)((u16)f1 - (u16)f2) < 0)
+
+/*
+ * ISP1362 HW Interface
+ */
+
+#ifdef ISP1362_DEBUG
+#define DBG(level, fmt...) \
+	do { \
+		if (dbg_level > level) \
+			pr_debug(fmt); \
+	} while (0)
+#define _DBG(level, fmt...)	\
+	do { \
+		if (dbg_level > level) \
+			printk(fmt); \
+	} while (0)
+#else
+#define DBG(fmt...)		do {} while (0)
+#define _DBG DBG
+#endif
+
+#ifdef VERBOSE
+#    define VDBG(fmt...)	DBG(3, fmt)
+#else
+#    define VDBG(fmt...)	do {} while (0)
+#endif
+
+#ifdef REGISTERS
+#    define RDBG(fmt...)	DBG(1, fmt)
+#else
+#    define RDBG(fmt...)	do {} while (0)
+#endif
+
+#ifdef URB_TRACE
+#define URB_DBG(fmt...)		DBG(0, fmt)
+#else
+#define URB_DBG(fmt...)		do {} while (0)
+#endif
+
+
+#if USE_PLATFORM_DELAY
+#if USE_NDELAY
+#error USE_PLATFORM_DELAY and USE_NDELAY defined simultaneously.
+#endif
+#define	isp1362_delay(h, d)	(h)->board->delay(isp1362_hcd_to_hcd(h)->self.controller, d)
+#elif USE_NDELAY
+#define	isp1362_delay(h, d)	ndelay(d)
+#else
+#define	isp1362_delay(h, d)	do {} while (0)
+#endif
+
+#define get_urb(ep) ({							\
+	BUG_ON(list_empty(&ep->hep->urb_list));				\
+	container_of(ep->hep->urb_list.next, struct urb, urb_list);	\
+})
+
+/* basic access functions for ISP1362 chip registers */
+/* NOTE: The contents of the address pointer register cannot be read back! The driver must ensure,
+ * that all register accesses are performed with interrupts disabled, since the interrupt
+ * handler has no way of restoring the previous state.
+ */
+static void isp1362_write_addr(struct isp1362_hcd *isp1362_hcd, isp1362_reg_t reg)
+{
+	/*_BUG_ON((reg & ISP1362_REG_WRITE_OFFSET) && !(reg & REG_ACCESS_W));*/
+	REG_ACCESS_TEST(reg);
+	_BUG_ON(!irqs_disabled());
+	DUMMY_DELAY_ACCESS;
+	writew(ISP1362_REG_NO(reg), isp1362_hcd->addr_reg);
+	DUMMY_DELAY_ACCESS;
+	isp1362_delay(isp1362_hcd, 1);
+}
+
+static void isp1362_write_data16(struct isp1362_hcd *isp1362_hcd, u16 val)
+{
+	_BUG_ON(!irqs_disabled());
+	DUMMY_DELAY_ACCESS;
+	writew(val, isp1362_hcd->data_reg);
+}
+
+static u16 isp1362_read_data16(struct isp1362_hcd *isp1362_hcd)
+{
+	u16 val;
+
+	_BUG_ON(!irqs_disabled());
+	DUMMY_DELAY_ACCESS;
+	val = readw(isp1362_hcd->data_reg);
+
+	return val;
+}
+
+static void isp1362_write_data32(struct isp1362_hcd *isp1362_hcd, u32 val)
+{
+	_BUG_ON(!irqs_disabled());
+#if USE_32BIT
+	DUMMY_DELAY_ACCESS;
+	writel(val, isp1362_hcd->data_reg);
+#else
+	DUMMY_DELAY_ACCESS;
+	writew((u16)val, isp1362_hcd->data_reg);
+	DUMMY_DELAY_ACCESS;
+	writew(val >> 16, isp1362_hcd->data_reg);
+#endif
+}
+
+static u32 isp1362_read_data32(struct isp1362_hcd *isp1362_hcd)
+{
+	u32 val;
+
+	_BUG_ON(!irqs_disabled());
+#if USE_32BIT
+	DUMMY_DELAY_ACCESS;
+	val = readl(isp1362_hcd->data_reg);
+#else
+	DUMMY_DELAY_ACCESS;
+	val = (u32)readw(isp1362_hcd->data_reg);
+	DUMMY_DELAY_ACCESS;
+	val |= (u32)readw(isp1362_hcd->data_reg) << 16;
+#endif
+	return val;
+}
+
+/* use readsw/writesw to access the fifo whenever possible */
+/* assume HCDIRDATA or XFERCTR & addr_reg have been set up */
+static void isp1362_read_fifo(struct isp1362_hcd *isp1362_hcd, void *buf, u16 len)
+{
+	u8 *dp = buf;
+	u16 data;
+
+	if (!len)
+		return;
+
+	_BUG_ON(!irqs_disabled());
+
+	RDBG("%s: Reading %d byte from fifo to mem @ %p\n", __func__, len, buf);
+#if USE_32BIT
+	if (len >= 4) {
+		RDBG("%s: Using readsl for %d dwords\n", __func__, len >> 2);
+		readsl(isp1362_hcd->data_reg, dp, len >> 2);
+		dp += len & ~3;
+		len &= 3;
+	}
+#endif
+	if (len >= 2) {
+		RDBG("%s: Using readsw for %d words\n", __func__, len >> 1);
+		insw((unsigned long)isp1362_hcd->data_reg, dp, len >> 1);
+		dp += len & ~1;
+		len &= 1;
+	}
+
+	BUG_ON(len & ~1);
+	if (len > 0) {
+		data = isp1362_read_data16(isp1362_hcd);
+		RDBG("%s: Reading trailing byte %02x to mem @ %08x\n", __func__,
+		     (u8)data, (u32)dp);
+		*dp = (u8)data;
+	}
+}
+
+static void isp1362_write_fifo(struct isp1362_hcd *isp1362_hcd, void *buf, u16 len)
+{
+	u8 *dp = buf;
+	u16 data;
+
+	if (!len)
+		return;
+
+	if ((unsigned)dp & 0x1) {
+		/* not aligned */
+		for (; len > 1; len -= 2) {
+			data = *dp++;
+			data |= *dp++ << 8;
+			isp1362_write_data16(isp1362_hcd, data);
+		}
+		if (len)
+			isp1362_write_data16(isp1362_hcd, *dp);
+		return;
+	}
+
+	_BUG_ON(!irqs_disabled());
+
+	RDBG("%s: Writing %d byte to fifo from memory @%p\n", __func__, len, buf);
+#if USE_32BIT
+	if (len >= 4) {
+		RDBG("%s: Using writesl for %d dwords\n", __func__, len >> 2);
+		writesl(isp1362_hcd->data_reg, dp, len >> 2);
+		dp += len & ~3;
+		len &= 3;
+	}
+#endif
+	if (len >= 2) {
+		RDBG("%s: Using writesw for %d words\n", __func__, len >> 1);
+		outsw((unsigned long)isp1362_hcd->data_reg, dp, len >> 1);
+		dp += len & ~1;
+		len &= 1;
+	}
+
+	BUG_ON(len & ~1);
+	if (len > 0) {
+		/* finally write any trailing byte; we don't need to care
+		 * about the high byte of the last word written
+		 */
+		data = (u16)*dp;
+		RDBG("%s: Sending trailing byte %02x from mem @ %08x\n", __func__,
+			data, (u32)dp);
+		isp1362_write_data16(isp1362_hcd, data);
+	}
+}
+
+#define isp1362_read_reg16(d, r)		({			\
+	u16 __v;							\
+	REG_WIDTH_TEST(ISP1362_REG_##r, REG_WIDTH_16);			\
+	isp1362_write_addr(d, ISP1362_REG_##r);				\
+	__v = isp1362_read_data16(d);					\
+	RDBG("%s: Read %04x from %s[%02x]\n", __func__, __v, #r,	\
+	     ISP1362_REG_NO(ISP1362_REG_##r));				\
+	__v;								\
+})
+
+#define isp1362_read_reg32(d, r)		({			\
+	u32 __v;							\
+	REG_WIDTH_TEST(ISP1362_REG_##r, REG_WIDTH_32);			\
+	isp1362_write_addr(d, ISP1362_REG_##r);				\
+	__v = isp1362_read_data32(d);					\
+	RDBG("%s: Read %08x from %s[%02x]\n", __func__, __v, #r,	\
+	     ISP1362_REG_NO(ISP1362_REG_##r));				\
+	__v;								\
+})
+
+#define isp1362_write_reg16(d, r, v)	{					\
+	REG_WIDTH_TEST(ISP1362_REG_##r, REG_WIDTH_16);				\
+	isp1362_write_addr(d, (ISP1362_REG_##r) | ISP1362_REG_WRITE_OFFSET);	\
+	isp1362_write_data16(d, (u16)(v));					\
+	RDBG("%s: Wrote %04x to %s[%02x]\n", __func__, (u16)(v), #r,	\
+	     ISP1362_REG_NO(ISP1362_REG_##r));					\
+}
+
+#define isp1362_write_reg32(d, r, v)	{					\
+	REG_WIDTH_TEST(ISP1362_REG_##r, REG_WIDTH_32);				\
+	isp1362_write_addr(d, (ISP1362_REG_##r) | ISP1362_REG_WRITE_OFFSET);	\
+	isp1362_write_data32(d, (u32)(v));					\
+	RDBG("%s: Wrote %08x to %s[%02x]\n", __func__, (u32)(v), #r,	\
+	     ISP1362_REG_NO(ISP1362_REG_##r));					\
+}
+
+#define isp1362_set_mask16(d, r, m) {			\
+	u16 __v;					\
+	__v = isp1362_read_reg16(d, r);			\
+	if ((__v | m) != __v)				\
+		isp1362_write_reg16(d, r, __v | m);	\
+}
+
+#define isp1362_clr_mask16(d, r, m) {			\
+	u16 __v;					\
+	__v = isp1362_read_reg16(d, r);			\
+	if ((__v & ~m) != __v)			\
+		isp1362_write_reg16(d, r, __v & ~m);	\
+}
+
+#define isp1362_set_mask32(d, r, m) {			\
+	u32 __v;					\
+	__v = isp1362_read_reg32(d, r);			\
+	if ((__v | m) != __v)				\
+		isp1362_write_reg32(d, r, __v | m);	\
+}
+
+#define isp1362_clr_mask32(d, r, m) {			\
+	u32 __v;					\
+	__v = isp1362_read_reg32(d, r);			\
+	if ((__v & ~m) != __v)			\
+		isp1362_write_reg32(d, r, __v & ~m);	\
+}
+
+#ifdef ISP1362_DEBUG
+#define isp1362_show_reg(d, r) {								\
+	if ((ISP1362_REG_##r & REG_WIDTH_MASK) == REG_WIDTH_32)			\
+		DBG(0, "%-12s[%02x]: %08x\n", #r,					\
+			ISP1362_REG_NO(ISP1362_REG_##r), isp1362_read_reg32(d, r));	\
+	else									\
+		DBG(0, "%-12s[%02x]:     %04x\n", #r,					\
+			ISP1362_REG_NO(ISP1362_REG_##r), isp1362_read_reg16(d, r));	\
+}
+#else
+#define isp1362_show_reg(d, r)	do {} while (0)
+#endif
+
+static void __attribute__((__unused__)) isp1362_show_regs(struct isp1362_hcd *isp1362_hcd)
+{
+	isp1362_show_reg(isp1362_hcd, HCREVISION);
+	isp1362_show_reg(isp1362_hcd, HCCONTROL);
+	isp1362_show_reg(isp1362_hcd, HCCMDSTAT);
+	isp1362_show_reg(isp1362_hcd, HCINTSTAT);
+	isp1362_show_reg(isp1362_hcd, HCINTENB);
+	isp1362_show_reg(isp1362_hcd, HCFMINTVL);
+	isp1362_show_reg(isp1362_hcd, HCFMREM);
+	isp1362_show_reg(isp1362_hcd, HCFMNUM);
+	isp1362_show_reg(isp1362_hcd, HCLSTHRESH);
+	isp1362_show_reg(isp1362_hcd, HCRHDESCA);
+	isp1362_show_reg(isp1362_hcd, HCRHDESCB);
+	isp1362_show_reg(isp1362_hcd, HCRHSTATUS);
+	isp1362_show_reg(isp1362_hcd, HCRHPORT1);
+	isp1362_show_reg(isp1362_hcd, HCRHPORT2);
+
+	isp1362_show_reg(isp1362_hcd, HCHWCFG);
+	isp1362_show_reg(isp1362_hcd, HCDMACFG);
+	isp1362_show_reg(isp1362_hcd, HCXFERCTR);
+	isp1362_show_reg(isp1362_hcd, HCuPINT);
+
+	if (in_interrupt())
+		DBG(0, "%-12s[%02x]:     %04x\n", "HCuPINTENB",
+			 ISP1362_REG_NO(ISP1362_REG_HCuPINTENB), isp1362_hcd->irqenb);
+	else
+		isp1362_show_reg(isp1362_hcd, HCuPINTENB);
+	isp1362_show_reg(isp1362_hcd, HCCHIPID);
+	isp1362_show_reg(isp1362_hcd, HCSCRATCH);
+	isp1362_show_reg(isp1362_hcd, HCBUFSTAT);
+	isp1362_show_reg(isp1362_hcd, HCDIRADDR);
+	/* Access would advance fifo
+	 * isp1362_show_reg(isp1362_hcd, HCDIRDATA);
+	 */
+	isp1362_show_reg(isp1362_hcd, HCISTLBUFSZ);
+	isp1362_show_reg(isp1362_hcd, HCISTLRATE);
+	isp1362_show_reg(isp1362_hcd, HCINTLBUFSZ);
+	isp1362_show_reg(isp1362_hcd, HCINTLBLKSZ);
+	isp1362_show_reg(isp1362_hcd, HCINTLDONE);
+	isp1362_show_reg(isp1362_hcd, HCINTLSKIP);
+	isp1362_show_reg(isp1362_hcd, HCINTLLAST);
+	isp1362_show_reg(isp1362_hcd, HCINTLCURR);
+	isp1362_show_reg(isp1362_hcd, HCATLBUFSZ);
+	isp1362_show_reg(isp1362_hcd, HCATLBLKSZ);
+	/* only valid after ATL_DONE interrupt
+	 * isp1362_show_reg(isp1362_hcd, HCATLDONE);
+	 */
+	isp1362_show_reg(isp1362_hcd, HCATLSKIP);
+	isp1362_show_reg(isp1362_hcd, HCATLLAST);
+	isp1362_show_reg(isp1362_hcd, HCATLCURR);
+	isp1362_show_reg(isp1362_hcd, HCATLDTC);
+	isp1362_show_reg(isp1362_hcd, HCATLDTCTO);
+}
+
+static void isp1362_write_diraddr(struct isp1362_hcd *isp1362_hcd, u16 offset, u16 len)
+{
+	_BUG_ON(offset & 1);
+	_BUG_ON(offset >= ISP1362_BUF_SIZE);
+	_BUG_ON(len > ISP1362_BUF_SIZE);
+	_BUG_ON(offset + len > ISP1362_BUF_SIZE);
+	len = (len + 1) & ~1;
+
+	isp1362_clr_mask16(isp1362_hcd, HCDMACFG, HCDMACFG_CTR_ENABLE);
+	isp1362_write_reg32(isp1362_hcd, HCDIRADDR,
+			    HCDIRADDR_ADDR(offset) | HCDIRADDR_COUNT(len));
+}
+
+static void isp1362_read_buffer(struct isp1362_hcd *isp1362_hcd, void *buf, u16 offset, int len)
+{
+	_BUG_ON(offset & 1);
+
+	isp1362_write_diraddr(isp1362_hcd, offset, len);
+
+	DBG(3, "%s: Reading %d byte from buffer @%04x to memory @ %08x\n", __func__,
+	    len, offset, (u32)buf);
+
+	isp1362_write_reg16(isp1362_hcd, HCuPINT, HCuPINT_EOT);
+	_WARN_ON((isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_EOT));
+
+	isp1362_write_addr(isp1362_hcd, ISP1362_REG_HCDIRDATA);
+
+	isp1362_read_fifo(isp1362_hcd, buf, len);
+	_WARN_ON(!(isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_EOT));
+	isp1362_write_reg16(isp1362_hcd, HCuPINT, HCuPINT_EOT);
+	_WARN_ON((isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_EOT));
+}
+
+static void isp1362_write_buffer(struct isp1362_hcd *isp1362_hcd, void *buf, u16 offset, int len)
+{
+	_BUG_ON(offset & 1);
+
+	isp1362_write_diraddr(isp1362_hcd, offset, len);
+
+	DBG(3, "%s: Writing %d byte to buffer @%04x from memory @ %08x\n", __func__,
+	    len, offset, (u32)buf);
+
+	isp1362_write_reg16(isp1362_hcd, HCuPINT, HCuPINT_EOT);
+	_WARN_ON((isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_EOT));
+
+	isp1362_write_addr(isp1362_hcd, ISP1362_REG_HCDIRDATA | ISP1362_REG_WRITE_OFFSET);
+	isp1362_write_fifo(isp1362_hcd, buf, len);
+
+	_WARN_ON(!(isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_EOT));
+	isp1362_write_reg16(isp1362_hcd, HCuPINT, HCuPINT_EOT);
+	_WARN_ON((isp1362_read_reg16(isp1362_hcd, HCuPINT) & HCuPINT_EOT));
+}
+
+static void __attribute__((unused)) dump_data(char *buf, int len)
+{
+	if (dbg_level > 0) {
+		int k;
+		int lf = 0;
+
+		for (k = 0; k < len; ++k) {
+			if (!lf)
+				DBG(0, "%04x:", k);
+			printk(" %02x", ((u8 *) buf)[k]);
+			lf = 1;
+			if (!k)
+				continue;
+			if (k % 16 == 15) {
+				printk("\n");
+				lf = 0;
+				continue;
+			}
+			if (k % 8 == 7)
+				printk(" ");
+			if (k % 4 == 3)
+				printk(" ");
+		}
+		if (lf)
+			printk("\n");
+	}
+}
+
+#if defined(ISP1362_DEBUG) && defined(PTD_TRACE)
+
+static void dump_ptd(struct ptd *ptd)
+{
+	DBG(0, "EP %p: CC=%x EP=%d DIR=%x CNT=%d LEN=%d MPS=%d TGL=%x ACT=%x FA=%d SPD=%x SF=%x PR=%x LST=%x\n",
+	    container_of(ptd, struct isp1362_ep, ptd),
+	    PTD_GET_CC(ptd), PTD_GET_EP(ptd), PTD_GET_DIR(ptd),
+	    PTD_GET_COUNT(ptd), PTD_GET_LEN(ptd), PTD_GET_MPS(ptd),
+	    PTD_GET_TOGGLE(ptd), PTD_GET_ACTIVE(ptd), PTD_GET_FA(ptd),
+	    PTD_GET_SPD(ptd), PTD_GET_SF_INT(ptd), PTD_GET_PR(ptd), PTD_GET_LAST(ptd));
+	DBG(0, "  %04x %04x %04x %04x\n", ptd->count, ptd->mps, ptd->len, ptd->faddr);
+}
+
+static void dump_ptd_out_data(struct ptd *ptd, u8 *buf)
+{
+	if (dbg_level > 0) {
+		if (PTD_GET_DIR(ptd) != PTD_DIR_IN && PTD_GET_LEN(ptd)) {
+			DBG(0, "--out->\n");
+			dump_data(buf, PTD_GET_LEN(ptd));
+		}
+	}
+}
+
+static void dump_ptd_in_data(struct ptd *ptd, u8 *buf)
+{
+	if (dbg_level > 0) {
+		if (PTD_GET_DIR(ptd) == PTD_DIR_IN && PTD_GET_COUNT(ptd)) {
+			DBG(0, "<--in--\n");
+			dump_data(buf, PTD_GET_COUNT(ptd));
+		}
+		DBG(0, "-----\n");
+	}
+}
+
+static void dump_ptd_queue(struct isp1362_ep_queue *epq)
+{
+	struct isp1362_ep *ep;
+	int dbg = dbg_level;
+
+	dbg_level = 1;
+	list_for_each_entry(ep, &epq->active, active) {
+		dump_ptd(&ep->ptd);
+		dump_data(ep->data, ep->length);
+	}
+	dbg_level = dbg;
+}
+#else
+#define dump_ptd(ptd)			do {} while (0)
+#define dump_ptd_in_data(ptd, buf)	do {} while (0)
+#define dump_ptd_out_data(ptd, buf)	do {} while (0)
+#define dump_ptd_data(ptd, buf)		do {} while (0)
+#define dump_ptd_queue(epq)		do {} while (0)
+#endif
diff --git a/include/linux/usb/isp1362.h b/include/linux/usb/isp1362.h
new file mode 100644
index 0000000..642684b
--- /dev/null
+++ b/include/linux/usb/isp1362.h
@@ -0,0 +1,46 @@
+/*
+ * board initialization code should put one of these into dev->platform_data
+ * and place the isp1362 onto platform_bus.
+ */
+
+#ifndef __LINUX_USB_ISP1362_H__
+#define __LINUX_USB_ISP1362_H__
+
+struct isp1362_platform_data {
+	/* Enable internal pulldown resistors on downstream ports */
+	unsigned sel15Kres:1;
+	/* Clock cannot be stopped */
+	unsigned clknotstop:1;
+	/* On-chip overcurrent protection */
+	unsigned oc_enable:1;
+	/* INT output polarity */
+	unsigned int_act_high:1;
+	/* INT edge or level triggered */
+	unsigned int_edge_triggered:1;
+	/* DREQ output polarity */
+	unsigned dreq_act_high:1;
+	/* DACK input polarity */
+	unsigned dack_act_high:1;
+	/* chip can be resumed via H_WAKEUP pin */
+	unsigned remote_wakeup_connected:1;
+	/* Switch or not to switch (keep always powered) */
+	unsigned no_power_switching:1;
+	/* Ganged port power switching (0) or individual port power switching (1) */
+	unsigned power_switching_mode:1;
+	/* Given port_power, msec/2 after power on till power good */
+	u8 potpg;
+	/* Hardware reset set/clear */
+	void (*reset) (struct device *dev, int set);
+	/* Clock start/stop */
+	void (*clock) (struct device *dev, int start);
+	/* Inter-io delay (ns). The chip is picky about access timings; it
+	 * expects at least:
+	 * 110ns delay between consecutive accesses to DATA_REG,
+	 * 300ns delay between access to ADDR_REG and DATA_REG (registers)
+	 * 462ns delay between access to ADDR_REG and DATA_REG (buffer memory)
+	 * WE MUST NOT be activated during these intervals (even without CS!)
+	 */
+	void (*delay) (struct device *dev, unsigned int delay);
+};
+
+#endif
-- 
cgit v0.10.2


From 665d7662d15441b4b3e54131a9418a1a198d0d31 Mon Sep 17 00:00:00 2001
From: Guus Sliepen <guus@sliepen.org>
Date: Wed, 22 Jul 2009 17:39:42 +0200
Subject: USB: usbtmc: sanity checks for DEV_DEP_MSG_IN urbs

According to the specifications, an instrument should not return more data in a
DEV_DEP_MSG_IN urb than requested.  However, some instruments can send more
than requested. This could cause the kernel to write the extra data past the
end of the buffer provided by read().

Fix this by checking that the value of the TranserSize field is not larger than
the urb itself and not larger than the size of the userspace buffer. Also
correctly decrement the remaining size of the buffer when userspace read()s
more than USBTMC_SIZE_IOBUFFER.

Signed-off-by: Guus Sliepen <guus@sliepen.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 4f0858f..5bab8e5 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -369,13 +369,13 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 {
 	struct usbtmc_device_data *data;
 	struct device *dev;
-	unsigned long int n_characters;
+	u32 n_characters;
 	u8 *buffer;
 	int actual;
-	int done;
-	int remaining;
+	size_t done;
+	size_t remaining;
 	int retval;
-	int this_part;
+	size_t this_part;
 
 	/* Get pointer to private data structure */
 	data = filp->private_data;
@@ -461,6 +461,18 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 			       (buffer[6] << 16) +
 			       (buffer[7] << 24);
 
+		/* Ensure the instrument doesn't lie about it */
+		if(n_characters > actual - 12) {
+			dev_err(dev, "Device lies about message size: %zu > %zu\n", n_characters, actual - 12);
+			n_characters = actual - 12;
+		}
+
+		/* Ensure the instrument doesn't send more back than requested */
+		if(n_characters > this_part) {
+			dev_err(dev, "Device returns more than requested: %zu > %zu\n", done + n_characters, done + this_part);
+			n_characters = this_part;
+		}
+
 		/* Copy buffer to user space */
 		if (copy_to_user(buf + done, &buffer[12], n_characters)) {
 			/* There must have been an addressing problem */
@@ -471,6 +483,8 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 		done += n_characters;
 		if (n_characters < USBTMC_SIZE_IOBUFFER)
 			remaining = 0;
+		else
+			remaining -= n_characters;
 	}
 
 	/* Update file position value */
-- 
cgit v0.10.2


From a2fbf10eba3a38407e3984bc9503342de2b5e399 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 28 Jul 2009 11:22:41 -0700
Subject: USB: usbtmc: fix printk format warnings

Fix printk format warnings:
drivers/usb/class/usbtmc.c:466: warning: format '%zu' expects type 'size_t', but argument 4 has type 'u32'
drivers/usb/class/usbtmc.c:466: warning: format '%zu' expects type 'size_t', but argument 5 has type 'int'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 5bab8e5..40ef4da 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -463,7 +463,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 
 		/* Ensure the instrument doesn't lie about it */
 		if(n_characters > actual - 12) {
-			dev_err(dev, "Device lies about message size: %zu > %zu\n", n_characters, actual - 12);
+			dev_err(dev, "Device lies about message size: %u > %d\n", n_characters, actual - 12);
 			n_characters = actual - 12;
 		}
 
-- 
cgit v0.10.2


From c0ad7291aae3f76920bdddbc517e20b8d4338ec2 Mon Sep 17 00:00:00 2001
From: Bob Liu <yjfpb04@gmail.com>
Date: Tue, 28 Jul 2009 22:31:06 +0800
Subject: USB: uhci: rm repeatedly evaluation for urbp->qh

Signed-off-by: Bob Liu <yjfpb04@gmail.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c
index 64e57bf..acd582c 100644
--- a/drivers/usb/host/uhci-q.c
+++ b/drivers/usb/host/uhci-q.c
@@ -1422,7 +1422,6 @@ static int uhci_urb_enqueue(struct usb_hcd *hcd,
 		goto err_submit_failed;
 
 	/* Add this URB to the QH */
-	urbp->qh = qh;
 	list_add_tail(&urbp->node, &qh->queue);
 
 	/* If the new URB is the first and only one on this QH then either
-- 
cgit v0.10.2


From 501c9c0802d9fee05efb300de06c8b3d04f17458 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Mon, 27 Jul 2009 14:47:40 -0700
Subject: USB: at91: Add USB EHCI driver for at91sam9g45 series

Add host USB High speed driver for at91sam9g45 series.
The host driver is an EHCI with its companion OHCI. EHCI is
handled by the new ehci-atmel.c whereas the OHCI is always
handled by ohci-at91.c.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>

diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index 93cee3e..ebd7237 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -60,6 +60,7 @@ config USB_ARCH_HAS_EHCI
 	default y if SOC_AU1200
 	default y if ARCH_IXP4XX
 	default y if ARCH_W90X900
+	default y if ARCH_AT91SAM9G45
 	default PCI
 
 # ARM SA1111 chips have a non-PCI based "OHCI-compatible" USB host interface.
diff --git a/drivers/usb/host/ehci-atmel.c b/drivers/usb/host/ehci-atmel.c
new file mode 100644
index 0000000..87c1b7c
--- /dev/null
+++ b/drivers/usb/host/ehci-atmel.c
@@ -0,0 +1,230 @@
+/*
+ * Driver for EHCI UHP on Atmel chips
+ *
+ *  Copyright (C) 2009 Atmel Corporation,
+ *                     Nicolas Ferre <nicolas.ferre@atmel.com>
+ *
+ *  Based on various ehci-*.c drivers
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+/* interface and function clocks */
+static struct clk *iclk, *fclk;
+static int clocked;
+
+/*-------------------------------------------------------------------------*/
+
+static void atmel_start_clock(void)
+{
+	clk_enable(iclk);
+	clk_enable(fclk);
+	clocked = 1;
+}
+
+static void atmel_stop_clock(void)
+{
+	clk_disable(fclk);
+	clk_disable(iclk);
+	clocked = 0;
+}
+
+static void atmel_start_ehci(struct platform_device *pdev)
+{
+	dev_dbg(&pdev->dev, "start\n");
+	atmel_start_clock();
+}
+
+static void atmel_stop_ehci(struct platform_device *pdev)
+{
+	dev_dbg(&pdev->dev, "stop\n");
+	atmel_stop_clock();
+}
+
+/*-------------------------------------------------------------------------*/
+
+static int ehci_atmel_setup(struct usb_hcd *hcd)
+{
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
+	int retval = 0;
+
+	/* registers start at offset 0x0 */
+	ehci->caps = hcd->regs;
+	ehci->regs = hcd->regs +
+		HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase));
+	dbg_hcs_params(ehci, "reset");
+	dbg_hcc_params(ehci, "reset");
+
+	/* cache this readonly data; minimize chip reads */
+	ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params);
+
+	retval = ehci_halt(ehci);
+	if (retval)
+		return retval;
+
+	/* data structure init */
+	retval = ehci_init(hcd);
+	if (retval)
+		return retval;
+
+	ehci->sbrn = 0x20;
+
+	ehci_reset(ehci);
+	ehci_port_power(ehci, 0);
+
+	return retval;
+}
+
+static const struct hc_driver ehci_atmel_hc_driver = {
+	.description		= hcd_name,
+	.product_desc		= "Atmel EHCI UHP HS",
+	.hcd_priv_size		= sizeof(struct ehci_hcd),
+
+	/* generic hardware linkage */
+	.irq			= ehci_irq,
+	.flags			= HCD_MEMORY | HCD_USB2,
+
+	/* basic lifecycle operations */
+	.reset			= ehci_atmel_setup,
+	.start			= ehci_run,
+	.stop			= ehci_stop,
+	.shutdown		= ehci_shutdown,
+
+	/* managing i/o requests and associated device resources */
+	.urb_enqueue		= ehci_urb_enqueue,
+	.urb_dequeue		= ehci_urb_dequeue,
+	.endpoint_disable	= ehci_endpoint_disable,
+
+	/* scheduling support */
+	.get_frame_number	= ehci_get_frame,
+
+	/* root hub support */
+	.hub_status_data	= ehci_hub_status_data,
+	.hub_control		= ehci_hub_control,
+	.bus_suspend		= ehci_bus_suspend,
+	.bus_resume		= ehci_bus_resume,
+	.relinquish_port	= ehci_relinquish_port,
+	.port_handed_over	= ehci_port_handed_over,
+};
+
+static int __init ehci_atmel_drv_probe(struct platform_device *pdev)
+{
+	struct usb_hcd *hcd;
+	const struct hc_driver *driver = &ehci_atmel_hc_driver;
+	struct resource *res;
+	int irq;
+	int retval;
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	pr_debug("Initializing Atmel-SoC USB Host Controller\n");
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev,
+			"Found HC with no IRQ. Check %s setup!\n",
+			dev_name(&pdev->dev));
+		retval = -ENODEV;
+		goto fail_create_hcd;
+	}
+
+	hcd = usb_create_hcd(driver, &pdev->dev, dev_name(&pdev->dev));
+	if (!hcd) {
+		retval = -ENOMEM;
+		goto fail_create_hcd;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev,
+			"Found HC with no register addr. Check %s setup!\n",
+			dev_name(&pdev->dev));
+		retval = -ENODEV;
+		goto fail_request_resource;
+	}
+	hcd->rsrc_start = res->start;
+	hcd->rsrc_len = res->end - res->start + 1;
+
+	if (!request_mem_region(hcd->rsrc_start, hcd->rsrc_len,
+				driver->description)) {
+		dev_dbg(&pdev->dev, "controller already in use\n");
+		retval = -EBUSY;
+		goto fail_request_resource;
+	}
+
+	hcd->regs = ioremap_nocache(hcd->rsrc_start, hcd->rsrc_len);
+	if (hcd->regs == NULL) {
+		dev_dbg(&pdev->dev, "error mapping memory\n");
+		retval = -EFAULT;
+		goto fail_ioremap;
+	}
+
+	iclk = clk_get(&pdev->dev, "ehci_clk");
+	if (IS_ERR(iclk)) {
+		dev_err(&pdev->dev, "Error getting interface clock\n");
+		retval = -ENOENT;
+		goto fail_get_iclk;
+	}
+	fclk = clk_get(&pdev->dev, "uhpck");
+	if (IS_ERR(fclk)) {
+		dev_err(&pdev->dev, "Error getting function clock\n");
+		retval = -ENOENT;
+		goto fail_get_fclk;
+	}
+
+	atmel_start_ehci(pdev);
+
+	retval = usb_add_hcd(hcd, irq, IRQF_SHARED);
+	if (retval)
+		goto fail_add_hcd;
+
+	return retval;
+
+fail_add_hcd:
+	atmel_stop_ehci(pdev);
+	clk_put(fclk);
+fail_get_fclk:
+	clk_put(iclk);
+fail_get_iclk:
+	iounmap(hcd->regs);
+fail_ioremap:
+	release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
+fail_request_resource:
+	usb_put_hcd(hcd);
+fail_create_hcd:
+	dev_err(&pdev->dev, "init %s fail, %d\n",
+		dev_name(&pdev->dev), retval);
+
+	return retval;
+}
+
+static int __exit ehci_atmel_drv_remove(struct platform_device *pdev)
+{
+	struct usb_hcd *hcd = platform_get_drvdata(pdev);
+
+	ehci_shutdown(hcd);
+	usb_remove_hcd(hcd);
+	iounmap(hcd->regs);
+	release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
+	usb_put_hcd(hcd);
+
+	atmel_stop_ehci(pdev);
+	clk_put(fclk);
+	clk_put(iclk);
+	fclk = iclk = NULL;
+
+	return 0;
+}
+
+static struct platform_driver ehci_atmel_driver = {
+	.probe		= ehci_atmel_drv_probe,
+	.remove		= __exit_p(ehci_atmel_drv_remove),
+	.shutdown	= usb_hcd_platform_shutdown,
+	.driver.name	= "atmel-ehci",
+};
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 5fbc67c..c227ba4 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1136,6 +1136,11 @@ MODULE_LICENSE ("GPL");
 #define	PLATFORM_DRIVER		ehci_hcd_w90x900_driver
 #endif
 
+#ifdef CONFIG_ARCH_AT91
+#include "ehci-atmel.c"
+#define	PLATFORM_DRIVER		ehci_atmel_driver
+#endif
+
 #if !defined(PCI_DRIVER) && !defined(PLATFORM_DRIVER) && \
     !defined(PS3_SYSTEM_BUS_DRIVER) && !defined(OF_PLATFORM_DRIVER)
 #error "missing bus glue for ehci-hcd"
-- 
cgit v0.10.2


From aa781af00a7f55ade0ce8a21d4b08f1f6c77e8cd Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Mon, 27 Jul 2009 15:00:35 -0700
Subject: USB: at91: Add USB gadget driver selection for at91sam9g45 series

Add gadget USB drivers for at91sam9g45 series. Those SOC include
high speed USB interfaces.
The gadget driver is the already available atmel_usba_udc.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 9f986b4..3537d91 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -124,7 +124,7 @@ choice
 
 config USB_GADGET_AT91
 	boolean "Atmel AT91 USB Device Port"
-	depends on ARCH_AT91 && !ARCH_AT91SAM9RL && !ARCH_AT91CAP9
+	depends on ARCH_AT91 && !ARCH_AT91SAM9RL && !ARCH_AT91CAP9 && !ARCH_AT91SAM9G45
 	select USB_GADGET_SELECTED
 	help
 	   Many Atmel AT91 processors (such as the AT91RM2000) have a
@@ -143,7 +143,7 @@ config USB_AT91
 config USB_GADGET_ATMEL_USBA
 	boolean "Atmel USBA"
 	select USB_GADGET_DUALSPEED
-	depends on AVR32 || ARCH_AT91CAP9 || ARCH_AT91SAM9RL
+	depends on AVR32 || ARCH_AT91CAP9 || ARCH_AT91SAM9RL || ARCH_AT91SAM9G45
 	help
 	  USBA is the integrated high-speed USB Device controller on
 	  the AT32AP700x, some AT91SAM9 and AT91CAP9 processors from Atmel.
-- 
cgit v0.10.2


From 2f2cac3c1af2bfc72c55b0054b6b95309882e27b Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Mon, 27 Jul 2009 14:59:24 -0700
Subject: USB: at91: modify OHCI driver to allow shared interrupts

At91sam9g45 series has a set of high speed USB interfaces.
The host driver is an EHCI with its companion OHCI. OHCI is
always handled by ohci-at91.c.
This wrapper is just modified to allow IRQ sharing
between two controllers.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index bb5e6f6..7ccffcb 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -148,7 +148,7 @@ static int usb_hcd_at91_probe(const struct hc_driver *driver,
 	at91_start_hc(pdev);
 	ohci_hcd_init(hcd_to_ohci(hcd));
 
-	retval = usb_add_hcd(hcd, pdev->resource[1].start, IRQF_DISABLED);
+	retval = usb_add_hcd(hcd, pdev->resource[1].start, IRQF_SHARED);
 	if (retval == 0)
 		return retval;
 
-- 
cgit v0.10.2


From c35013087aa9b10e4674b53b7c8f7966de83c194 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 29 Jul 2009 14:23:25 +0300
Subject: USB: audio: guard kernel-only code with __KERNEL__

include/linux/usb/audio.h is exported to userspace,
so part of this file that is for internal kernel
usage need to be guarded with ifdef __KERNEL__.
This way make headers_install will stript it out.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 7b33c49..eaf9dff 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -276,6 +276,8 @@ struct uac_iso_endpoint_descriptor {
 #define UAC_FU_BASS_BOOST	(1 << (UAC_BASS_BOOST_CONTROL - 1))
 #define UAC_FU_LOUDNESS		(1 << (UAC_LOUDNESS_CONTROL - 1))
 
+#ifdef __KERNEL__
+
 struct usb_audio_control {
 	struct list_head list;
 	const char *name;
@@ -294,4 +296,6 @@ struct usb_audio_control_selector {
 	struct usb_descriptor_header *desc;
 };
 
+#endif /* __KERNEL__ */
+
 #endif /* __LINUX_USB_AUDIO_H */
-- 
cgit v0.10.2


From 6e23ec4ff2f2181c22ea02cf3774b882acef27e0 Mon Sep 17 00:00:00 2001
From: Anand Gadiyar <gadiyar@ti.com>
Date: Tue, 28 Jul 2009 23:52:21 +0530
Subject: USB: EHCI: OHCI: Remove unnecessary includes of reboot.h

EHCI: OHCI: Remove unnecessary includes of reboot.h

Signed-off-by: Anand Gadiyar <gadiyar@ti.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index c227ba4..6887aac 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -30,7 +30,6 @@
 #include <linux/timer.h>
 #include <linux/list.h>
 #include <linux/interrupt.h>
-#include <linux/reboot.h>
 #include <linux/usb.h>
 #include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 5815168..78bb771 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -34,7 +34,6 @@
 #include <linux/usb/otg.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
-#include <linux/reboot.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
 
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index 5ac489e..50f57f4 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -33,7 +33,6 @@
 #include <linux/timer.h>
 #include <linux/list.h>
 #include <linux/interrupt.h>
-#include <linux/reboot.h>
 #include <linux/usb.h>
 #include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
-- 
cgit v0.10.2


From 7b4361f0848193ddc36dfb2c9a7391c56a9df2ad Mon Sep 17 00:00:00 2001
From: Aric Blumer <aric@sdgsystems.com>
Date: Thu, 30 Jul 2009 13:26:58 -0400
Subject: USB: ohci-pxa27x: Allow NOCP and OCPM to be cleared

Some ohci-pxa27x platforms may require OCPM and NOCP in UHCRHDA to be
clear, but the existing code was only allowing setting.  This patch
ensures that these bits are clear if the respective flags are not set.
This is particularly important for the PXA3xx family where the
documentation says OCPM must be cleared, but it is set after reset.

Signed-off-by: Aric Blumer <aric@sdgsystems.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c
index e44dc2c..b5294a9 100644
--- a/drivers/usb/host/ohci-pxa27x.c
+++ b/drivers/usb/host/ohci-pxa27x.c
@@ -177,9 +177,13 @@ static inline void pxa27x_setup_hc(struct pxa27x_ohci *ohci,
 
 	if (inf->flags & NO_OC_PROTECTION)
 		uhcrhda |= UHCRHDA_NOCP;
+	else
+		uhcrhda &= ~UHCRHDA_NOCP;
 
 	if (inf->flags & OC_MODE_PERPORT)
 		uhcrhda |= UHCRHDA_OCPM;
+	else
+		uhcrhda &= ~UHCRHDA_OCPM;
 
 	if (inf->power_on_delay) {
 		uhcrhda &= ~UHCRHDA_POTPGT(0xff);
-- 
cgit v0.10.2


From 5128a66c6605d8178f69b7a8f2a70060933a26b4 Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Thu, 6 Aug 2009 16:09:52 -0700
Subject: USB: ark3116: add IrDA support for Gembird UIR-22

Add IrDA support to ark3116 driver.  This makes Gembird UIR-22 USB to IrDA
adapter work (vendor ID 0x18ec, device ID 0x3118).  This adapter contains
ARK3116T USB serial chip and an IrDA transceiver, thus a command like
"irattach /dev/ttyUSB0 -s" is needed.

All magic numbers were captured using usbsnoop from windows driver that
came with the device.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c
index 5d25d3e..131e61a 100644
--- a/drivers/usb/serial/ark3116.c
+++ b/drivers/usb/serial/ark3116.c
@@ -31,10 +31,20 @@ static int debug;
 
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x6547, 0x0232) },
+	{ USB_DEVICE(0x18ec, 0x3118) },		/* USB to IrDA adapter */
 	{ },
 };
 MODULE_DEVICE_TABLE(usb, id_table);
 
+static int is_irda(struct usb_serial *serial)
+{
+	struct usb_device *dev = serial->dev;
+	if (le16_to_cpu(dev->descriptor.idVendor) == 0x18ec &&
+			le16_to_cpu(dev->descriptor.idProduct) == 0x3118)
+		return 1;
+	return 0;
+}
+
 static inline void ARK3116_SND(struct usb_serial *serial, int seq,
 			       __u8 request, __u8 requesttype,
 			       __u16 value, __u16 index)
@@ -84,11 +94,21 @@ static int ark3116_attach(struct usb_serial *serial)
 		return -ENOMEM;
 	}
 
+	if (is_irda(serial))
+		dbg("IrDA mode");
+
 	/* 3 */
 	ARK3116_SND(serial, 3, 0xFE, 0x40, 0x0008, 0x0002);
 	ARK3116_SND(serial, 4, 0xFE, 0x40, 0x0008, 0x0001);
 	ARK3116_SND(serial, 5, 0xFE, 0x40, 0x0000, 0x0008);
-	ARK3116_SND(serial, 6, 0xFE, 0x40, 0x0000, 0x000B);
+	ARK3116_SND(serial, 6, 0xFE, 0x40, is_irda(serial) ? 0x0001 : 0x0000,
+		    0x000B);
+
+	if (is_irda(serial)) {
+		ARK3116_SND(serial, 1001, 0xFE, 0x40, 0x0000, 0x000C);
+		ARK3116_SND(serial, 1002, 0xFE, 0x40, 0x0041, 0x000D);
+		ARK3116_SND(serial, 1003, 0xFE, 0x40, 0x0001, 0x000A);
+	}
 
 	/* <-- seq7 */
 	ARK3116_RCV(serial,  7, 0xFE, 0xC0, 0x0000, 0x0003, 0x00, buf);
@@ -125,6 +145,8 @@ static int ark3116_attach(struct usb_serial *serial)
 	ARK3116_SND(serial, 147, 0xFE, 0x40, 0x0083, 0x0003);
 	ARK3116_SND(serial, 148, 0xFE, 0x40, 0x0038, 0x0000);
 	ARK3116_SND(serial, 149, 0xFE, 0x40, 0x0001, 0x0001);
+	if (is_irda(serial))
+		ARK3116_SND(serial, 1004, 0xFE, 0x40, 0x0000, 0x0009);
 	ARK3116_SND(serial, 150, 0xFE, 0x40, 0x0003, 0x0003);
 	ARK3116_RCV(serial, 151, 0xFE, 0xC0, 0x0000, 0x0004, 0x03, buf);
 	ARK3116_SND(serial, 152, 0xFE, 0x40, 0x0000, 0x0003);
-- 
cgit v0.10.2


From d0defb855c8504c49b92bdc0203689ce9b4cf7ba Mon Sep 17 00:00:00 2001
From: fangxiaozhi <huananhu@huawei.com>
Date: Fri, 7 Aug 2009 12:30:35 +0800
Subject: USB: usb-storage fails to attach to Huawei Datacard cdrom device

In this patch, we always make the return value of function
usb_stor_huawei_e220_init to be zero. Then it will not prevent usb-storage
driver from attaching to the CDROM device of Huawei Datacard.

Signed-off-by: fangxiaozhi <huananhu@huawei.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/storage/initializers.c b/drivers/usb/storage/initializers.c
index ec17c96..105d900 100644
--- a/drivers/usb/storage/initializers.c
+++ b/drivers/usb/storage/initializers.c
@@ -102,5 +102,5 @@ int usb_stor_huawei_e220_init(struct us_data *us)
 				      USB_TYPE_STANDARD | USB_RECIP_DEVICE,
 				      0x01, 0x0, NULL, 0x0, 1000);
 	US_DEBUGP("Huawei mode set result is %d\n", result);
-	return (result ? 0 : -ENODEV);
+	return 0;
 }
-- 
cgit v0.10.2


From 417b57b3e4e34df07a2aceaf75baffeacdd9385f Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Thu, 6 Aug 2009 16:09:51 -0700
Subject: USB: gadget: Read buffer overflow

Check whether index is within bounds before testing the element.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 59e8523..d05397e 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -602,7 +602,7 @@ static int get_string(struct usb_composite_dev *cdev,
 			}
 		}
 
-		for (len = 0; s->wData[len] && len <= 126; len++)
+		for (len = 0; len <= 126 && s->wData[len]; len++)
 			continue;
 		if (!len)
 			return -EINVAL;
-- 
cgit v0.10.2


From e792b1b0b83c276ca786b01cad662dc2e5d18843 Mon Sep 17 00:00:00 2001
From: Robin Callender <robin_callender@hotmail.com>
Date: Sun, 2 Aug 2009 11:38:58 -0700
Subject: USB: gadget: audio driver seg-fault fix

The included patch can be applied to the new usb gadget audio driver.

It addresses a seg-fault in uncovered in g_audio.ko.
The fault occurs in the function u_audio.c::gaudio_open_end_dev() when
device /dev/snd/pcmC0D0c (FILE_PCM_CAPTURE) is not present.

I suspect there may be similar problems with device /dev/snd/pcmC0D0p
(FILE_PCM_PLAYBACK) handling also.  I leave that for the developer(s),
as I was unsure as to the side-effects of not calling
playback_default_hw_params() in the initialization phase.

Signed-off-by: Robin Callender <robin_callender@hotmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/u_audio.c b/drivers/usb/gadget/u_audio.c
index 0f3d22f..b5200d5 100644
--- a/drivers/usb/gadget/u_audio.c
+++ b/drivers/usb/gadget/u_audio.c
@@ -253,11 +253,13 @@ static int gaudio_open_snd_dev(struct gaudio *card)
 	snd->filp = filp_open(fn_cap, O_RDONLY, 0);
 	if (IS_ERR(snd->filp)) {
 		ERROR(card, "No such PCM capture device: %s\n", fn_cap);
-		snd->filp = NULL;
+		snd->substream = NULL;
+		snd->card = NULL;
+	} else {
+		pcm_file = snd->filp->private_data;
+		snd->substream = pcm_file->substream;
+		snd->card = card;
 	}
-	pcm_file = snd->filp->private_data;
-	snd->substream = pcm_file->substream;
-	snd->card = card;
 
 	return 0;
 }
-- 
cgit v0.10.2


From 7949f4e16456183bae0bc19ffe92072a27d0553e Mon Sep 17 00:00:00 2001
From: Ken MacLeod <ken@bitsko.slc.ut.us>
Date: Thu, 6 Aug 2009 14:18:27 -0500
Subject: USB: isp1362: fix pulldown register defines and conf logic

HCHWCFG_PULLDOWN_DS2 and HCHWCFG_PULLDOWN_DS1 were swapped.  Incorrect
operator precedence in isp1362_hc_start() hid part of the problem.
This fixes a problem where Port 1 in Host mode fails to see disconnects.

Signed-Off-By: Ken MacLeod <ken@bitsko.slc.ut.us>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
index 5819e10..16ba1ea 100644
--- a/drivers/usb/host/isp1362-hcd.c
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -2563,7 +2563,7 @@ static int isp1362_hc_start(struct usb_hcd *hcd)
 	hwcfg = HCHWCFG_INT_ENABLE | HCHWCFG_DBWIDTH(1);
 	if (board->sel15Kres)
 		hwcfg |= HCHWCFG_PULLDOWN_DS2 |
-			(MAX_ROOT_PORTS > 1) ? HCHWCFG_PULLDOWN_DS1 : 0;
+			((MAX_ROOT_PORTS > 1) ? HCHWCFG_PULLDOWN_DS1 : 0);
 	if (board->clknotstop)
 		hwcfg |= HCHWCFG_CLKNOTSTOP;
 	if (board->oc_enable)
diff --git a/drivers/usb/host/isp1362.h b/drivers/usb/host/isp1362.h
index 26e44fc..fe60f62 100644
--- a/drivers/usb/host/isp1362.h
+++ b/drivers/usb/host/isp1362.h
@@ -161,8 +161,8 @@ ISP1362_REG(HCRHPORT2,	0x16,	REG_WIDTH_32,	REG_ACCESS_RW);
 ISP1362_REG(HCHWCFG,	0x20,	REG_WIDTH_16,	REG_ACCESS_RW);
 #define HCHWCFG_DISABLE_SUSPEND	(1 << 15)
 #define HCHWCFG_GLOBAL_PWRDOWN	(1 << 14)
-#define HCHWCFG_PULLDOWN_DS1	(1 << 13)
-#define HCHWCFG_PULLDOWN_DS2	(1 << 12)
+#define HCHWCFG_PULLDOWN_DS2	(1 << 13)
+#define HCHWCFG_PULLDOWN_DS1	(1 << 12)
 #define HCHWCFG_CLKNOTSTOP	(1 << 11)
 #define HCHWCFG_ANALOG_OC	(1 << 10)
 #define HCHWCFG_ONEINT		(1 << 9)
-- 
cgit v0.10.2


From 5971897f3025249c0eea1987fb12efb8c65c93a4 Mon Sep 17 00:00:00 2001
From: Markus Rechberger <mrechberger@gmail.com>
Date: Sun, 9 Aug 2009 21:23:34 +0200
Subject: USB: increase usbdevfs max isoc buffer size

The current limit only allows isochronous transfers up to 32kbyte/urb,
updating this to 192 kbyte/urb improves the reliability of the
transfer. USB 2.0 transfer is possible with 32kbyte but increases the
chance of corrupted/incomplete data when the system is performing some
other tasks in the background.

http://www.spinics.net/lists/linux-usb/msg19955.html

Signed-off-by: Markus Rechberger <mrechberger@gmail.com>
Cc: Oliver Neukum <oliver@neukum.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index a1add77..71514be 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1091,7 +1091,8 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 			}
 			totlen += isopkt[u].length;
 		}
-		if (totlen > 32768) {
+		/* 3072 * 64 microframes */
+		if (totlen > 196608) {
 			kfree(isopkt);
 			return -EINVAL;
 		}
-- 
cgit v0.10.2


From db8be50c4307dac2b37305fc59c8dc0f978d09ea Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 3 Aug 2009 12:40:27 +0100
Subject: USB: Work around BIOS bugs by quiescing USB controllers earlier

We are seeing a number of crashes in SMM, when VT-d is enabled while
'Legacy USB support' is enabled in various BIOSes.

The BIOS is supposed to indicate which addresses it uses for DMA in a
special ACPI table ("RMRR"), so that we can punch a hole for it when we
set up the IOMMU.

The problem is, as usual, that BIOS engineers are totally incompetent.
They write code which will crash if the DMA goes AWOL, and then they
either neglect to provide an RMRR table at all, or they put the wrong
addresses in it. And of course they don't do _any_ QA, since that would
take too much time away from their crack-smoking habit.

The real fix, of course, is for consumers to refuse to buy motherboards
which only have closed-source firmware available. If we had _open_
firmware, bugs like this would be easy to fix.

Since that's something I can only dream about, this patch implements an
alternative -- ensuring that the USB controllers are handed off from the
BIOS and quiesced _before_ the IOMMU is initialised. That would have
been a much better design than this RMRR nonsense in the first place, of
course. The bootloader has no business doing DMA after the OS has booted
anyway.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 83b5f9c..23cf3bd 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -475,4 +475,4 @@ static void __devinit quirk_usb_early_handoff(struct pci_dev *pdev)
 	else if (pdev->class == PCI_CLASS_SERIAL_USB_XHCI)
 		quirk_usb_handoff_xhci(pdev);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff);
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff);
-- 
cgit v0.10.2


From c740d0d80d385b178c319f3d6e627ade8f732488 Mon Sep 17 00:00:00 2001
From: Ajay Kumar Gupta <ajay.gupta@ti.com>
Date: Mon, 3 Aug 2009 11:43:40 +0530
Subject: USB: musb: fix put_device() call sequence

Invoke put_device(musb->xceiv->dev) before musb_platform_exit()as
xceiv is getting unregistered in musb_platform_exit().

Fixes put_device() panic when module insert/removal is performed
multiple times.

Signed-off-by: Ajay Kumar Gupta <ajay.gupta@ti.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 1d26bed..3a61ddb 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1850,6 +1850,10 @@ static void musb_free(struct musb *musb)
 		dma_controller_destroy(c);
 	}
 
+#ifdef CONFIG_USB_MUSB_OTG
+	put_device(musb->xceiv->dev);
+#endif
+
 	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 	musb_platform_exit(musb);
 	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
@@ -1859,10 +1863,6 @@ static void musb_free(struct musb *musb)
 		clk_put(musb->clock);
 	}
 
-#ifdef CONFIG_USB_MUSB_OTG
-	put_device(musb->xceiv->dev);
-#endif
-
 #ifdef CONFIG_USB_MUSB_HDRC_HCD
 	usb_put_hcd(musb_to_hcd(musb));
 #else
-- 
cgit v0.10.2


From 0ffd3b2902e28b13d8379df0f09a55668f330f77 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Sat, 1 Aug 2009 20:39:57 +0800
Subject: USB: otg: twl4030-usb.c: mark .init as subsys_initcall_sync

This patch fixes the .probe failure of twl4030_usb driver if
it is compiled into kernel.

Since twl4030_usb USB transceiver .probe depends on
twl4030-regulator, marking twl4030_usb_init as subsys_initcall_sync
can make it called after twl4030-regulator initialization is finished,
then twl4030_usb USB transceiver driver can be probed successfully.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/otg/twl4030-usb.c b/drivers/usb/otg/twl4030-usb.c
index 9e3e7a5..d1852d4 100644
--- a/drivers/usb/otg/twl4030-usb.c
+++ b/drivers/usb/otg/twl4030-usb.c
@@ -774,7 +774,7 @@ static int __init twl4030_usb_init(void)
 {
 	return platform_driver_register(&twl4030_usb_driver);
 }
-subsys_initcall(twl4030_usb_init);
+subsys_initcall_sync(twl4030_usb_init);
 
 static void __exit twl4030_usb_exit(void)
 {
-- 
cgit v0.10.2


From 015798b2f166725b1dae2b07b5ffb127ab187be0 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jon-hunter@ti.com>
Date: Wed, 12 Aug 2009 11:57:59 -0400
Subject: USB: EHCI: ensure all watchdog timer events are deleted when
 suspending usb

This patch was previously discussed in the following thread:
http://thread.gmane.org/gmane.linux.usb.general/19472/focus=19484

On the OMAP3 device the usbhost controller is in a separate internal
power-domain. So when the usbhost is inactive or suspend is called,
we can disable clocks and power-down the usbhost to save power.

Recently we found that after calling ehci_bus_suspend() and disabling
the usbhost clocks we would see the ehci watchdog timer event fire. This
was causing a kernel panic because the usbhost controllers clocks were
disabled and inside the watchdog timer function the clocks were not
being re-enabled, so when the ehci registers were accessed this resulted
in a CPU data-abort.

To avoid this panic, per recommendation from Alan Stern (see above thread), we
make sure any pending timer events (that may have been scheduled by calling
ehci_work within the ehci_bus_suspend() function) are deleted before returning.

Signed-off-by: Fei Yang <fei.yang@motorola.com>
Signed-off-by: Jon Hunter <jon-hunter@ti.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index 6fef1ee..818647c 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -209,6 +209,11 @@ static int ehci_bus_suspend (struct usb_hcd *hcd)
 
 	ehci->next_statechange = jiffies + msecs_to_jiffies(10);
 	spin_unlock_irq (&ehci->lock);
+
+	/* ehci_work() may have re-enabled the watchdog timer, which we do not
+	 * want, and so we must delete any pending watchdog timer events.
+	 */
+	del_timer_sync(&ehci->watchdog);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 3d2b0814f15f94506156943c8148da90b6491453 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 12 Aug 2009 16:51:09 +0200
Subject: USB: isp1362: Correct use of ! and &

Correct priority problem in the use of ! and &.

The semantic patch that makes this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@ expression E; constant C; @@
- !E & C
+ !(E & C)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
index 16ba1ea..e35d828 100644
--- a/drivers/usb/host/isp1362-hcd.c
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -1075,8 +1075,10 @@ static irqreturn_t isp1362_irq(struct usb_hcd *hcd)
 		isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ISTL0_FULL);
 		DBG(1, "%s: ISTL0\n", __func__);
 		WARN_ON((int)!!isp1362_hcd->istl_flip);
-		WARN_ON(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL0_ACTIVE);
-		WARN_ON(!isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL0_DONE);
+		WARN_ON(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) &
+			HCBUFSTAT_ISTL0_ACTIVE);
+		WARN_ON(!(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) &
+			HCBUFSTAT_ISTL0_DONE));
 		isp1362_hcd->irqenb &= ~HCuPINT_ISTL0;
 	}
 
@@ -1087,8 +1089,10 @@ static irqreturn_t isp1362_irq(struct usb_hcd *hcd)
 		isp1362_clr_mask16(isp1362_hcd, HCBUFSTAT, HCBUFSTAT_ISTL1_FULL);
 		DBG(1, "%s: ISTL1\n", __func__);
 		WARN_ON(!(int)isp1362_hcd->istl_flip);
-		WARN_ON(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL1_ACTIVE);
-		WARN_ON(!isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) & HCBUFSTAT_ISTL1_DONE);
+		WARN_ON(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) &
+			HCBUFSTAT_ISTL1_ACTIVE);
+		WARN_ON(!(isp1362_read_reg16(isp1362_hcd, HCBUFSTAT) &
+			HCBUFSTAT_ISTL1_DONE));
 		isp1362_hcd->irqenb &= ~HCuPINT_ISTL1;
 	}
 
-- 
cgit v0.10.2


From 5429c7316577fcd859f6b53e10884bb8e1e3bcef Mon Sep 17 00:00:00 2001
From: Li Yang <leoli@freescale.com>
Date: Tue, 11 Aug 2009 11:11:11 +0800
Subject: USB: gadget: Update Freescale UDC entry in MAINTAINERS

Change the F entry for file rename, and make it also cover fsl_qe_udc
driver.  Update the name accordingly.

Signed-off-by: Li Yang <leoli@freescale.com>
Acked-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index d24c882..5e1bf0c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2107,12 +2107,12 @@ S:	Supported
 F:	arch/powerpc/sysdev/qe_lib/
 F:	arch/powerpc/include/asm/*qe.h
 
-FREESCALE HIGHSPEED USB DEVICE DRIVER
+FREESCALE USB PERIPHERIAL DRIVERS
 M:	Li Yang <leoli@freescale.com>
 L:	linux-usb@vger.kernel.org
 L:	linuxppc-dev@ozlabs.org
 S:	Maintained
-F:	drivers/usb/gadget/fsl_usb2_udc.c
+F:	drivers/usb/gadget/fsl*
 
 FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
 M:	Li Yang <leoli@freescale.com>
-- 
cgit v0.10.2


From 9ca33a0f1abdefea3811666d9e87af11fd0af6c6 Mon Sep 17 00:00:00 2001
From: Brian Niebuhr <bniebuhr@efjohnson.com>
Date: Mon, 10 Aug 2009 16:46:59 -0500
Subject: USB: Fix CDC EEM host driver 'sentinel' CRC validation

This is an alternate solution to the EEM 'sentinel' CRC valiation issue.

CDC EEM allows using a 'sentinel' ethernet frame CRC of 0xdeadbeef in
place of a real CRC.  The 'sentinel' value is transmitted in big-endian
order whereas the normal CRC is little-endian.  This patch handles both
cases appropriately.

Signed-off-by: Brian Niebuhr <bniebuhr@efjohnson.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index 45cebfb..2330065 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -300,20 +300,23 @@ static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 					return 0;
 			}
 
-			crc = get_unaligned_le32(skb2->data
-					+ len - ETH_FCS_LEN);
-			skb_trim(skb2, len - ETH_FCS_LEN);
-
 			/*
 			 * The bmCRC helps to denote when the CRC field in
 			 * the Ethernet frame contains a calculated CRC:
 			 *	bmCRC = 1	: CRC is calculated
 			 *	bmCRC = 0	: CRC = 0xDEADBEEF
 			 */
-			if (header & BIT(14))
-				crc2 = ~crc32_le(~0, skb2->data, skb2->len);
-			else
+			if (header & BIT(14)) {
+				crc = get_unaligned_le32(skb2->data
+						+ len - ETH_FCS_LEN);
+				crc2 = ~crc32_le(~0, skb2->data, skb2->len
+						- ETH_FCS_LEN);
+			} else {
+				crc = get_unaligned_be32(skb2->data
+						+ len - ETH_FCS_LEN);
 				crc2 = 0xdeadbeef;
+			}
+			skb_trim(skb2, len - ETH_FCS_LEN);
 
 			if (is_last)
 				return crc == crc2;
-- 
cgit v0.10.2


From 63ead6a00d54a645667c141d8aaf0f6ffe1212be Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 11 Aug 2009 11:31:31 -0700
Subject: USB: otg: fix twl4030-usb build

subsys_initcall_sync() is only defined for built-in code, not for
loadable modules, so this driver build fails when built as a module.
However, the _sync() forms of the initcalls are not implemented,
so this should not be used -- just use the non-sync form of it.

drivers/usb/otg/twl4030-usb.c:777: warning: data definition has no type or storage class
drivers/usb/otg/twl4030-usb.c:777: warning: type defaults to 'int' in declaration of 'subsys_initcall_sync'
drivers/usb/otg/twl4030-usb.c:777: warning: parameter names (without types) in function declaration

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/otg/twl4030-usb.c b/drivers/usb/otg/twl4030-usb.c
index d1852d4..9e3e7a5 100644
--- a/drivers/usb/otg/twl4030-usb.c
+++ b/drivers/usb/otg/twl4030-usb.c
@@ -774,7 +774,7 @@ static int __init twl4030_usb_init(void)
 {
 	return platform_driver_register(&twl4030_usb_driver);
 }
-subsys_initcall_sync(twl4030_usb_init);
+subsys_initcall(twl4030_usb_init);
 
 static void __exit twl4030_usb_exit(void)
 {
-- 
cgit v0.10.2


From 74aee796c613f54e9f089170df548c0b3f15af69 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Thu, 13 Aug 2009 13:18:02 -0400
Subject: USB: ohci-ep93xx.c: remove unused variable

Remove unused variable in ohci-ep93xx.c.

This only shows up when CONFIG_PM is enabled.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Lennert Buytenhek <kernel@wantstofly.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ohci-ep93xx.c b/drivers/usb/host/ohci-ep93xx.c
index b0dbf41..4e68161 100644
--- a/drivers/usb/host/ohci-ep93xx.c
+++ b/drivers/usb/host/ohci-ep93xx.c
@@ -188,7 +188,6 @@ static int ohci_hcd_ep93xx_drv_resume(struct platform_device *pdev)
 {
 	struct usb_hcd *hcd = platform_get_drvdata(pdev);
 	struct ohci_hcd *ohci = hcd_to_ohci(hcd);
-	int status;
 
 	if (time_before(jiffies, ohci->next_statechange))
 		msleep(5);
-- 
cgit v0.10.2


From 8e8dce065088833fc418bfa5fbf035cb0726c04c Mon Sep 17 00:00:00 2001
From: David VomLehn <dvomlehn@cisco.com>
Date: Fri, 28 Aug 2009 12:54:27 -0700
Subject: USB: use kfifo to buffer usb-generic serial writes

When do_output_char() attempts to write a carriage return/line feed sequence,
it first checks to see how much buffer room is available. If there are at least
two characters free, it will write the carriage return/line feed with two calls
to tty_put_char(). It calls the tty_operation functions write() for devices that
don't support the tty_operations function put_char(). If the USB generic serial
device's write URB is not in use, it will return the buffer size when asked how
much room is available. The write() of the carriage return will cause it to mark
the write URB busy, so the subsequent write() of the line feed will be ignored.

This patch uses the kfifo infrastructure to implement a write FIFO that
accurately returns the amount of space available in the buffer.

Signed-off-by: David VomLehn <dvomlehn@cisco.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index d9398e9..deba08c 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -19,7 +19,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
-
+#include <linux/kfifo.h>
 
 static int debug;
 
@@ -166,24 +166,6 @@ static void generic_cleanup(struct usb_serial_port *port)
 	}
 }
 
-int usb_serial_generic_resume(struct usb_serial *serial)
-{
-	struct usb_serial_port *port;
-	int i, c = 0, r;
-
-	for (i = 0; i < serial->num_ports; i++) {
-		port = serial->port[i];
-		if (port->port.count && port->read_urb) {
-			r = usb_submit_urb(port->read_urb, GFP_NOIO);
-			if (r < 0)
-				c++;
-		}
-	}
-
-	return c ? -EIO : 0;
-}
-EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
-
 void usb_serial_generic_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
@@ -272,12 +254,81 @@ error_no_buffer:
 	return bwrite;
 }
 
+/**
+ * usb_serial_generic_write_start - kick off an URB write
+ * @port:	Pointer to the &struct usb_serial_port data
+ *
+ * Returns the number of bytes queued on success. This will be zero if there
+ * was nothing to send. Otherwise, it returns a negative errno value
+ */
+static int usb_serial_generic_write_start(struct usb_serial_port *port)
+{
+	struct usb_serial *serial = port->serial;
+	unsigned char *data;
+	int result;
+	int count;
+	unsigned long flags;
+	bool start_io;
+
+	/* Atomically determine whether we can and need to start a USB
+	 * operation. */
+	spin_lock_irqsave(&port->lock, flags);
+	if (port->write_urb_busy)
+		start_io = false;
+	else {
+		start_io = (__kfifo_len(port->write_fifo) != 0);
+		port->write_urb_busy = start_io;
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (!start_io)
+		return 0;
+
+	data = port->write_urb->transfer_buffer;
+	count = kfifo_get(port->write_fifo, data, port->bulk_out_size);
+	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
+
+	/* set up our urb */
+	usb_fill_bulk_urb(port->write_urb, serial->dev,
+			   usb_sndbulkpipe(serial->dev,
+				port->bulk_out_endpointAddress),
+			   port->write_urb->transfer_buffer, count,
+			   ((serial->type->write_bulk_callback) ?
+			     serial->type->write_bulk_callback :
+			     usb_serial_generic_write_bulk_callback),
+			   port);
+
+	/* send the data out the bulk port */
+	result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
+	if (result) {
+		dev_err(&port->dev,
+			"%s - failed submitting write urb, error %d\n",
+						__func__, result);
+		/* don't have to grab the lock here, as we will
+		   retry if != 0 */
+		port->write_urb_busy = 0;
+	} else
+		result = count;
+
+	return result;
+}
+
+/**
+ * usb_serial_generic_write - generic write function for serial USB devices
+ * @tty:	Pointer to &struct tty_struct for the device
+ * @port:	Pointer to the &usb_serial_port structure for the device
+ * @buf:	Pointer to the data to write
+ * @count:	Number of bytes to write
+ *
+ * Returns the number of characters actually written, which may be anything
+ * from zero to @count. If an error occurs, it returns the negative errno
+ * value.
+ */
 int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count)
 {
 	struct usb_serial *serial = port->serial;
 	int result;
-	unsigned char *data;
 
 	dbg("%s - port %d", __func__, port->number);
 
@@ -287,57 +338,20 @@ int usb_serial_generic_write(struct tty_struct *tty,
 	}
 
 	/* only do something if we have a bulk out endpoint */
-	if (serial->num_bulk_out) {
-		unsigned long flags;
-
-		if (serial->type->max_in_flight_urbs)
-			return usb_serial_multi_urb_write(tty, port,
-							  buf, count);
-
-		spin_lock_irqsave(&port->lock, flags);
-		if (port->write_urb_busy) {
-			spin_unlock_irqrestore(&port->lock, flags);
-			dbg("%s - already writing", __func__);
-			return 0;
-		}
-		port->write_urb_busy = 1;
-		spin_unlock_irqrestore(&port->lock, flags);
-
-		count = (count > port->bulk_out_size) ?
-					port->bulk_out_size : count;
-
-		memcpy(port->write_urb->transfer_buffer, buf, count);
-		data = port->write_urb->transfer_buffer;
-		usb_serial_debug_data(debug, &port->dev, __func__, count, data);
+	if (!serial->num_bulk_out)
+		return 0;
 
-		/* set up our urb */
-		usb_fill_bulk_urb(port->write_urb, serial->dev,
-				   usb_sndbulkpipe(serial->dev,
-					port->bulk_out_endpointAddress),
-				   port->write_urb->transfer_buffer, count,
-				   ((serial->type->write_bulk_callback) ?
-				     serial->type->write_bulk_callback :
-				     usb_serial_generic_write_bulk_callback),
-				   port);
+	if (serial->type->max_in_flight_urbs)
+		return usb_serial_multi_urb_write(tty, port,
+						  buf, count);
 
-		/* send the data out the bulk port */
-		port->write_urb_busy = 1;
-		result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
-		if (result) {
-			dev_err(&port->dev,
-				"%s - failed submitting write urb, error %d\n",
-							__func__, result);
-			/* don't have to grab the lock here, as we will
-			   retry if != 0 */
-			port->write_urb_busy = 0;
-		} else
-			result = count;
+	count = kfifo_put(port->write_fifo, buf, count);
+	result = usb_serial_generic_write_start(port);
 
-		return result;
-	}
+	if (result >= 0)
+		result = count;
 
-	/* no bulk out, so return 0 bytes written */
-	return 0;
+	return result;
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_write);
 
@@ -355,9 +369,8 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 			room = port->bulk_out_size *
 				(serial->type->max_in_flight_urbs -
 				 port->urbs_in_flight);
-	} else if (serial->num_bulk_out && !(port->write_urb_busy)) {
-		room = port->bulk_out_size;
-	}
+	} else if (serial->num_bulk_out)
+		room = port->write_fifo->size - __kfifo_len(port->write_fifo);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
@@ -377,11 +390,8 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 		spin_lock_irqsave(&port->lock, flags);
 		chars = port->tx_bytes_flight;
 		spin_unlock_irqrestore(&port->lock, flags);
-	} else if (serial->num_bulk_out) {
-		/* FIXME: Locking */
-		if (port->write_urb_busy)
-			chars = port->write_urb->transfer_buffer_length;
-	}
+	} else if (serial->num_bulk_out)
+		chars = kfifo_len(port->write_fifo);
 
 	dbg("%s - returns %d", __func__, chars);
 	return chars;
@@ -485,16 +495,23 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb)
 		if (port->urbs_in_flight < 0)
 			port->urbs_in_flight = 0;
 		spin_unlock_irqrestore(&port->lock, flags);
+
+		if (status) {
+			dbg("%s - nonzero multi-urb write bulk status "
+				"received: %d", __func__, status);
+			return;
+		}
 	} else {
-		/* Handle the case for single urb mode */
 		port->write_urb_busy = 0;
-	}
 
-	if (status) {
-		dbg("%s - nonzero write bulk status received: %d",
-		    __func__, status);
-		return;
+		if (status) {
+			dbg("%s - nonzero multi-urb write bulk status "
+				"received: %d", __func__, status);
+			kfifo_reset(port->write_fifo);
+		} else
+			usb_serial_generic_write_start(port);
 	}
+
 	usb_serial_port_softint(port);
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_write_bulk_callback);
@@ -559,6 +576,33 @@ int usb_serial_handle_break(struct usb_serial_port *port)
 }
 EXPORT_SYMBOL_GPL(usb_serial_handle_break);
 
+int usb_serial_generic_resume(struct usb_serial *serial)
+{
+	struct usb_serial_port *port;
+	int i, c = 0, r;
+
+	for (i = 0; i < serial->num_ports; i++) {
+		port = serial->port[i];
+		if (!port->port.count)
+			continue;
+
+		if (port->read_urb) {
+			r = usb_submit_urb(port->read_urb, GFP_NOIO);
+			if (r < 0)
+				c++;
+		}
+
+		if (port->write_urb) {
+			r = usb_serial_generic_write_start(port);
+			if (r < 0)
+				c++;
+		}
+	}
+
+	return c ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
+
 void usb_serial_generic_disconnect(struct usb_serial *serial)
 {
 	int i;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 45975b4..ff75a35 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -35,6 +35,7 @@
 #include <linux/serial.h>
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
+#include <linux/kfifo.h>
 #include "pl2303.h"
 
 /*
@@ -625,6 +626,8 @@ static void port_release(struct device *dev)
 	usb_free_urb(port->write_urb);
 	usb_free_urb(port->interrupt_in_urb);
 	usb_free_urb(port->interrupt_out_urb);
+	if (!IS_ERR(port->write_fifo) && port->write_fifo)
+		kfifo_free(port->write_fifo);
 	kfree(port->bulk_in_buffer);
 	kfree(port->bulk_out_buffer);
 	kfree(port->interrupt_in_buffer);
@@ -964,6 +967,10 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
+		port->write_fifo = kfifo_alloc(PAGE_SIZE, GFP_KERNEL,
+			&port->lock);
+		if (IS_ERR(port->write_fifo))
+			goto probe_error;
 		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
 		port->bulk_out_endpointAddress = endpoint->bEndpointAddress;
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 7b85e32..c17eb64 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -59,6 +59,7 @@ enum port_dev_state {
  * @bulk_out_buffer: pointer to the bulk out buffer for this port.
  * @bulk_out_size: the size of the bulk_out_buffer, in bytes.
  * @write_urb: pointer to the bulk out struct urb for this port.
+ * @write_fifo: kfifo used to buffer outgoing data
  * @write_urb_busy: port`s writing status
  * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this
  *	port.
@@ -96,6 +97,7 @@ struct usb_serial_port {
 	unsigned char		*bulk_out_buffer;
 	int			bulk_out_size;
 	struct urb		*write_urb;
+	struct kfifo		*write_fifo;
 	int			write_urb_busy;
 	__u8			bulk_out_endpointAddress;
 
-- 
cgit v0.10.2


From 877accca79b706afe5d78b9a92cf4f22919fb2b0 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 18 Aug 2009 16:17:45 +0200
Subject: USB: remove unneeded printks from microtek driver

These printks can be removed as they only provide information
about the driver not the device and nobody has ever provided
feedback.

Signed-off-by: Oliver Neukum <oliver@neukum.org>

diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index 4541dfc..459a728 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -653,33 +653,6 @@ static struct scsi_host_template mts_scsi_host_template = {
 	.max_sectors=		256, /* 128 K */
 };
 
-struct vendor_product
-{
-	char* name;
-	enum
-	{
-		mts_sup_unknown=0,
-		mts_sup_alpha,
-		mts_sup_full
-	}
-	support_status;
-} ;
-
-
-/* These are taken from the msmUSB.inf file on the Windows driver CD */
-static const struct vendor_product mts_supported_products[] =
-{
-	{ "Phantom 336CX",	mts_sup_unknown},
-	{ "Phantom 336CX",	mts_sup_unknown},
-	{ "Scanmaker X6",	mts_sup_alpha},
-	{ "Phantom C6",		mts_sup_unknown},
-	{ "Phantom 336CX",	mts_sup_unknown},
-	{ "ScanMaker V6USL",	mts_sup_unknown},
-	{ "ScanMaker V6USL",	mts_sup_unknown},
-	{ "Scanmaker V6UL",	mts_sup_unknown},
-	{ "Scanmaker V6UPL",	mts_sup_alpha},
-};
-
 /* The entries of microtek_table must correspond, line-by-line to
    the entries of mts_supported_products[]. */
 
@@ -711,7 +684,6 @@ static int mts_usb_probe(struct usb_interface *intf,
 	int err_retval = -ENOMEM;
 
 	struct mts_desc * new_desc;
-	struct vendor_product const* p;
 	struct usb_device *dev = interface_to_usbdev (intf);
 
 	/* the current altsetting on the interface we're probing */
@@ -726,15 +698,6 @@ static int mts_usb_probe(struct usb_interface *intf,
 
 	MTS_DEBUG_GOT_HERE();
 
-	p = &mts_supported_products[id - mts_usb_ids];
-
-	MTS_DEBUG_GOT_HERE();
-
-	MTS_DEBUG( "found model %s\n", p->name );
-	if ( p->support_status != mts_sup_full )
-		MTS_MESSAGE( "model %s is not known to be fully supported, reports welcome!\n",
-			     p->name );
-
 	/* the current altsetting on the interface we're probing */
 	altsetting = intf->cur_altsetting;
 
-- 
cgit v0.10.2


From 9b39e9ddedeef48569f8aac60a7b4c1fbb127c7d Mon Sep 17 00:00:00 2001
From: Brian Niebuhr <bniebuhr@efjohnson.com>
Date: Fri, 14 Aug 2009 10:04:22 -0500
Subject: USB: gadget: Add EEM gadget driver

This patch adds a CDC EEM ethernet gadget driver.  CDC EEM is a newer
USB ethernet specification that uses a simpler interface than the older
CDC ECM.  This makes CDC EEM usable by a wider set of USB hardware.
By default the ethernet gadget will still use CDC ECM/Subset, but kernel
configuration and/or a module parameter will allow alternative use of
the CDC EEM protocol.

Changes since last version:
	- Brought in missing RNDIS changes that caused compile error
	- Modified 'sentinel CRC' checking to match EEM host driver

Signed-off-by: Brian Niebuhr <bniebuhr@efjohnson.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 3537d91..4d8ab47 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -628,8 +628,8 @@ config USB_ETH
 	tristate "Ethernet Gadget (with CDC Ethernet support)"
 	depends on NET
 	help
-	  This driver implements Ethernet style communication, in either
-	  of two ways:
+	  This driver implements Ethernet style communication, in one of
+	  several ways:
 	  
 	   - The "Communication Device Class" (CDC) Ethernet Control Model.
 	     That protocol is often avoided with pure Ethernet adapters, in
@@ -639,7 +639,11 @@ config USB_ETH
 	   - On hardware can't implement that protocol, a simple CDC subset
 	     is used, placing fewer demands on USB.
 
-	  RNDIS support is a third option, more demanding than that subset.
+	   - CDC Ethernet Emulation Model (EEM) is a newer standard that has
+	     a simpler interface that can be used by more USB hardware.
+
+	  RNDIS support is an additional option, more demanding than than
+	  subset.
 
 	  Within the USB device, this gadget driver exposes a network device
 	  "usbX", where X depends on what other networking devices you have.
@@ -672,6 +676,22 @@ config USB_ETH_RNDIS
 	   XP, you'll need to download drivers from Microsoft's website; a URL
 	   is given in comments found in that info file.
 
+config USB_ETH_EEM
+       bool "Ethernet Emulation Model (EEM) support"
+       depends on USB_ETH
+       default n
+       help
+         CDC EEM is a newer USB standard that is somewhat simpler than CDC ECM
+         and therefore can be supported by more hardware.  Technically ECM and
+         EEM are designed for different applications.  The ECM model extends
+         the network interface to the target (e.g. a USB cable modem), and the
+         EEM model is for mobile devices to communicate with hosts using
+         ethernet over USB.  For Linux gadgets, however, the interface with
+         the host is the same (a usbX device), so the differences are minimal.
+
+         If you say "y" here, the Ethernet gadget driver will use the EEM
+         protocol rather than ECM.  If unsure, say "n".
+
 config USB_GADGETFS
 	tristate "Gadget Filesystem (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index bd102f5..f37de28 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -61,6 +61,11 @@
  * simpler, Microsoft pushes their own approach: RNDIS.  The published
  * RNDIS specs are ambiguous and appear to be incomplete, and are also
  * needlessly complex.  They borrow more from CDC ACM than CDC ECM.
+ *
+ * While CDC ECM, CDC Subset, and RNDIS are designed to extend the ethernet
+ * interface to the target, CDC EEM was designed to use ethernet over the USB
+ * link between the host and target.  CDC EEM is implemented as an alternative
+ * to those other protocols when that communication model is more appropriate
  */
 
 #define DRIVER_DESC		"Ethernet Gadget"
@@ -114,6 +119,7 @@ static inline bool has_rndis(void)
 #include "f_rndis.c"
 #include "rndis.c"
 #endif
+#include "f_eem.c"
 #include "u_ether.c"
 
 /*-------------------------------------------------------------------------*/
@@ -150,6 +156,10 @@ static inline bool has_rndis(void)
 #define RNDIS_VENDOR_NUM	0x0525	/* NetChip */
 #define RNDIS_PRODUCT_NUM	0xa4a2	/* Ethernet/RNDIS Gadget */
 
+/* For EEM gadgets */
+#define EEM_VENDOR_NUM	0x0525	/* INVALID - NEEDS TO BE ALLOCATED */
+#define EEM_PRODUCT_NUM	0xa4a1	/* INVALID - NEEDS TO BE ALLOCATED */
+
 /*-------------------------------------------------------------------------*/
 
 static struct usb_device_descriptor device_desc = {
@@ -246,8 +256,16 @@ static struct usb_configuration rndis_config_driver = {
 
 /*-------------------------------------------------------------------------*/
 
+#ifdef CONFIG_USB_ETH_EEM
+static int use_eem = 1;
+#else
+static int use_eem;
+#endif
+module_param(use_eem, bool, 0);
+MODULE_PARM_DESC(use_eem, "use CDC EEM mode");
+
 /*
- * We _always_ have an ECM or CDC Subset configuration.
+ * We _always_ have an ECM, CDC Subset, or EEM configuration.
  */
 static int __init eth_do_config(struct usb_configuration *c)
 {
@@ -258,7 +276,9 @@ static int __init eth_do_config(struct usb_configuration *c)
 		c->bmAttributes |= USB_CONFIG_ATT_WAKEUP;
 	}
 
-	if (can_support_ecm(c->cdev->gadget))
+	if (use_eem)
+		return eem_bind_config(c);
+	else if (can_support_ecm(c->cdev->gadget))
 		return ecm_bind_config(c, hostaddr);
 	else
 		return geth_bind_config(c, hostaddr);
@@ -286,7 +306,12 @@ static int __init eth_bind(struct usb_composite_dev *cdev)
 		return status;
 
 	/* set up main config label and device descriptor */
-	if (can_support_ecm(cdev->gadget)) {
+	if (use_eem) {
+		/* EEM */
+		eth_config_driver.label = "CDC Ethernet (EEM)";
+		device_desc.idVendor = cpu_to_le16(EEM_VENDOR_NUM);
+		device_desc.idProduct = cpu_to_le16(EEM_PRODUCT_NUM);
+	} else if (can_support_ecm(cdev->gadget)) {
 		/* ECM */
 		eth_config_driver.label = "CDC Ethernet (ECM)";
 	} else {
diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c
new file mode 100644
index 0000000..0a577d5
--- /dev/null
+++ b/drivers/usb/gadget/f_eem.c
@@ -0,0 +1,562 @@
+/*
+ * f_eem.c -- USB CDC Ethernet (EEM) link function driver
+ *
+ * Copyright (C) 2003-2005,2008 David Brownell
+ * Copyright (C) 2008 Nokia Corporation
+ * Copyright (C) 2009 EF Johnson Technologies
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/crc32.h>
+
+#include "u_ether.h"
+
+#define EEM_HLEN 2
+
+/*
+ * This function is a "CDC Ethernet Emulation Model" (CDC EEM)
+ * Ethernet link.
+ */
+
+struct eem_ep_descs {
+	struct usb_endpoint_descriptor	*in;
+	struct usb_endpoint_descriptor	*out;
+};
+
+struct f_eem {
+	struct gether			port;
+	u8				ctrl_id;
+
+	struct eem_ep_descs		fs;
+	struct eem_ep_descs		hs;
+};
+
+static inline struct f_eem *func_to_eem(struct usb_function *f)
+{
+	return container_of(f, struct f_eem, port.func);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* interface descriptor: */
+
+static struct usb_interface_descriptor eem_intf __initdata = {
+	.bLength =		sizeof eem_intf,
+	.bDescriptorType =	USB_DT_INTERFACE,
+
+	/* .bInterfaceNumber = DYNAMIC */
+	.bNumEndpoints =	2,
+	.bInterfaceClass =	USB_CLASS_COMM,
+	.bInterfaceSubClass =	USB_CDC_SUBCLASS_EEM,
+	.bInterfaceProtocol =	USB_CDC_PROTO_EEM,
+	/* .iInterface = DYNAMIC */
+};
+
+/* full speed support: */
+
+static struct usb_endpoint_descriptor eem_fs_in_desc __initdata = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor eem_fs_out_desc __initdata = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+
+	.bEndpointAddress =	USB_DIR_OUT,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_descriptor_header *eem_fs_function[] __initdata = {
+	/* CDC EEM control descriptors */
+	(struct usb_descriptor_header *) &eem_intf,
+	(struct usb_descriptor_header *) &eem_fs_in_desc,
+	(struct usb_descriptor_header *) &eem_fs_out_desc,
+	NULL,
+};
+
+/* high speed support: */
+
+static struct usb_endpoint_descriptor eem_hs_in_desc __initdata = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize =	cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor eem_hs_out_desc __initdata = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+
+	.bEndpointAddress =	USB_DIR_OUT,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize =	cpu_to_le16(512),
+};
+
+static struct usb_descriptor_header *eem_hs_function[] __initdata = {
+	/* CDC EEM control descriptors */
+	(struct usb_descriptor_header *) &eem_intf,
+	(struct usb_descriptor_header *) &eem_hs_in_desc,
+	(struct usb_descriptor_header *) &eem_hs_out_desc,
+	NULL,
+};
+
+/* string descriptors: */
+
+static struct usb_string eem_string_defs[] = {
+	[0].s = "CDC Ethernet Emulation Model (EEM)",
+	{  } /* end of list */
+};
+
+static struct usb_gadget_strings eem_string_table = {
+	.language =		0x0409,	/* en-us */
+	.strings =		eem_string_defs,
+};
+
+static struct usb_gadget_strings *eem_strings[] = {
+	&eem_string_table,
+	NULL,
+};
+
+/*-------------------------------------------------------------------------*/
+
+static int eem_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
+{
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int			value = -EOPNOTSUPP;
+	u16			w_index = le16_to_cpu(ctrl->wIndex);
+	u16			w_value = le16_to_cpu(ctrl->wValue);
+	u16			w_length = le16_to_cpu(ctrl->wLength);
+
+	DBG(cdev, "invalid control req%02x.%02x v%04x i%04x l%d\n",
+		ctrl->bRequestType, ctrl->bRequest,
+		w_value, w_index, w_length);
+
+	/* device either stalls (value < 0) or reports success */
+	return value;
+}
+
+
+static int eem_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
+{
+	struct f_eem		*eem = func_to_eem(f);
+	struct usb_composite_dev *cdev = f->config->cdev;
+	struct net_device	*net;
+
+	/* we know alt == 0, so this is an activation or a reset */
+	if (alt != 0)
+		goto fail;
+
+	if (intf == eem->ctrl_id) {
+
+		if (eem->port.in_ep->driver_data) {
+			DBG(cdev, "reset eem\n");
+			gether_disconnect(&eem->port);
+		}
+
+		if (!eem->port.in) {
+			DBG(cdev, "init eem\n");
+			eem->port.in = ep_choose(cdev->gadget,
+					eem->hs.in, eem->fs.in);
+			eem->port.out = ep_choose(cdev->gadget,
+					eem->hs.out, eem->fs.out);
+		}
+
+		/* zlps should not occur because zero-length EEM packets
+		 * will be inserted in those cases where they would occur
+		 */
+		eem->port.is_zlp_ok = 1;
+		eem->port.cdc_filter = DEFAULT_FILTER;
+		DBG(cdev, "activate eem\n");
+		net = gether_connect(&eem->port);
+		if (IS_ERR(net))
+			return PTR_ERR(net);
+	} else
+		goto fail;
+
+	return 0;
+fail:
+	return -EINVAL;
+}
+
+static void eem_disable(struct usb_function *f)
+{
+	struct f_eem		*eem = func_to_eem(f);
+	struct usb_composite_dev *cdev = f->config->cdev;
+
+	DBG(cdev, "eem deactivated\n");
+
+	if (eem->port.in_ep->driver_data)
+		gether_disconnect(&eem->port);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* EEM function driver setup/binding */
+
+static int __init
+eem_bind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct usb_composite_dev *cdev = c->cdev;
+	struct f_eem		*eem = func_to_eem(f);
+	int			status;
+	struct usb_ep		*ep;
+
+	/* allocate instance-specific interface IDs */
+	status = usb_interface_id(c, f);
+	if (status < 0)
+		goto fail;
+	eem->ctrl_id = status;
+	eem_intf.bInterfaceNumber = status;
+
+	status = -ENODEV;
+
+	/* allocate instance-specific endpoints */
+	ep = usb_ep_autoconfig(cdev->gadget, &eem_fs_in_desc);
+	if (!ep)
+		goto fail;
+	eem->port.in_ep = ep;
+	ep->driver_data = cdev;	/* claim */
+
+	ep = usb_ep_autoconfig(cdev->gadget, &eem_fs_out_desc);
+	if (!ep)
+		goto fail;
+	eem->port.out_ep = ep;
+	ep->driver_data = cdev;	/* claim */
+
+	status = -ENOMEM;
+
+	/* copy descriptors, and track endpoint copies */
+	f->descriptors = usb_copy_descriptors(eem_fs_function);
+	if (!f->descriptors)
+		goto fail;
+
+	eem->fs.in = usb_find_endpoint(eem_fs_function,
+			f->descriptors, &eem_fs_in_desc);
+	eem->fs.out = usb_find_endpoint(eem_fs_function,
+			f->descriptors, &eem_fs_out_desc);
+
+	/* support all relevant hardware speeds... we expect that when
+	 * hardware is dual speed, all bulk-capable endpoints work at
+	 * both speeds
+	 */
+	if (gadget_is_dualspeed(c->cdev->gadget)) {
+		eem_hs_in_desc.bEndpointAddress =
+				eem_fs_in_desc.bEndpointAddress;
+		eem_hs_out_desc.bEndpointAddress =
+				eem_fs_out_desc.bEndpointAddress;
+
+		/* copy descriptors, and track endpoint copies */
+		f->hs_descriptors = usb_copy_descriptors(eem_hs_function);
+		if (!f->hs_descriptors)
+			goto fail;
+
+		eem->hs.in = usb_find_endpoint(eem_hs_function,
+				f->hs_descriptors, &eem_hs_in_desc);
+		eem->hs.out = usb_find_endpoint(eem_hs_function,
+				f->hs_descriptors, &eem_hs_out_desc);
+	}
+
+	DBG(cdev, "CDC Ethernet (EEM): %s speed IN/%s OUT/%s\n",
+			gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full",
+			eem->port.in_ep->name, eem->port.out_ep->name);
+	return 0;
+
+fail:
+	if (f->descriptors)
+		usb_free_descriptors(f->descriptors);
+
+	/* we might as well release our claims on endpoints */
+	if (eem->port.out)
+		eem->port.out_ep->driver_data = NULL;
+	if (eem->port.in)
+		eem->port.in_ep->driver_data = NULL;
+
+	ERROR(cdev, "%s: can't bind, err %d\n", f->name, status);
+
+	return status;
+}
+
+static void
+eem_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct f_eem	*eem = func_to_eem(f);
+
+	DBG(c->cdev, "eem unbind\n");
+
+	if (gadget_is_dualspeed(c->cdev->gadget))
+		usb_free_descriptors(f->hs_descriptors);
+	usb_free_descriptors(f->descriptors);
+	kfree(eem);
+}
+
+static void eem_cmd_complete(struct usb_ep *ep, struct usb_request *req)
+{
+}
+
+/*
+ * Add the EEM header and ethernet checksum.
+ * We currently do not attempt to put multiple ethernet frames
+ * into a single USB transfer
+ */
+static struct sk_buff *eem_wrap(struct gether *port, struct sk_buff *skb)
+{
+	struct sk_buff	*skb2 = NULL;
+	struct usb_ep	*in = port->in_ep;
+	int		padlen = 0;
+	u16		len = skb->len;
+
+	if (!skb_cloned(skb)) {
+		int headroom = skb_headroom(skb);
+		int tailroom = skb_tailroom(skb);
+
+		/* When (len + EEM_HLEN + ETH_FCS_LEN) % in->maxpacket) is 0,
+		 * stick two bytes of zero-length EEM packet on the end.
+		 */
+		if (((len + EEM_HLEN + ETH_FCS_LEN) % in->maxpacket) == 0)
+			padlen += 2;
+
+		if ((tailroom >= (ETH_FCS_LEN + padlen)) &&
+				(headroom >= EEM_HLEN))
+			goto done;
+	}
+
+	skb2 = skb_copy_expand(skb, EEM_HLEN, ETH_FCS_LEN + padlen, GFP_ATOMIC);
+	dev_kfree_skb_any(skb);
+	skb = skb2;
+	if (!skb)
+		return skb;
+
+done:
+	/* use the "no CRC" option */
+	put_unaligned_be32(0xdeadbeef, skb_put(skb, 4));
+
+	/* EEM packet header format:
+	 * b0..13:	length of ethernet frame
+	 * b14:		bmCRC (0 == sentinel CRC)
+	 * b15:		bmType (0 == data)
+	 */
+	len = skb->len;
+	put_unaligned_le16((len & 0x3FFF) | BIT(14), skb_push(skb, 2));
+
+	/* add a zero-length EEM packet, if needed */
+	if (padlen)
+		put_unaligned_le16(0, skb_put(skb, 2));
+
+	return skb;
+}
+
+/*
+ * Remove the EEM header.  Note that there can be many EEM packets in a single
+ * USB transfer, so we need to break them out and handle them independently.
+ */
+static int eem_unwrap(struct gether *port,
+			struct sk_buff *skb,
+			struct sk_buff_head *list)
+{
+	struct usb_composite_dev	*cdev = port->func.config->cdev;
+	int				status = 0;
+
+	do {
+		struct sk_buff	*skb2;
+		u16		header;
+		u16		len = 0;
+
+		if (skb->len < EEM_HLEN) {
+			status = -EINVAL;
+			DBG(cdev, "invalid EEM header\n");
+			goto error;
+		}
+
+		/* remove the EEM header */
+		header = get_unaligned_le16(skb->data);
+		skb_pull(skb, EEM_HLEN);
+
+		/* EEM packet header format:
+		 * b0..14:	EEM type dependent (data or command)
+		 * b15:		bmType (0 == data, 1 == command)
+		 */
+		if (header & BIT(15)) {
+			struct usb_request	*req = cdev->req;
+			u16			bmEEMCmd;
+
+			/* EEM command packet format:
+			 * b0..10:	bmEEMCmdParam
+			 * b11..13:	bmEEMCmd
+			 * b14:		reserved (must be zero)
+			 * b15:		bmType (1 == command)
+			 */
+			if (header & BIT(14))
+				continue;
+
+			bmEEMCmd = (header >> 11) & 0x7;
+			switch (bmEEMCmd) {
+			case 0: /* echo */
+				len = header & 0x7FF;
+				if (skb->len < len) {
+					status = -EOVERFLOW;
+					goto error;
+				}
+
+				skb2 = skb_clone(skb, GFP_ATOMIC);
+				if (unlikely(!skb2)) {
+					DBG(cdev, "EEM echo response error\n");
+					goto next;
+				}
+				skb_trim(skb2, len);
+				put_unaligned_le16(BIT(15) | BIT(11) | len,
+							skb_push(skb2, 2));
+				skb_copy_bits(skb, 0, req->buf, skb->len);
+				req->length = skb->len;
+				req->complete = eem_cmd_complete;
+				req->zero = 1;
+				if (usb_ep_queue(port->in_ep, req, GFP_ATOMIC))
+					DBG(cdev, "echo response queue fail\n");
+				break;
+
+			case 1:  /* echo response */
+			case 2:  /* suspend hint */
+			case 3:  /* response hint */
+			case 4:  /* response complete hint */
+			case 5:  /* tickle */
+			default: /* reserved */
+				continue;
+			}
+		} else {
+			u32		crc, crc2;
+			struct sk_buff	*skb3;
+
+			/* check for zero-length EEM packet */
+			if (header == 0)
+				continue;
+
+			/* EEM data packet format:
+			 * b0..13:	length of ethernet frame
+			 * b14:		bmCRC (0 == sentinel, 1 == calculated)
+			 * b15:		bmType (0 == data)
+			 */
+			len = header & 0x3FFF;
+			if ((skb->len < len)
+					|| (len < (ETH_HLEN + ETH_FCS_LEN))) {
+				status = -EINVAL;
+				goto error;
+			}
+
+			/* validate CRC */
+			crc = get_unaligned_le32(skb->data + len - ETH_FCS_LEN);
+			if (header & BIT(14)) {
+				crc = get_unaligned_le32(skb->data + len
+							- ETH_FCS_LEN);
+				crc2 = ~crc32_le(~0,
+						skb->data,
+						skb->len - ETH_FCS_LEN);
+			} else {
+				crc = get_unaligned_be32(skb->data + len
+							- ETH_FCS_LEN);
+				crc2 = 0xdeadbeef;
+			}
+			if (crc != crc2) {
+				DBG(cdev, "invalid EEM CRC\n");
+				goto next;
+			}
+
+			skb2 = skb_clone(skb, GFP_ATOMIC);
+			if (unlikely(!skb2)) {
+				DBG(cdev, "unable to unframe EEM packet\n");
+				continue;
+			}
+			skb_trim(skb2, len - ETH_FCS_LEN);
+
+			skb3 = skb_copy_expand(skb2,
+						NET_IP_ALIGN,
+						0,
+						GFP_ATOMIC);
+			if (unlikely(!skb3)) {
+				DBG(cdev, "unable to realign EEM packet\n");
+				dev_kfree_skb_any(skb2);
+				continue;
+			}
+			dev_kfree_skb_any(skb2);
+			skb_queue_tail(list, skb3);
+		}
+next:
+		skb_pull(skb, len);
+	} while (skb->len);
+
+error:
+	dev_kfree_skb_any(skb);
+	return status;
+}
+
+/**
+ * eem_bind_config - add CDC Ethernet (EEM) network link to a configuration
+ * @c: the configuration to support the network link
+ * Context: single threaded during gadget setup
+ *
+ * Returns zero on success, else negative errno.
+ *
+ * Caller must have called @gether_setup().  Caller is also responsible
+ * for calling @gether_cleanup() before module unload.
+ */
+int __init eem_bind_config(struct usb_configuration *c)
+{
+	struct f_eem	*eem;
+	int		status;
+
+	/* maybe allocate device-global string IDs */
+	if (eem_string_defs[0].id == 0) {
+
+		/* control interface label */
+		status = usb_string_id(c->cdev);
+		if (status < 0)
+			return status;
+		eem_string_defs[0].id = status;
+		eem_intf.iInterface = status;
+	}
+
+	/* allocate and initialize one new instance */
+	eem = kzalloc(sizeof *eem, GFP_KERNEL);
+	if (!eem)
+		return -ENOMEM;
+
+	eem->port.cdc_filter = DEFAULT_FILTER;
+
+	eem->port.func.name = "cdc_eem";
+	eem->port.func.strings = eem_strings;
+	/* descriptors are per-instance copies */
+	eem->port.func.bind = eem_bind;
+	eem->port.func.unbind = eem_unbind;
+	eem->port.func.set_alt = eem_set_alt;
+	eem->port.func.setup = eem_setup;
+	eem->port.func.disable = eem_disable;
+	eem->port.wrap = eem_wrap;
+	eem->port.unwrap = eem_unwrap;
+	eem->port.header_len = EEM_HLEN;
+
+	status = usb_add_function(c, &eem->port.func);
+	if (status)
+		kfree(eem);
+	return status;
+}
+
diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 424a37c..c9966cc 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -286,12 +286,17 @@ static struct usb_gadget_strings *rndis_strings[] = {
 
 /*-------------------------------------------------------------------------*/
 
-static struct sk_buff *rndis_add_header(struct sk_buff *skb)
+static struct sk_buff *rndis_add_header(struct gether *port,
+					struct sk_buff *skb)
 {
-	skb = skb_realloc_headroom(skb, sizeof(struct rndis_packet_msg_type));
-	if (skb)
-		rndis_add_hdr(skb);
-	return skb;
+	struct sk_buff *skb2;
+
+	skb2 = skb_realloc_headroom(skb, sizeof(struct rndis_packet_msg_type));
+	if (skb2)
+		rndis_add_hdr(skb2);
+
+	dev_kfree_skb_any(skb);
+	return skb2;
 }
 
 static void rndis_response_available(void *_rndis)
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index ca41b0b..48267bc 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -1022,22 +1022,29 @@ static rndis_resp_t *rndis_add_response (int configNr, u32 length)
 	return r;
 }
 
-int rndis_rm_hdr(struct sk_buff *skb)
+int rndis_rm_hdr(struct gether *port,
+			struct sk_buff *skb,
+			struct sk_buff_head *list)
 {
 	/* tmp points to a struct rndis_packet_msg_type */
 	__le32		*tmp = (void *) skb->data;
 
 	/* MessageType, MessageLength */
 	if (cpu_to_le32(REMOTE_NDIS_PACKET_MSG)
-			!= get_unaligned(tmp++))
+			!= get_unaligned(tmp++)) {
+		dev_kfree_skb_any(skb);
 		return -EINVAL;
+	}
 	tmp++;
 
 	/* DataOffset, DataLength */
-	if (!skb_pull(skb, get_unaligned_le32(tmp++) + 8))
+	if (!skb_pull(skb, get_unaligned_le32(tmp++) + 8)) {
+		dev_kfree_skb_any(skb);
 		return -EOVERFLOW;
+	}
 	skb_trim(skb, get_unaligned_le32(tmp++));
 
+	skb_queue_tail(list, skb);
 	return 0;
 }
 
diff --git a/drivers/usb/gadget/rndis.h b/drivers/usb/gadget/rndis.h
index aac61df..c236aaa 100644
--- a/drivers/usb/gadget/rndis.h
+++ b/drivers/usb/gadget/rndis.h
@@ -251,7 +251,8 @@ int  rndis_set_param_vendor (u8 configNr, u32 vendorID,
 			    const char *vendorDescr);
 int  rndis_set_param_medium (u8 configNr, u32 medium, u32 speed);
 void rndis_add_hdr (struct sk_buff *skb);
-int rndis_rm_hdr (struct sk_buff *skb);
+int rndis_rm_hdr(struct gether *port, struct sk_buff *skb,
+			struct sk_buff_head *list);
 u8   *rndis_get_next_response (int configNr, u32 *length);
 void rndis_free_response (int configNr, u8 *buf);
 
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index c665219..f8751ff 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -37,8 +37,9 @@
  * one (!) network link through the USB gadget stack, normally "usb0".
  *
  * The control and data models are handled by the function driver which
- * connects to this code; such as CDC Ethernet, "CDC Subset", or RNDIS.
- * That includes all descriptor and endpoint management.
+ * connects to this code; such as CDC Ethernet (ECM or EEM),
+ * "CDC Subset", or RNDIS.  That includes all descriptor and endpoint
+ * management.
  *
  * Link level addressing is handled by this component using module
  * parameters; if no such parameters are provided, random link level
@@ -68,9 +69,13 @@ struct eth_dev {
 	struct list_head	tx_reqs, rx_reqs;
 	atomic_t		tx_qlen;
 
+	struct sk_buff_head	rx_frames;
+
 	unsigned		header_len;
-	struct sk_buff		*(*wrap)(struct sk_buff *skb);
-	int			(*unwrap)(struct sk_buff *skb);
+	struct sk_buff		*(*wrap)(struct gether *, struct sk_buff *skb);
+	int			(*unwrap)(struct gether *,
+						struct sk_buff *skb,
+						struct sk_buff_head *list);
 
 	struct work_struct	work;
 
@@ -269,7 +274,7 @@ enomem:
 
 static void rx_complete(struct usb_ep *ep, struct usb_request *req)
 {
-	struct sk_buff	*skb = req->context;
+	struct sk_buff	*skb = req->context, *skb2;
 	struct eth_dev	*dev = ep->driver_data;
 	int		status = req->status;
 
@@ -278,26 +283,47 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req)
 	/* normal completion */
 	case 0:
 		skb_put(skb, req->actual);
-		if (dev->unwrap)
-			status = dev->unwrap(skb);
-		if (status < 0
-				|| ETH_HLEN > skb->len
-				|| skb->len > ETH_FRAME_LEN) {
-			dev->net->stats.rx_errors++;
-			dev->net->stats.rx_length_errors++;
-			DBG(dev, "rx length %d\n", skb->len);
-			break;
-		}
 
-		skb->protocol = eth_type_trans(skb, dev->net);
-		dev->net->stats.rx_packets++;
-		dev->net->stats.rx_bytes += skb->len;
+		if (dev->unwrap) {
+			unsigned long	flags;
 
-		/* no buffer copies needed, unless hardware can't
-		 * use skb buffers.
-		 */
-		status = netif_rx(skb);
+			spin_lock_irqsave(&dev->lock, flags);
+			if (dev->port_usb) {
+				status = dev->unwrap(dev->port_usb,
+							skb,
+							&dev->rx_frames);
+			} else {
+				dev_kfree_skb_any(skb);
+				status = -ENOTCONN;
+			}
+			spin_unlock_irqrestore(&dev->lock, flags);
+		} else {
+			skb_queue_tail(&dev->rx_frames, skb);
+		}
 		skb = NULL;
+
+		skb2 = skb_dequeue(&dev->rx_frames);
+		while (skb2) {
+			if (status < 0
+					|| ETH_HLEN > skb2->len
+					|| skb2->len > ETH_FRAME_LEN) {
+				dev->net->stats.rx_errors++;
+				dev->net->stats.rx_length_errors++;
+				DBG(dev, "rx length %d\n", skb2->len);
+				dev_kfree_skb_any(skb2);
+				goto next_frame;
+			}
+			skb2->protocol = eth_type_trans(skb2, dev->net);
+			dev->net->stats.rx_packets++;
+			dev->net->stats.rx_bytes += skb2->len;
+
+			/* no buffer copies needed, unless hardware can't
+			 * use skb buffers.
+			 */
+			status = netif_rx(skb2);
+next_frame:
+			skb2 = skb_dequeue(&dev->rx_frames);
+		}
 		break;
 
 	/* software-driven interface shutdown */
@@ -537,14 +563,15 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 	 * or there's not enough space for extra headers we need
 	 */
 	if (dev->wrap) {
-		struct sk_buff	*skb_new;
+		unsigned long	flags;
 
-		skb_new = dev->wrap(skb);
-		if (!skb_new)
+		spin_lock_irqsave(&dev->lock, flags);
+		if (dev->port_usb)
+			skb = dev->wrap(dev->port_usb, skb);
+		spin_unlock_irqrestore(&dev->lock, flags);
+		if (!skb)
 			goto drop;
 
-		dev_kfree_skb_any(skb);
-		skb = skb_new;
 		length = skb->len;
 	}
 	req->buf = skb->data;
@@ -578,9 +605,9 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 	}
 
 	if (retval) {
+		dev_kfree_skb_any(skb);
 drop:
 		dev->net->stats.tx_dropped++;
-		dev_kfree_skb_any(skb);
 		spin_lock_irqsave(&dev->req_lock, flags);
 		if (list_empty(&dev->tx_reqs))
 			netif_start_queue(net);
@@ -753,6 +780,8 @@ int __init gether_setup(struct usb_gadget *g, u8 ethaddr[ETH_ALEN])
 	INIT_LIST_HEAD(&dev->tx_reqs);
 	INIT_LIST_HEAD(&dev->rx_reqs);
 
+	skb_queue_head_init(&dev->rx_frames);
+
 	/* network device setup */
 	dev->net = net;
 	strcpy(net->name, "usb%d");
diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h
index 0d1f7ae..91b39ff 100644
--- a/drivers/usb/gadget/u_ether.h
+++ b/drivers/usb/gadget/u_ether.h
@@ -60,12 +60,13 @@ struct gether {
 
 	u16				cdc_filter;
 
-	/* hooks for added framing, as needed for RNDIS and EEM.
-	 * we currently don't support multiple frames per SKB.
-	 */
+	/* hooks for added framing, as needed for RNDIS and EEM. */
 	u32				header_len;
-	struct sk_buff			*(*wrap)(struct sk_buff *skb);
-	int				(*unwrap)(struct sk_buff *skb);
+	struct sk_buff			*(*wrap)(struct gether *port,
+						struct sk_buff *skb);
+	int				(*unwrap)(struct gether *port,
+						struct sk_buff *skb,
+						struct sk_buff_head *list);
 
 	/* called on network open/close */
 	void				(*open)(struct gether *);
@@ -109,6 +110,7 @@ static inline bool can_support_ecm(struct usb_gadget *gadget)
 /* each configuration may bind one instance of an ethernet link */
 int geth_bind_config(struct usb_configuration *c, u8 ethaddr[ETH_ALEN]);
 int ecm_bind_config(struct usb_configuration *c, u8 ethaddr[ETH_ALEN]);
+int eem_bind_config(struct usb_configuration *c);
 
 #ifdef CONFIG_USB_ETH_RNDIS
 
-- 
cgit v0.10.2


From 823c3fd9cc71714fe22ea415a68da746800d5a9a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 3 Aug 2009 11:05:59 -0400
Subject: USB: s3c2410: unregister should call unbind, not disconnect

This patch (as1275) fixes the s3c2410 device controller driver.  Its
usb_gadget_unregister_driver() routine is supposed to call the gadget
driver's unbind method, not the disconnect method.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c
index a9b452f..d5f4c1d 100644
--- a/drivers/usb/gadget/s3c2410_udc.c
+++ b/drivers/usb/gadget/s3c2410_udc.c
@@ -1703,8 +1703,7 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
 	dprintk(DEBUG_NORMAL,"usb_gadget_register_driver() '%s'\n",
 		driver->driver.name);
 
-	if (driver->disconnect)
-		driver->disconnect(&udc->gadget);
+	driver->unbind(&udc->gadget);
 
 	device_del(&udc->gadget.dev);
 	udc->driver = NULL;
-- 
cgit v0.10.2


From 492896f011a411d17d02e696adbc4a9b4ff68e7f Mon Sep 17 00:00:00 2001
From: Tim Small <tim@buttersideup.com>
Date: Mon, 17 Aug 2009 13:21:57 +0100
Subject: USb: Break support for WinChipHead CH341 340 USB->Serial "chip"

Here is a patch to the ch341 driver which adds serial break support.

Signed-off-by: Tim Small <tim@seoss.co.uk>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 8c894a7..59eff72 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -56,6 +56,18 @@
 #define CH341_BAUDBASE_FACTOR 1532620800
 #define CH341_BAUDBASE_DIVMAX 3
 
+/* Break support - the information used to implement this was gleaned from
+ * the Net/FreeBSD uchcom.c driver by Takanori Watanabe.  Domo arigato.
+ */
+
+#define CH341_REQ_WRITE_REG    0x9A
+#define CH341_REQ_READ_REG     0x95
+#define CH341_REG_BREAK1       0x05
+#define CH341_REG_BREAK2       0x18
+#define CH341_NBREAK_BITS_REG1 0x01
+#define CH341_NBREAK_BITS_REG2 0x40
+
+
 static int debug;
 
 static struct usb_device_id id_table [] = {
@@ -373,6 +385,45 @@ static void ch341_set_termios(struct tty_struct *tty,
 	 */
 }
 
+static void ch341_break_ctl(struct tty_struct *tty, int break_state)
+{
+	const uint16_t ch341_break_reg =
+		CH341_REG_BREAK1 | ((uint16_t) CH341_REG_BREAK2 << 8);
+	struct usb_serial_port *port = tty->driver_data;
+	int r;
+	uint16_t reg_contents;
+	uint8_t break_reg[2];
+
+	dbg("%s()", __func__);
+
+	r = ch341_control_in(port->serial->dev, CH341_REQ_READ_REG,
+			ch341_break_reg, 0, break_reg, sizeof(break_reg));
+	if (r < 0) {
+		printk(KERN_WARNING "%s: USB control read error whilst getting"
+				" break register contents.\n", __FILE__);
+		return;
+	}
+	dbg("%s - initial ch341 break register contents - reg1: %x, reg2: %x",
+			__func__, break_reg[0], break_reg[1]);
+	if (break_state != 0) {
+		dbg("%s - Enter break state requested", __func__);
+		break_reg[0] &= ~CH341_NBREAK_BITS_REG1;
+		break_reg[1] &= ~CH341_NBREAK_BITS_REG2;
+	} else {
+		dbg("%s - Leave break state requested", __func__);
+		break_reg[0] |= CH341_NBREAK_BITS_REG1;
+		break_reg[1] |= CH341_NBREAK_BITS_REG2;
+	}
+	dbg("%s - New ch341 break register contents - reg1: %x, reg2: %x",
+			__func__, break_reg[0], break_reg[1]);
+	reg_contents = (uint16_t)break_reg[0] | ((uint16_t)break_reg[1] << 8);
+	r = ch341_control_out(port->serial->dev, CH341_REQ_WRITE_REG,
+			ch341_break_reg, reg_contents);
+	if (r < 0)
+		printk(KERN_WARNING "%s: USB control write error whilst setting"
+				" break register contents.\n", __FILE__);
+}
+
 static int ch341_tiocmset(struct tty_struct *tty, struct file *file,
 			  unsigned int set, unsigned int clear)
 {
@@ -576,6 +627,7 @@ static struct usb_serial_driver ch341_device = {
 	.close             = ch341_close,
 	.ioctl             = ch341_ioctl,
 	.set_termios       = ch341_set_termios,
+	.break_ctl         = ch341_break_ctl,
 	.tiocmget          = ch341_tiocmget,
 	.tiocmset          = ch341_tiocmset,
 	.read_int_callback = ch341_read_int_callback,
-- 
cgit v0.10.2


From c2cd26e15b84b964c489f2aff278cdaf03840c93 Mon Sep 17 00:00:00 2001
From: Steve Holland <sdh4@iastate.edu>
Date: Thu, 18 Jun 2009 17:37:49 -0500
Subject: USB: usbtmc: Fix short reads in usbtmc_read()

The header size should not be included in the number of bytes requested of the
instrument

Signed-off-by: Steve Holland <sdh4@iastate.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 40ef4da..f2fde7c 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -407,10 +407,10 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 		buffer[1] = data->bTag;
 		buffer[2] = ~(data->bTag);
 		buffer[3] = 0; /* Reserved */
-		buffer[4] = (this_part - 12 - 3) & 255;
-		buffer[5] = ((this_part - 12 - 3) >> 8) & 255;
-		buffer[6] = ((this_part - 12 - 3) >> 16) & 255;
-		buffer[7] = ((this_part - 12 - 3) >> 24) & 255;
+		buffer[4] = (this_part) & 255;
+		buffer[5] = ((this_part) >> 8) & 255;
+		buffer[6] = ((this_part) >> 16) & 255;
+		buffer[7] = ((this_part) >> 24) & 255;
 		buffer[8] = data->TermCharEnabled * 2;
 		/* Use term character? */
 		buffer[9] = data->TermChar;
-- 
cgit v0.10.2


From 92d07e422df3cc5370d0d9b95a671abb69d50ef1 Mon Sep 17 00:00:00 2001
From: Steve Holland <sdh4@iastate.edu>
Date: Thu, 18 Jun 2009 17:37:49 -0500
Subject: USB: usbtmc: inhibit corruption

Limit data copied to userspace to amount requested.  Prevents a faulty
instrument from overwriting user memory.

Signed-off-by: Steve Holland <sdh4@iastate.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index f2fde7c..91d3a94 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -473,6 +473,10 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 			n_characters = this_part;
 		}
 
+		/* Bound amount of data received by amount of data requested */
+		if (n_characters > this_part)
+			n_characters = this_part;
+
 		/* Copy buffer to user space */
 		if (copy_to_user(buf + done, &buffer[12], n_characters)) {
 			/* There must have been an addressing problem */
-- 
cgit v0.10.2


From 4143d178e7b39c00d5277040c69a1522c4d98871 Mon Sep 17 00:00:00 2001
From: Steve Holland <sdh4@iastate.edu>
Date: Thu, 18 Jun 2009 17:37:49 -0500
Subject: USB: usbtmc: correct termination condition for reads.

Follow T&M convention of obeying EOM flag.  Avoid exception cases where
instrument response size matches a buffer size.

Signed-off-by: Steve Holland <sdh4@iastate.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 91d3a94..6395f22 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -485,7 +485,8 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 		}
 
 		done += n_characters;
-		if (n_characters < USBTMC_SIZE_IOBUFFER)
+		/* Terminate if end-of-message bit recieved from device */
+		if ((buffer[8] &  0x01) && (actual >= n_characters + 12))
 			remaining = 0;
 		else
 			remaining -= n_characters;
-- 
cgit v0.10.2


From 27043930b573fc57923c7494d50933efb2e40138 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Tue, 18 Aug 2009 21:05:53 +0200
Subject: USB: iuu_phoenix: Don't reset the device at close

Resetting the device cause the device to have a new name in the /dev.

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 6138c1c..8dc6058 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -1009,11 +1009,7 @@ static void iuu_close(struct usb_serial_port *port)
 		usb_kill_urb(port->write_urb);
 		usb_kill_urb(port->read_urb);
 		usb_kill_urb(port->interrupt_in_urb);
-		msleep(1000);
-		/* wait one second to free all buffers */
 		iuu_led(port, 0, 0, 0xF000, 0xFF);
-		msleep(1000);
-		usb_reset_device(port->serial->dev);
 	}
 }
 
-- 
cgit v0.10.2


From 8844a32d54988ddf9eaf8f439085491547debcc8 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Tue, 18 Aug 2009 21:05:54 +0200
Subject: USB: iuu_phoenix: clean-up parameter's descriptions

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 8dc6058..60a7d8d 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -1234,14 +1234,15 @@ module_param(debug, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(debug, "Debug enabled or not");
 
 module_param(xmas, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(xmas, "xmas color enabled or not");
+MODULE_PARM_DESC(xmas, "Xmas colors enabled or not");
 
 module_param(boost, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(boost, "overclock boost percent 100 to 500");
+MODULE_PARM_DESC(boost, "Card overclock boost (in percent 100-500)");
 
 module_param(clockmode, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(clockmode, "1=3Mhz579,2=3Mhz680,3=6Mhz");
+MODULE_PARM_DESC(clockmode, "Card clock mode (1=3.579 MHz, 2=3.680 MHz, "
+		"3=6 Mhz)");
 
 module_param(cdmode, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(cdmode, "Card detect mode 0=none, 1=CD, 2=!CD, 3=DSR, "
-		 "4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING");
+MODULE_PARM_DESC(cdmode, "Card detect mode (0=none, 1=CD, 2=!CD, 3=DSR, "
+		 "4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING)");
-- 
cgit v0.10.2


From 20eda943cc55b2bfdb6b6e4a3da23c8f198910c8 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Tue, 18 Aug 2009 21:05:55 +0200
Subject: USB: iuu_phoenix: add support for changing VCC

You can now set the IUU reader to 3.3V VCC instead of 5V VCC, using the sysfs
parameter vcc_mode. Valid values are 3 and 5.

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 60a7d8d..5f13342 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -79,6 +79,7 @@ struct iuu_private {
 	u8 *buf;		/* used for initialize speed */
 	u8 *dbgbuf;		/* debug buffer */
 	u8 len;
+	int vcc;		/* vcc (either 3 or 5 V) */
 };
 
 
@@ -114,6 +115,7 @@ static int iuu_startup(struct usb_serial *serial)
 		kfree(priv);
 		return -ENOMEM;
 	}
+	priv->vcc = 5;  /* 5 V for vcc by default */
 	spin_lock_init(&priv->lock);
 	init_waitqueue_head(&priv->delta_msr_wait);
 	usb_set_serial_port_data(serial->port[0], priv);
@@ -1178,6 +1180,95 @@ static int iuu_open(struct tty_struct *tty, struct usb_serial_port *port)
 	return result;
 }
 
+/* how to change VCC */
+static int iuu_vcc_set(struct usb_serial_port *port, unsigned int vcc)
+{
+	int status;
+	u8 *buf;
+
+	buf = kmalloc(5, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	dbg("%s - enter", __func__);
+
+	buf[0] = IUU_SET_VCC;
+	buf[1] = vcc & 0xFF;
+	buf[2] = (vcc >> 8) & 0xFF;
+	buf[3] = (vcc >> 16) & 0xFF;
+	buf[4] = (vcc >> 24) & 0xFF;
+
+	status = bulk_immediate(port, buf, 5);
+	kfree(buf);
+
+	if (status != IUU_OPERATION_OK)
+		dbg("%s - vcc error status = %2x", __func__, status);
+	else
+		dbg("%s - vcc OK !", __func__);
+
+	return status;
+}
+
+/*
+ * Sysfs Attributes
+ */
+
+static ssize_t show_vcc_mode(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct usb_serial_port *port = to_usb_serial_port(dev);
+	struct iuu_private *priv = usb_get_serial_port_data(port);
+
+	return sprintf(buf, "%d\n", priv->vcc);
+}
+
+static ssize_t store_vcc_mode(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct usb_serial_port *port = to_usb_serial_port(dev);
+	struct iuu_private *priv = usb_get_serial_port_data(port);
+	unsigned long v;
+
+	if (strict_strtoul(buf, 10, &v)) {
+		dev_err(dev, "%s - vcc_mode: %s is not a unsigned long\n",
+				__func__, buf);
+		goto fail_store_vcc_mode;
+	}
+
+	dbg("%s: setting vcc_mode = %ld", __func__, v);
+
+	if ((v != 3) && (v != 5)) {
+		dev_err(dev, "%s - vcc_mode %ld is invalid\n", __func__, v);
+	} else {
+		iuu_vcc_set(port, v);
+		priv->vcc = v;
+	}
+fail_store_vcc_mode:
+	return count;
+}
+
+static DEVICE_ATTR(vcc_mode, S_IRUSR | S_IWUSR, show_vcc_mode,
+	store_vcc_mode);
+
+static int iuu_create_sysfs_attrs(struct usb_serial_port *port)
+{
+	dbg("%s", __func__);
+
+	return device_create_file(&port->dev, &dev_attr_vcc_mode);
+}
+
+static int iuu_remove_sysfs_attrs(struct usb_serial_port *port)
+{
+	dbg("%s", __func__);
+
+	device_remove_file(&port->dev, &dev_attr_vcc_mode);
+	return 0;
+}
+
+/*
+ * End Sysfs Attributes
+ */
+
 static struct usb_serial_driver iuu_device = {
 	.driver = {
 		   .owner = THIS_MODULE,
@@ -1185,6 +1276,8 @@ static struct usb_serial_driver iuu_device = {
 		   },
 	.id_table = id_table,
 	.num_ports = 1,
+	.port_probe = iuu_create_sysfs_attrs,
+	.port_remove = iuu_remove_sysfs_attrs,
 	.open = iuu_open,
 	.close = iuu_close,
 	.write = iuu_uart_write,
-- 
cgit v0.10.2


From 02b180665279df9de6b121ce0b4d42ce4f04c411 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Tue, 18 Aug 2009 21:05:56 +0200
Subject: USB: iuu_phoenix: increment version number

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 5f13342..0b1a74e 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -40,7 +40,7 @@ static int debug;
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.10"
+#define DRIVER_VERSION "v0.11"
 #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver"
 
 static struct usb_device_id id_table[] = {
-- 
cgit v0.10.2


From e55c6d06fead7e58b7c597fd9afc46a88ef740e6 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Tue, 18 Aug 2009 21:05:57 +0200
Subject: USB: iuu_phoenix: add a way to select the default VCC

Using the module parameter vcc_default, you can choose the default VCC value.

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 0b1a74e..e6e02b1 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -64,6 +64,7 @@ static int cdmode = 1;
 static int iuu_cardin;
 static int iuu_cardout;
 static int xmas;
+static int vcc_default = 5;
 
 static void read_rxcmd_callback(struct urb *urb);
 
@@ -115,7 +116,7 @@ static int iuu_startup(struct usb_serial *serial)
 		kfree(priv);
 		return -ENOMEM;
 	}
-	priv->vcc = 5;  /* 5 V for vcc by default */
+	priv->vcc = vcc_default;
 	spin_lock_init(&priv->lock);
 	init_waitqueue_head(&priv->delta_msr_wait);
 	usb_set_serial_port_data(serial->port[0], priv);
@@ -1339,3 +1340,7 @@ MODULE_PARM_DESC(clockmode, "Card clock mode (1=3.579 MHz, 2=3.680 MHz, "
 module_param(cdmode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(cdmode, "Card detect mode (0=none, 1=CD, 2=!CD, 3=DSR, "
 		 "4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING)");
+
+module_param(vcc_default, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(vcc_default, "Set default VCC (either 3 for 3.3V or 5 "
+		"for 5V). Default to 5.");
-- 
cgit v0.10.2


From 25b8286805e856c8c7fda127018e31032c918015 Mon Sep 17 00:00:00 2001
From: Frank Schaefer <schaefer.frank@gmx.net>
Date: Tue, 18 Aug 2009 20:15:07 +0200
Subject: USB-serial: pl2303: fix baud rate handling in case of unsupported
 values

According to the datasheets, the PL2303 supports a set of 25 baudrates.
The baudrate is set as a 4 byte value directly.

During my experiments with device 067b:2303 (PL2303X), I noticed that
 - the bridge-controller always uses 9600 baud if invalid/unsupported baud rate
   values are set
 - the baud rate value returned by usb_control_msg(..., GET_LINE_REQUEST, ...)
   does not reflect the actually used baudrate. Always the last set value is
   returned, even if it was invalid and not used by the controller.

This patch fixes the following issues with the current code:
1.) make sure that only supported baudrates are set (are there any buggy
    chip revisions out there which don't "like" other values... ?).
2.) always set the baudrate to the next nearest supported baudrate.
3.) applications can now read back the resulting baudrate properly, because
    tty_encode_baud_rate(...) is now fed with the actually used baudrate.

Signed-off-by: Frank Schaefer <schaefer.frank@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 4cbe59c..82055b0 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -528,6 +528,12 @@ static void pl2303_set_termios(struct tty_struct *tty,
 	int baud;
 	int i;
 	u8 control;
+	const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600,
+	                         4800, 7200, 9600, 14400, 19200, 28800, 38400,
+	                         57600, 115200, 230400, 460800, 614400,
+	                         921600, 1228800, 2457600, 3000000, 6000000 };
+	int baud_floor, baud_ceil;
+	int k;
 
 	dbg("%s -  port %d", __func__, port->number);
 
@@ -573,9 +579,39 @@ static void pl2303_set_termios(struct tty_struct *tty,
 		dbg("%s - data bits = %d", __func__, buf[6]);
 	}
 
+	/* For reference buf[0]:buf[3] baud rate value */
+	/* NOTE: Only the values defined in baud_sup are supported !
+	 *       => if unsupported values are set, the PL2303 seems to use
+	 *          9600 baud (at least my PL2303X always does)
+	 */
 	baud = tty_get_baud_rate(tty);
-	dbg("%s - baud = %d", __func__, baud);
+	dbg("%s - baud requested = %d", __func__, baud);
 	if (baud) {
+		/* Set baudrate to nearest supported value */
+		for (k=0; k<ARRAY_SIZE(baud_sup); k++) {
+			if (baud_sup[k] / baud) {
+				baud_ceil = baud_sup[k];
+				if (k==0) {
+					baud = baud_ceil;
+				} else {
+					baud_floor = baud_sup[k-1];
+					if ((baud_ceil % baud)
+					    > (baud % baud_floor))
+						baud = baud_floor;
+					else
+						baud = baud_ceil;
+				}
+				break;
+			}
+		}
+		if (baud > 1228800) {
+			/* type_0, type_1 only support up to 1228800 baud */
+			if (priv->type != HX)
+				baud = 1228800;
+			else if (baud > 6000000)
+				baud = 6000000;
+		}
+		dbg("%s - baud set = %d", __func__, baud);
 		buf[0] = baud & 0xff;
 		buf[1] = (baud >> 8) & 0xff;
 		buf[2] = (baud >> 16) & 0xff;
@@ -648,7 +684,7 @@ static void pl2303_set_termios(struct tty_struct *tty,
 		pl2303_vendor_write(0x0, 0x0, serial);
 	}
 
-	/* FIXME: Need to read back resulting baud rate */
+	/* Save resulting baud rate */
 	if (baud)
 		tty_encode_baud_rate(tty, baud, baud);
 
-- 
cgit v0.10.2


From 6dd81b45fd7628f3eb308f387aee696366718f25 Mon Sep 17 00:00:00 2001
From: Frank Schaefer <schaefer.frank@gmx.net>
Date: Tue, 18 Aug 2009 20:31:11 +0200
Subject: USB-serial: pl2303: add space/mark parity

The device supports it, so why not use it ? Works fine !

Signed-off-by: Frank Schaefer <schaefer.frank@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 82055b0..6ed33c7 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -636,11 +636,21 @@ static void pl2303_set_termios(struct tty_struct *tty,
 		/* For reference buf[5]=3 is mark parity */
 		/* For reference buf[5]=4 is space parity */
 		if (cflag & PARODD) {
-			buf[5] = 1;
-			dbg("%s - parity = odd", __func__);
+			if (cflag & CMSPAR) {
+				buf[5] = 3;
+				dbg("%s - parity = mark", __func__);
+			} else {
+				buf[5] = 1;
+				dbg("%s - parity = odd", __func__);
+			}
 		} else {
-			buf[5] = 2;
-			dbg("%s - parity = even", __func__);
+			if (cflag & CMSPAR) {
+				buf[5] = 4;
+				dbg("%s - parity = space", __func__);
+			} else {
+				buf[5] = 2;
+				dbg("%s - parity = even", __func__);
+			}
 		}
 	} else {
 		buf[5] = 0;
-- 
cgit v0.10.2


From 29cf1b72f34519413b3fafbccc9ac776eb948ede Mon Sep 17 00:00:00 2001
From: Frank Schaefer <schaefer.frank@gmx.net>
Date: Tue, 18 Aug 2009 20:34:24 +0200
Subject: USB-serial: pl2303: use 1.5 instead of 2 stop bits with 5 data bits

This is how "real" UARTs (e.g. 16550) work and AFAIK what RS232 specifies, too.
Make the driver more compliant.

Signed-off-by: Frank Schaefer <schaefer.frank@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 6ed33c7..1128e01 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -622,8 +622,16 @@ static void pl2303_set_termios(struct tty_struct *tty,
 	/* For reference buf[4]=1 is 1.5 stop bits */
 	/* For reference buf[4]=2 is 2 stop bits */
 	if (cflag & CSTOPB) {
-		buf[4] = 2;
-		dbg("%s - stop bits = 2", __func__);
+		/* NOTE: Comply with "real" UARTs / RS232:
+		 *       use 1.5 instead of 2 stop bits with 5 data bits
+		 */
+		if ((cflag & CSIZE) == CS5) {
+			buf[4] = 1;
+			dbg("%s - stop bits = 1.5", __func__);
+		} else {
+			buf[4] = 2;
+			dbg("%s - stop bits = 2", __func__);
+		}
 	} else {
 		buf[4] = 0;
 		dbg("%s - stop bits = 1", __func__);
-- 
cgit v0.10.2


From 04c4ab17c7c39603c5017bee20d3b8ccb2f19816 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 19 Aug 2009 02:23:35 +0400
Subject: USB: fsl_qe_udc: Add fsl,mpc8323-qe-usb compatible entry

Current bindings specify that "fsl,mpc8323-qe-usb" compatible entry
should be used as a base match for QE UDCs, so update the driver to
comply with the bindings.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c
index d701bf4..7881f12 100644
--- a/drivers/usb/gadget/fsl_qe_udc.c
+++ b/drivers/usb/gadget/fsl_qe_udc.c
@@ -2751,6 +2751,10 @@ static int __devexit qe_udc_remove(struct of_device *ofdev)
 /*-------------------------------------------------------------------------*/
 static struct of_device_id __devinitdata qe_udc_match[] = {
 	{
+		.compatible = "fsl,mpc8323-qe-usb",
+		.data = (void *)PORT_QE,
+	},
+	{
 		.compatible = "fsl,mpc8360-qe-usb",
 		.data = (void *)PORT_QE,
 	},
-- 
cgit v0.10.2


From 3a44494e233c0fdd818d485cfea8998500543589 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 19 Aug 2009 12:22:06 -0400
Subject: USB: EHCI: rescan the queue after an unlink

This patch (as1280) fixes an obscure bug in ehci-hcd's dequeuing logic
for async URBs.  If a later URB is unlinked and the completion
routine unlinks an earlier URB, then the earlier URB won't be given
back in a timely manner because the endpoint queue isn't rescanned as
it should be.

Similar bugs occur if an endpoint is reset or a halt is cleared while
a completion routine is running, because the subroutines don't test
for the COMPLETING state.

All these problems are solved by adding a new needs_rescan flag to the
ehci_qh structure.  If the flag is set while scanning through an idle
QH, the scan will be repeated.  If the QH isn't idle then an unlink
cycle will be initiated, and the proper action will be taken when it
becomes idle.

Also, an unnecessary test is removed from qh_link_async(): That
routine is never called if the QH's state isn't IDLE.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: David Brownell <david-b@pacbell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 6887aac..d7e85b6 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -863,12 +863,18 @@ static void unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	if (!HC_IS_RUNNING(ehci_to_hcd(ehci)->state) && ehci->reclaim)
 		end_unlink_async(ehci);
 
-	/* if it's not linked then there's nothing to do */
-	if (qh->qh_state != QH_STATE_LINKED)
-		;
+	/* If the QH isn't linked then there's nothing we can do
+	 * unless we were called during a giveback, in which case
+	 * qh_completions() has to deal with it.
+	 */
+	if (qh->qh_state != QH_STATE_LINKED) {
+		if (qh->qh_state == QH_STATE_COMPLETING)
+			qh->needs_rescan = 1;
+		return;
+	}
 
 	/* defer till later if busy */
-	else if (ehci->reclaim) {
+	if (ehci->reclaim) {
 		struct ehci_qh		*last;
 
 		for (last = ehci->reclaim;
@@ -1001,6 +1007,7 @@ rescan:
 		qh->qh_state = QH_STATE_IDLE;
 	switch (qh->qh_state) {
 	case QH_STATE_LINKED:
+	case QH_STATE_COMPLETING:
 		for (tmp = ehci->async->qh_next.qh;
 				tmp && tmp != qh;
 				tmp = tmp->qh_next.qh)
@@ -1065,7 +1072,8 @@ ehci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep)
 		usb_settoggle(qh->dev, epnum, is_out, 0);
 		if (!list_empty(&qh->qtd_list)) {
 			WARN_ONCE(1, "clear_halt for a busy endpoint\n");
-		} else if (qh->qh_state == QH_STATE_LINKED) {
+		} else if (qh->qh_state == QH_STATE_LINKED ||
+				qh->qh_state == QH_STATE_COMPLETING) {
 
 			/* The toggle value in the QH can't be updated
 			 * while the QH is active.  Unlink it now;
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 377ed53..57a8479 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -310,13 +310,13 @@ static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
 static unsigned
 qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
-	struct ehci_qtd		*last = NULL, *end = qh->dummy;
+	struct ehci_qtd		*last, *end = qh->dummy;
 	struct list_head	*entry, *tmp;
-	int			last_status = -EINPROGRESS;
+	int			last_status;
 	int			stopped;
 	unsigned		count = 0;
 	u8			state;
-	__le32			halt = HALT_BIT(ehci);
+	const __le32		halt = HALT_BIT(ehci);
 	struct ehci_qh_hw	*hw = qh->hw;
 
 	if (unlikely (list_empty (&qh->qtd_list)))
@@ -327,11 +327,20 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	 * they add urbs to this qh's queue or mark them for unlinking.
 	 *
 	 * NOTE:  unlinking expects to be done in queue order.
+	 *
+	 * It's a bug for qh->qh_state to be anything other than
+	 * QH_STATE_IDLE, unless our caller is scan_async() or
+	 * scan_periodic().
 	 */
 	state = qh->qh_state;
 	qh->qh_state = QH_STATE_COMPLETING;
 	stopped = (state == QH_STATE_IDLE);
 
+ rescan:
+	last = NULL;
+	last_status = -EINPROGRESS;
+	qh->needs_rescan = 0;
+
 	/* remove de-activated QTDs from front of queue.
 	 * after faults (including short reads), cleanup this urb
 	 * then let the queue advance.
@@ -507,6 +516,21 @@ halt:
 		ehci_qtd_free (ehci, last);
 	}
 
+	/* Do we need to rescan for URBs dequeued during a giveback? */
+	if (unlikely(qh->needs_rescan)) {
+		/* If the QH is already unlinked, do the rescan now. */
+		if (state == QH_STATE_IDLE)
+			goto rescan;
+
+		/* Otherwise we have to wait until the QH is fully unlinked.
+		 * Our caller will start an unlink if qh->needs_rescan is
+		 * set.  But if an unlink has already started, nothing needs
+		 * to be done.
+		 */
+		if (state != QH_STATE_LINKED)
+			qh->needs_rescan = 0;
+	}
+
 	/* restore original state; caller must unlink or relink */
 	qh->qh_state = state;
 
@@ -535,8 +559,10 @@ halt:
 					& hw->hw_info2) != 0) {
 				intr_deschedule (ehci, qh);
 				(void) qh_schedule (ehci, qh);
-			} else
-				unlink_async (ehci, qh);
+			} else {
+				/* Tell the caller to start an unlink */
+				qh->needs_rescan = 1;
+			}
 			break;
 		/* otherwise, unlink already started */
 		}
@@ -916,6 +942,8 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	if (unlikely(qh->clearing_tt))
 		return;
 
+	WARN_ON(qh->qh_state != QH_STATE_IDLE);
+
 	/* (re)start the async schedule? */
 	head = ehci->async;
 	timer_action_done (ehci, TIMER_ASYNC_OFF);
@@ -934,8 +962,7 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	}
 
 	/* clear halt and/or toggle; and maybe recover from silicon quirk */
-	if (qh->qh_state == QH_STATE_IDLE)
-		qh_refresh (ehci, qh);
+	qh_refresh(ehci, qh);
 
 	/* splice right after start */
 	qh->qh_next = head->qh_next;
@@ -1220,6 +1247,8 @@ rescan:
 				qh = qh_get (qh);
 				qh->stamp = ehci->stamp;
 				temp = qh_completions (ehci, qh);
+				if (qh->needs_rescan)
+					unlink_async(ehci, qh);
 				qh_put (qh);
 				if (temp != 0) {
 					goto rescan;
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index ec3dba6..064e768 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -341,6 +341,7 @@ struct ehci_qh {
 	u32			refcount;
 	unsigned		stamp;
 
+	u8			needs_rescan;	/* Dequeue during giveback */
 	u8			qh_state;
 #define	QH_STATE_LINKED		1		/* HC sees this */
 #define	QH_STATE_UNLINK		2		/* HC may still see this */
-- 
cgit v0.10.2


From a448c9d8c58ff7d3f8cc2a8f835065460099b22d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 19 Aug 2009 12:22:44 -0400
Subject: USB: EHCI: change deschedule logic for interrupt QHs

This patch (as1281) changes the way ehci-hcd deschedules interrupt
QHs, copying the approach used for async QHs.  The caller is no longer
responsible for rescheduling the QH if its queue is non-empty; instead
the reschedule is done directly by intr_deschedule(), after calling
qh_completions().  This is exactly the same as how end_unlink_async()
works.

ehci_urb_dequeue() and intr_deschedule() now correctly handle the case
where they are called while another interrupt URB for the same QH is
being given back.  This was a surprisingly large blind spot.  And
scan_periodic() now respects the new needs_rescan flag.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: David Brownell <david-b@pacbell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d7e85b6..4f89d7f 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -934,8 +934,9 @@ static int ehci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 			break;
 		switch (qh->qh_state) {
 		case QH_STATE_LINKED:
+		case QH_STATE_COMPLETING:
 			intr_deschedule (ehci, qh);
-			/* FALL THROUGH */
+			break;
 		case QH_STATE_IDLE:
 			qh_completions (ehci, qh);
 			break;
@@ -944,23 +945,6 @@ static int ehci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 					qh, qh->qh_state);
 			goto done;
 		}
-
-		/* reschedule QH iff another request is queued */
-		if (!list_empty (&qh->qtd_list)
-				&& HC_IS_RUNNING (hcd->state)) {
-			rc = qh_schedule(ehci, qh);
-
-			/* An error here likely indicates handshake failure
-			 * or no space left in the schedule.  Neither fault
-			 * should happen often ...
-			 *
-			 * FIXME kill the now-dysfunctional queued urbs
-			 */
-			if (rc != 0)
-				ehci_err(ehci,
-					"can't reschedule qh %p, err %d",
-					qh, rc);
-		}
 		break;
 
 	case PIPE_ISOCHRONOUS:
@@ -1079,12 +1063,10 @@ ehci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep)
 			 * while the QH is active.  Unlink it now;
 			 * re-linking will call qh_refresh().
 			 */
-			if (eptype == USB_ENDPOINT_XFER_BULK) {
+			if (eptype == USB_ENDPOINT_XFER_BULK)
 				unlink_async(ehci, qh);
-			} else {
+			else
 				intr_deschedule(ehci, qh);
-				(void) qh_schedule(ehci, qh);
-			}
 		}
 	}
 	spin_unlock_irqrestore(&ehci->lock, flags);
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 57a8479..00ad9ce 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -299,7 +299,6 @@ __acquires(ehci->lock)
 static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
 static void unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
 
-static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
 static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
 
 /*
@@ -555,14 +554,9 @@ halt:
 			 * That should be rare for interrupt transfers,
 			 * except maybe high bandwidth ...
 			 */
-			if ((cpu_to_hc32(ehci, QH_SMASK)
-					& hw->hw_info2) != 0) {
-				intr_deschedule (ehci, qh);
-				(void) qh_schedule (ehci, qh);
-			} else {
-				/* Tell the caller to start an unlink */
-				qh->needs_rescan = 1;
-			}
+
+			/* Tell the caller to start an unlink */
+			qh->needs_rescan = 1;
 			break;
 		/* otherwise, unlink already started */
 		}
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index 327437a..3ea0593 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -615,8 +615,19 @@ static int qh_unlink_periodic(struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
-	unsigned	wait;
-	struct ehci_qh_hw *hw = qh->hw;
+	unsigned		wait;
+	struct ehci_qh_hw	*hw = qh->hw;
+	int			rc;
+
+	/* If the QH isn't linked then there's nothing we can do
+	 * unless we were called during a giveback, in which case
+	 * qh_completions() has to deal with it.
+	 */
+	if (qh->qh_state != QH_STATE_LINKED) {
+		if (qh->qh_state == QH_STATE_COMPLETING)
+			qh->needs_rescan = 1;
+		return;
+	}
 
 	qh_unlink_periodic (ehci, qh);
 
@@ -636,6 +647,24 @@ static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
 	qh->qh_state = QH_STATE_IDLE;
 	hw->hw_next = EHCI_LIST_END(ehci);
 	wmb ();
+
+	qh_completions(ehci, qh);
+
+	/* reschedule QH iff another request is queued */
+	if (!list_empty(&qh->qtd_list) &&
+			HC_IS_RUNNING(ehci_to_hcd(ehci)->state)) {
+		rc = qh_schedule(ehci, qh);
+
+		/* An error here likely indicates handshake failure
+		 * or no space left in the schedule.  Neither fault
+		 * should happen often ...
+		 *
+		 * FIXME kill the now-dysfunctional queued urbs
+		 */
+		if (rc != 0)
+			ehci_err(ehci, "can't reschedule qh %p, err %d\n",
+					qh, rc);
+	}
 }
 
 /*-------------------------------------------------------------------------*/
@@ -2213,7 +2242,8 @@ restart:
 				type = Q_NEXT_TYPE(ehci, q.qh->hw->hw_next);
 				q = q.qh->qh_next;
 				modified = qh_completions (ehci, temp.qh);
-				if (unlikely (list_empty (&temp.qh->qtd_list)))
+				if (unlikely(list_empty(&temp.qh->qtd_list) ||
+						temp.qh->needs_rescan))
 					intr_deschedule (ehci, temp.qh);
 				qh_put (temp.qh);
 				break;
-- 
cgit v0.10.2


From 2912282c06f219cf1634a624653c445329b37acf Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sat, 22 Aug 2009 20:24:49 +0200
Subject: USB: make usb_buffer_map_sg consistent with doc

usb_buffer_map_sg should return negative on error according to
its documentation. But dma_map_sg returns 0 on error. Take this
into account and return -ENOMEM in such situation.

While at it, return -EINVAL instead of -1 when wrong input is
passed in.

If this wasn't done, usb_sg_* operations used after usb_sg_init
which returned 0 may cause oopses/deadlocks since we don't init
structures/entries, esp. completion and status entry.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 43ee943..30dd263 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -914,11 +914,11 @@ int usb_buffer_map_sg(const struct usb_device *dev, int is_in,
 			|| !(bus = dev->bus)
 			|| !(controller = bus->controller)
 			|| !controller->dma_mask)
-		return -1;
+		return -EINVAL;
 
 	/* FIXME generic api broken like pci, can't report errors */
 	return dma_map_sg(controller, sg, nents,
-			is_in ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+			is_in ? DMA_FROM_DEVICE : DMA_TO_DEVICE) ? : -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(usb_buffer_map_sg);
 
-- 
cgit v0.10.2


From 48d316770bd4dcf3d21b53cfa91e358280c31d69 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 25 Aug 2009 10:26:57 +0200
Subject: USB: double put_tty_driver(gs_tty_driver) in gserial_setup()

If the driver cannot be registered, put_tty_driver(gs_tty_driver)
occurred here as well as at label fail.

put_tty_driver() already occurs at label fail

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c
index fc6e709..adf8260 100644
--- a/drivers/usb/gadget/u_serial.c
+++ b/drivers/usb/gadget/u_serial.c
@@ -1114,7 +1114,6 @@ int __init gserial_setup(struct usb_gadget *g, unsigned count)
 	/* export the driver ... */
 	status = tty_register_driver(gs_tty_driver);
 	if (status) {
-		put_tty_driver(gs_tty_driver);
 		pr_err("%s: cannot register, err %d\n",
 				__func__, status);
 		goto fail;
-- 
cgit v0.10.2


From 392ca68b401e0797115a08836642faad5778fdb2 Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@horizon.com>
Date: Mon, 24 Aug 2009 22:06:41 -0400
Subject: USB: Clean up root hub string descriptors

The previous code had a bug that would add a trailing null byte to
the returned descriptor.

Signed-off-by: George Spelvin <linux@horizon.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 95ccfa0..34de475 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -337,72 +337,89 @@ static const u8 ss_rh_config_descriptor[] = {
 
 /*-------------------------------------------------------------------------*/
 
-/*
- * helper routine for returning string descriptors in UTF-16LE
- * input can actually be ISO-8859-1; ASCII is its 7-bit subset
+/**
+ * ascii2desc() - Helper routine for producing UTF-16LE string descriptors
+ * @s: Null-terminated ASCII (actually ISO-8859-1) string
+ * @buf: Buffer for USB string descriptor (header + UTF-16LE)
+ * @len: Length (in bytes; may be odd) of descriptor buffer.
+ *
+ * The return value is the number of bytes filled in: 2 + 2*strlen(s) or
+ * buflen, whichever is less.
+ *
+ * USB String descriptors can contain at most 126 characters; input
+ * strings longer than that are truncated.
  */
-static unsigned ascii2utf(char *s, u8 *utf, int utfmax)
+static unsigned
+ascii2desc(char const *s, u8 *buf, unsigned len)
 {
-	unsigned retval;
+	unsigned n, t = 2 + 2*strlen(s);
 
-	for (retval = 0; *s && utfmax > 1; utfmax -= 2, retval += 2) {
-		*utf++ = *s++;
-		*utf++ = 0;
-	}
-	if (utfmax > 0) {
-		*utf = *s;
-		++retval;
+	if (t > 254)
+		t = 254;	/* Longest possible UTF string descriptor */
+	if (len > t)
+		len = t;
+
+	t += USB_DT_STRING << 8;	/* Now t is first 16 bits to store */
+
+	n = len;
+	while (n--) {
+		*buf++ = t;
+		if (!n--)
+			break;
+		*buf++ = t >> 8;
+		t = (unsigned char)*s++;
 	}
-	return retval;
+	return len;
 }
 
-/*
- * rh_string - provides manufacturer, product and serial strings for root hub
- * @id: the string ID number (1: serial number, 2: product, 3: vendor)
+/**
+ * rh_string() - provides string descriptors for root hub
+ * @id: the string ID number (0: langids, 1: serial #, 2: product, 3: vendor)
  * @hcd: the host controller for this root hub
- * @data: return packet in UTF-16 LE
- * @len: length of the return packet
+ * @data: buffer for output packet
+ * @len: length of the provided buffer
  *
  * Produces either a manufacturer, product or serial number string for the
  * virtual root hub device.
+ * Returns the number of bytes filled in: the length of the descriptor or
+ * of the provided buffer, whichever is less.
  */
-static unsigned rh_string(int id, struct usb_hcd *hcd, u8 *data, unsigned len)
+static unsigned
+rh_string(int id, struct usb_hcd const *hcd, u8 *data, unsigned len)
 {
-	char buf [100];
+	char buf[100];
+	char const *s;
+	static char const langids[4] = {4, USB_DT_STRING, 0x09, 0x04};
 
 	// language ids
-	if (id == 0) {
-		buf[0] = 4;    buf[1] = 3;	/* 4 bytes string data */
-		buf[2] = 0x09; buf[3] = 0x04;	/* MSFT-speak for "en-us" */
-		len = min_t(unsigned, len, 4);
-		memcpy (data, buf, len);
+	switch (id) {
+	case 0:
+		/* Array of LANGID codes (0x0409 is MSFT-speak for "en-us") */
+		/* See http://www.usb.org/developers/docs/USB_LANGIDs.pdf */
+		if (len > 4)
+			len = 4;
+		memcpy(data, langids, len);
 		return len;
-
-	// serial number
-	} else if (id == 1) {
-		strlcpy (buf, hcd->self.bus_name, sizeof buf);
-
-	// product description
-	} else if (id == 2) {
-		strlcpy (buf, hcd->product_desc, sizeof buf);
-
- 	// id 3 == vendor description
-	} else if (id == 3) {
+	case 1:
+		/* Serial number */
+		s = hcd->self.bus_name;
+		break;
+	case 2:
+		/* Product name */
+		s = hcd->product_desc;
+		break;
+	case 3:
+		/* Manufacturer */
 		snprintf (buf, sizeof buf, "%s %s %s", init_utsname()->sysname,
 			init_utsname()->release, hcd->driver->description);
-	}
-
-	switch (len) {		/* All cases fall through */
+		s = buf;
+		break;
 	default:
-		len = 2 + ascii2utf (buf, data + 2, len - 2);
-	case 2:
-		data [1] = 3;	/* type == string */
-	case 1:
-		data [0] = 2 * (strlen (buf) + 1);
-	case 0:
-		;		/* Compiler wants a statement here */
+		/* Can't happen; caller guarantees it */
+		return 0;
 	}
-	return len;
+
+	return ascii2desc(s, data, len);
 }
 
 
-- 
cgit v0.10.2


From 7f536692afd45eea349501beb2b76492a3524a28 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 24 Aug 2009 18:27:23 +0200
Subject: USB: gadget: double free_irq() in at91udc_probe()

If request_irq() fails, udp_irq is freed twice.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c
index 72bae8f..66450a1 100644
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -1754,7 +1754,6 @@ static int __init at91udc_probe(struct platform_device *pdev)
 				IRQF_DISABLED, driver_name, udc)) {
 			DBG("request vbus irq %d failed\n",
 					udc->board.vbus_pin);
-			free_irq(udc->udp_irq, udc);
 			retval = -EBUSY;
 			goto fail3;
 		}
-- 
cgit v0.10.2


From d77282c836d6c2601da6a188812b20cff8e9bbe2 Mon Sep 17 00:00:00 2001
From: Anand Gadiyar <gadiyar@ti.com>
Date: Mon, 24 Aug 2009 20:14:45 +0530
Subject: USB: OMAP: ISP1301: Compile fix

OMAP: ISP1301: Compile fix

Fix this build error on non- OMAP-H2/H3/H4 systems:
(factored out two empty functions as part of the fix)

  CC      drivers/usb/otg/isp1301_omap.o
drivers/usb/otg/isp1301_omap.c: In function 'otg_update_isp':
drivers/usb/otg/isp1301_omap.c:635: error: implicit declaration of function 'notresponding'
drivers/usb/otg/isp1301_omap.c: In function 'b_peripheral':
drivers/usb/otg/isp1301_omap.c:973: error: implicit declaration of function 'enable_vbus_draw'
drivers/usb/otg/isp1301_omap.c: In function 'isp_update_otg':
drivers/usb/otg/isp1301_omap.c:1003: error: implicit declaration of function 'enable_vbus_source'
make[2]: *** [drivers/usb/otg/isp1301_omap.o] Error 1
make[1]: *** [drivers/usb/otg] Error 2
make: *** [drivers] Error 2

Signed-off-by: Anand Gadiyar <gadiyar@ti.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/otg/isp1301_omap.c b/drivers/usb/otg/isp1301_omap.c
index e0d56ef..77a5f41 100644
--- a/drivers/usb/otg/isp1301_omap.c
+++ b/drivers/usb/otg/isp1301_omap.c
@@ -117,24 +117,7 @@ static void enable_vbus_draw(struct isp1301 *isp, unsigned mA)
 		pr_debug("  VBUS %d mA error %d\n", mA, status);
 }
 
-static void enable_vbus_source(struct isp1301 *isp)
-{
-	/* this board won't supply more than 8mA vbus power.
-	 * some boards can switch a 100ma "unit load" (or more).
-	 */
-}
-
-
-/* products will deliver OTG messages with LEDs, GUI, etc */
-static inline void notresponding(struct isp1301 *isp)
-{
-	printk(KERN_NOTICE "OTG device not responding.\n");
-}
-
-
-#endif
-
-#if defined(CONFIG_MACH_OMAP_H4)
+#else
 
 static void enable_vbus_draw(struct isp1301 *isp, unsigned mA)
 {
@@ -144,6 +127,8 @@ static void enable_vbus_draw(struct isp1301 *isp, unsigned mA)
 	 */
 }
 
+#endif
+
 static void enable_vbus_source(struct isp1301 *isp)
 {
 	/* this board won't supply more than 8mA vbus power.
@@ -159,8 +144,6 @@ static inline void notresponding(struct isp1301 *isp)
 }
 
 
-#endif
-
 /*-------------------------------------------------------------------------*/
 
 static struct i2c_driver isp1301_driver;
-- 
cgit v0.10.2


From f0cc82a831d4d839eb6b67c7c046ebd2d1d7c4c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Sat, 22 Aug 2009 17:33:53 -0300
Subject: USB: fix paths in usbmon documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hi there.

On Aug 21 2009, Alan Stern wrote:
> On Thu, 20 Aug 2009, Rogério Brito wrote:
> > Again, just reiterating, what I said before, even though I am not sure
> > if I can reproduce it, I will try to.
>
> A usbmon trace showing what happens when you plug in the drive and
> when you run smartctl would help.

The documentation for usbmon in the kernel 2.6.31-rc7 kernel doesn't
match what the kernel exposes in the debug fs tree. This patch fixes it.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt
index 4efd847..66f92d1 100644
--- a/Documentation/usb/usbmon.txt
+++ b/Documentation/usb/usbmon.txt
@@ -33,7 +33,7 @@ if usbmon is built into the kernel.
 
 Verify that bus sockets are present.
 
-# ls /sys/kernel/debug/usbmon
+# ls /sys/kernel/debug/usb/usbmon
 0s  0u  1s  1t  1u  2s  2t  2u  3s  3t  3u  4s  4t  4u
 #
 
@@ -58,11 +58,11 @@ Bus=03 means it's bus 3.
 
 3. Start 'cat'
 
-# cat /sys/kernel/debug/usbmon/3u > /tmp/1.mon.out
+# cat /sys/kernel/debug/usb/usbmon/3u > /tmp/1.mon.out
 
 to listen on a single bus, otherwise, to listen on all buses, type:
 
-# cat /sys/kernel/debug/usbmon/0u > /tmp/1.mon.out
+# cat /sys/kernel/debug/usb/usbmon/0u > /tmp/1.mon.out
 
 This process will be reading until killed. Naturally, the output can be
 redirected to a desirable location. This is preferred, because it is going
-- 
cgit v0.10.2


From fd4f3a931f6e047e88bc8c6023666acad957109a Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Thu, 20 Aug 2009 20:00:19 -0600
Subject: USB: unusual_devs.h: drop some unneeded floppy entries

We set pdt_1f_for_no_lun for UFI devices, so most floppy entiries should
be unnecessary. This patch removes three entries which I'm certain are.
 - For Mitsumi I have a customer with RHEL 5 (bz#514296)
 - For SMSC I accessed Novell's Bugzilla and verified the entry
 - For Y-E I tested the patch with the actual device

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Phil Dibowitz <phil@ipom.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 7477d41..079ae0f 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -66,13 +66,6 @@ UNUSUAL_DEV(  0x03eb, 0x2002, 0x0100, 0x0100,
 		US_SC_DEVICE, US_PR_DEVICE, NULL,
 		US_FL_IGNORE_RESIDUE),
 
-/* modified by Tobias Lorenz <tobias.lorenz@gmx.net> */
-UNUSUAL_DEV(  0x03ee, 0x6901, 0x0000, 0x0200,
-		"Mitsumi",
-		"USB FDD",
-		US_SC_DEVICE, US_PR_DEVICE, NULL,
-		US_FL_SINGLE_LUN ),
-
 /* Reported by Rodolfo Quesada <rquesada@roqz.net> */
 UNUSUAL_DEV(  0x03ee, 0x6906, 0x0003, 0x0003,
 		"VIA Technologies Inc.",
@@ -233,13 +226,6 @@ UNUSUAL_DEV(  0x0421, 0x0495, 0x0370, 0x0370,
 		US_SC_DEVICE, US_PR_DEVICE, NULL,
 		US_FL_MAX_SECTORS_64 ),
 
-/* Reported by Olaf Hering <olh@suse.de> from novell bug #105878 */
-UNUSUAL_DEV(  0x0424, 0x0fdc, 0x0210, 0x0210,
-		"SMSC",
-		"FDC GOLD-2.30",
-		US_SC_DEVICE, US_PR_DEVICE, NULL,
-		US_FL_SINGLE_LUN ),
-
 #ifdef NO_SDDR09
 UNUSUAL_DEV(  0x0436, 0x0005, 0x0100, 0x0100,
 		"Microtech",
@@ -664,19 +650,13 @@ UNUSUAL_DEV(  0x055d, 0x2020, 0x0000, 0x0210,
 		US_SC_DEVICE, US_PR_DEVICE, NULL,
 		US_FL_SINGLE_LUN ),
 
-		
+/* We keep this entry to force the transport; firmware 3.00 and later is ok. */
 UNUSUAL_DEV(  0x057b, 0x0000, 0x0000, 0x0299,
 		"Y-E Data",
 		"Flashbuster-U",
 		US_SC_DEVICE,  US_PR_CB, NULL,
 		US_FL_SINGLE_LUN),
 
-UNUSUAL_DEV(  0x057b, 0x0000, 0x0300, 0x9999,
-		"Y-E Data",
-		"Flashbuster-U",
-		US_SC_DEVICE,  US_PR_DEVICE, NULL,
-		US_FL_SINGLE_LUN),
-
 /* Reported by Johann Cardon <johann.cardon@free.fr>
  * This entry is needed only because the device reports
  * bInterfaceClass = 0xff (vendor-specific)
-- 
cgit v0.10.2


From df6c516900d48df3581b23d37d6516a22ec4f2ca Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:48 -0500
Subject: USB: ehci,dbgp,early_printk: split ehci debug driver from
 early_printk.c

Move the dbgp early printk driver in advance of refactoring and adding
new code, so the changes to this code are tracked separately from the
move of the code.

The drivers/usb/early directory will be the location of the current
and future early usb code for driving usb devices prior initializing
the standard interrupt driven USB drivers.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index b11cab3..519a5e1 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -160,721 +160,6 @@ static struct console early_serial_console = {
 	.index =	-1,
 };
 
-#ifdef CONFIG_EARLY_PRINTK_DBGP
-
-static struct ehci_caps __iomem *ehci_caps;
-static struct ehci_regs __iomem *ehci_regs;
-static struct ehci_dbg_port __iomem *ehci_debug;
-static unsigned int dbgp_endpoint_out;
-
-struct ehci_dev {
-	u32 bus;
-	u32 slot;
-	u32 func;
-};
-
-static struct ehci_dev ehci_dev;
-
-#define USB_DEBUG_DEVNUM 127
-
-#define DBGP_DATA_TOGGLE	0x8800
-
-static inline u32 dbgp_pid_update(u32 x, u32 tok)
-{
-	return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
-}
-
-static inline u32 dbgp_len_update(u32 x, u32 len)
-{
-	return (x & ~0x0f) | (len & 0x0f);
-}
-
-/*
- * USB Packet IDs (PIDs)
- */
-
-/* token */
-#define USB_PID_OUT		0xe1
-#define USB_PID_IN		0x69
-#define USB_PID_SOF		0xa5
-#define USB_PID_SETUP		0x2d
-/* handshake */
-#define USB_PID_ACK		0xd2
-#define USB_PID_NAK		0x5a
-#define USB_PID_STALL		0x1e
-#define USB_PID_NYET		0x96
-/* data */
-#define USB_PID_DATA0		0xc3
-#define USB_PID_DATA1		0x4b
-#define USB_PID_DATA2		0x87
-#define USB_PID_MDATA		0x0f
-/* Special */
-#define USB_PID_PREAMBLE	0x3c
-#define USB_PID_ERR		0x3c
-#define USB_PID_SPLIT		0x78
-#define USB_PID_PING		0xb4
-#define USB_PID_UNDEF_0		0xf0
-
-#define USB_PID_DATA_TOGGLE	0x88
-#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
-
-#define PCI_CAP_ID_EHCI_DEBUG	0xa
-
-#define HUB_ROOT_RESET_TIME	50	/* times are in msec */
-#define HUB_SHORT_RESET_TIME	10
-#define HUB_LONG_RESET_TIME	200
-#define HUB_RESET_TIMEOUT	500
-
-#define DBGP_MAX_PACKET		8
-
-static int dbgp_wait_until_complete(void)
-{
-	u32 ctrl;
-	int loop = 0x100000;
-
-	do {
-		ctrl = readl(&ehci_debug->control);
-		/* Stop when the transaction is finished */
-		if (ctrl & DBGP_DONE)
-			break;
-	} while (--loop > 0);
-
-	if (!loop)
-		return -1;
-
-	/*
-	 * Now that we have observed the completed transaction,
-	 * clear the done bit.
-	 */
-	writel(ctrl | DBGP_DONE, &ehci_debug->control);
-	return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
-}
-
-static void __init dbgp_mdelay(int ms)
-{
-	int i;
-
-	while (ms--) {
-		for (i = 0; i < 1000; i++)
-			outb(0x1, 0x80);
-	}
-}
-
-static void dbgp_breath(void)
-{
-	/* Sleep to give the debug port a chance to breathe */
-}
-
-static int dbgp_wait_until_done(unsigned ctrl)
-{
-	u32 pids, lpid;
-	int ret;
-	int loop = 3;
-
-retry:
-	writel(ctrl | DBGP_GO, &ehci_debug->control);
-	ret = dbgp_wait_until_complete();
-	pids = readl(&ehci_debug->pids);
-	lpid = DBGP_PID_GET(pids);
-
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * If the port is getting full or it has dropped data
-	 * start pacing ourselves, not necessary but it's friendly.
-	 */
-	if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
-		dbgp_breath();
-
-	/* If I get a NACK reissue the transmission */
-	if (lpid == USB_PID_NAK) {
-		if (--loop > 0)
-			goto retry;
-	}
-
-	return ret;
-}
-
-static void dbgp_set_data(const void *buf, int size)
-{
-	const unsigned char *bytes = buf;
-	u32 lo, hi;
-	int i;
-
-	lo = hi = 0;
-	for (i = 0; i < 4 && i < size; i++)
-		lo |= bytes[i] << (8*i);
-	for (; i < 8 && i < size; i++)
-		hi |= bytes[i] << (8*(i - 4));
-	writel(lo, &ehci_debug->data03);
-	writel(hi, &ehci_debug->data47);
-}
-
-static void __init dbgp_get_data(void *buf, int size)
-{
-	unsigned char *bytes = buf;
-	u32 lo, hi;
-	int i;
-
-	lo = readl(&ehci_debug->data03);
-	hi = readl(&ehci_debug->data47);
-	for (i = 0; i < 4 && i < size; i++)
-		bytes[i] = (lo >> (8*i)) & 0xff;
-	for (; i < 8 && i < size; i++)
-		bytes[i] = (hi >> (8*(i - 4))) & 0xff;
-}
-
-static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
-			 const char *bytes, int size)
-{
-	u32 pids, addr, ctrl;
-	int ret;
-
-	if (size > DBGP_MAX_PACKET)
-		return -1;
-
-	addr = DBGP_EPADDR(devnum, endpoint);
-
-	pids = readl(&ehci_debug->pids);
-	pids = dbgp_pid_update(pids, USB_PID_OUT);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, size);
-	ctrl |= DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	dbgp_set_data(bytes, size);
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	return ret;
-}
-
-static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
-				 int size)
-{
-	u32 pids, addr, ctrl;
-	int ret;
-
-	if (size > DBGP_MAX_PACKET)
-		return -1;
-
-	addr = DBGP_EPADDR(devnum, endpoint);
-
-	pids = readl(&ehci_debug->pids);
-	pids = dbgp_pid_update(pids, USB_PID_IN);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, size);
-	ctrl &= ~DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	if (size > ret)
-		size = ret;
-	dbgp_get_data(data, size);
-	return ret;
-}
-
-static int __init dbgp_control_msg(unsigned devnum, int requesttype,
-	int request, int value, int index, void *data, int size)
-{
-	u32 pids, addr, ctrl;
-	struct usb_ctrlrequest req;
-	int read;
-	int ret;
-
-	read = (requesttype & USB_DIR_IN) != 0;
-	if (size > (read ? DBGP_MAX_PACKET:0))
-		return -1;
-
-	/* Compute the control message */
-	req.bRequestType = requesttype;
-	req.bRequest = request;
-	req.wValue = cpu_to_le16(value);
-	req.wIndex = cpu_to_le16(index);
-	req.wLength = cpu_to_le16(size);
-
-	pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
-	addr = DBGP_EPADDR(devnum, 0);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, sizeof(req));
-	ctrl |= DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	/* Send the setup message */
-	dbgp_set_data(&req, sizeof(req));
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	/* Read the result */
-	return dbgp_bulk_read(devnum, 0, data, size);
-}
-
-
-/* Find a PCI capability */
-static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
-{
-	u8 pos;
-	int bytes;
-
-	if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
-		PCI_STATUS_CAP_LIST))
-		return 0;
-
-	pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
-	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
-		u8 id;
-
-		pos &= ~3;
-		id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
-		if (id == 0xff)
-			break;
-		if (id == cap)
-			return pos;
-
-		pos = read_pci_config_byte(num, slot, func,
-						 pos+PCI_CAP_LIST_NEXT);
-	}
-	return 0;
-}
-
-static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
-{
-	u32 class;
-
-	class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
-	if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
-		return 0;
-
-	return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
-}
-
-static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
-{
-	u32 bus, slot, func;
-
-	for (bus = 0; bus < 256; bus++) {
-		for (slot = 0; slot < 32; slot++) {
-			for (func = 0; func < 8; func++) {
-				unsigned cap;
-
-				cap = __find_dbgp(bus, slot, func);
-
-				if (!cap)
-					continue;
-				if (ehci_num-- != 0)
-					continue;
-				*rbus = bus;
-				*rslot = slot;
-				*rfunc = func;
-				return cap;
-			}
-		}
-	}
-	return 0;
-}
-
-static int __init ehci_reset_port(int port)
-{
-	u32 portsc;
-	u32 delay_time, delay;
-	int loop;
-
-	/* Reset the usb debug port */
-	portsc = readl(&ehci_regs->port_status[port - 1]);
-	portsc &= ~PORT_PE;
-	portsc |= PORT_RESET;
-	writel(portsc, &ehci_regs->port_status[port - 1]);
-
-	delay = HUB_ROOT_RESET_TIME;
-	for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
-	     delay_time += delay) {
-		dbgp_mdelay(delay);
-
-		portsc = readl(&ehci_regs->port_status[port - 1]);
-		if (portsc & PORT_RESET) {
-			/* force reset to complete */
-			loop = 2;
-			writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
-				&ehci_regs->port_status[port - 1]);
-			do {
-				portsc = readl(&ehci_regs->port_status[port-1]);
-			} while ((portsc & PORT_RESET) && (--loop > 0));
-		}
-
-		/* Device went away? */
-		if (!(portsc & PORT_CONNECT))
-			return -ENOTCONN;
-
-		/* bomb out completely if something weird happend */
-		if ((portsc & PORT_CSC))
-			return -EINVAL;
-
-		/* If we've finished resetting, then break out of the loop */
-		if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
-			return 0;
-	}
-	return -EBUSY;
-}
-
-static int __init ehci_wait_for_port(int port)
-{
-	u32 status;
-	int ret, reps;
-
-	for (reps = 0; reps < 3; reps++) {
-		dbgp_mdelay(100);
-		status = readl(&ehci_regs->status);
-		if (status & STS_PCD) {
-			ret = ehci_reset_port(port);
-			if (ret == 0)
-				return 0;
-		}
-	}
-	return -ENOTCONN;
-}
-
-#ifdef DBGP_DEBUG
-# define dbgp_printk early_printk
-#else
-static inline void dbgp_printk(const char *fmt, ...) { }
-#endif
-
-typedef void (*set_debug_port_t)(int port);
-
-static void __init default_set_debug_port(int port)
-{
-}
-
-static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
-
-static void __init nvidia_set_debug_port(int port)
-{
-	u32 dword;
-	dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
-				 0x74);
-	dword &= ~(0x0f<<12);
-	dword |= ((port & 0x0f)<<12);
-	write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
-				 dword);
-	dbgp_printk("set debug port to %d\n", port);
-}
-
-static void __init detect_set_debug_port(void)
-{
-	u32 vendorid;
-
-	vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
-		 0x00);
-
-	if ((vendorid & 0xffff) == 0x10de) {
-		dbgp_printk("using nvidia set_debug_port\n");
-		set_debug_port = nvidia_set_debug_port;
-	}
-}
-
-static int __init ehci_setup(void)
-{
-	struct usb_debug_descriptor dbgp_desc;
-	u32 cmd, ctrl, status, portsc, hcs_params;
-	u32 debug_port, new_debug_port = 0, n_ports;
-	u32  devnum;
-	int ret, i;
-	int loop;
-	int port_map_tried;
-	int playtimes = 3;
-
-try_next_time:
-	port_map_tried = 0;
-
-try_next_port:
-
-	hcs_params = readl(&ehci_caps->hcs_params);
-	debug_port = HCS_DEBUG_PORT(hcs_params);
-	n_ports    = HCS_N_PORTS(hcs_params);
-
-	dbgp_printk("debug_port: %d\n", debug_port);
-	dbgp_printk("n_ports:    %d\n", n_ports);
-
-	for (i = 1; i <= n_ports; i++) {
-		portsc = readl(&ehci_regs->port_status[i-1]);
-		dbgp_printk("portstatus%d: %08x\n", i, portsc);
-	}
-
-	if (port_map_tried && (new_debug_port != debug_port)) {
-		if (--playtimes) {
-			set_debug_port(new_debug_port);
-			goto try_next_time;
-		}
-		return -1;
-	}
-
-	loop = 100000;
-	/* Reset the EHCI controller */
-	cmd = readl(&ehci_regs->command);
-	cmd |= CMD_RESET;
-	writel(cmd, &ehci_regs->command);
-	do {
-		cmd = readl(&ehci_regs->command);
-	} while ((cmd & CMD_RESET) && (--loop > 0));
-
-	if (!loop) {
-		dbgp_printk("can not reset ehci\n");
-		return -1;
-	}
-	dbgp_printk("ehci reset done\n");
-
-	/* Claim ownership, but do not enable yet */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_OWNER;
-	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
-	writel(ctrl, &ehci_debug->control);
-
-	/* Start the ehci running */
-	cmd = readl(&ehci_regs->command);
-	cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
-	cmd |= CMD_RUN;
-	writel(cmd, &ehci_regs->command);
-
-	/* Ensure everything is routed to the EHCI */
-	writel(FLAG_CF, &ehci_regs->configured_flag);
-
-	/* Wait until the controller is no longer halted */
-	loop = 10;
-	do {
-		status = readl(&ehci_regs->status);
-	} while ((status & STS_HALT) && (--loop > 0));
-
-	if (!loop) {
-		dbgp_printk("ehci can be started\n");
-		return -1;
-	}
-	dbgp_printk("ehci started\n");
-
-	/* Wait for a device to show up in the debug port */
-	ret = ehci_wait_for_port(debug_port);
-	if (ret < 0) {
-		dbgp_printk("No device found in debug port\n");
-		goto next_debug_port;
-	}
-	dbgp_printk("ehci wait for port done\n");
-
-	/* Enable the debug port */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_CLAIM;
-	writel(ctrl, &ehci_debug->control);
-	ctrl = readl(&ehci_debug->control);
-	if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
-		dbgp_printk("No device in debug port\n");
-		writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
-		goto err;
-	}
-	dbgp_printk("debug ported enabled\n");
-
-	/* Completely transfer the debug device to the debug controller */
-	portsc = readl(&ehci_regs->port_status[debug_port - 1]);
-	portsc &= ~PORT_PE;
-	writel(portsc, &ehci_regs->port_status[debug_port - 1]);
-
-	dbgp_mdelay(100);
-
-	/* Find the debug device and make it device number 127 */
-	for (devnum = 0; devnum <= 127; devnum++) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
-			&dbgp_desc, sizeof(dbgp_desc));
-		if (ret > 0)
-			break;
-	}
-	if (devnum > 127) {
-		dbgp_printk("Could not find attached debug device\n");
-		goto err;
-	}
-	if (ret < 0) {
-		dbgp_printk("Attached device is not a debug device\n");
-		goto err;
-	}
-	dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
-
-	/* Move the device to 127 if it isn't already there */
-	if (devnum != USB_DEBUG_DEVNUM) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
-		if (ret < 0) {
-			dbgp_printk("Could not move attached device to %d\n",
-				USB_DEBUG_DEVNUM);
-			goto err;
-		}
-		devnum = USB_DEBUG_DEVNUM;
-		dbgp_printk("debug device renamed to 127\n");
-	}
-
-	/* Enable the debug interface */
-	ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
-		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
-	if (ret < 0) {
-		dbgp_printk(" Could not enable the debug device\n");
-		goto err;
-	}
-	dbgp_printk("debug interface enabled\n");
-
-	/* Perform a small write to get the even/odd data state in sync
-	 */
-	ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
-	if (ret < 0) {
-		dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
-		goto err;
-	}
-	dbgp_printk("small write doned\n");
-
-	return 0;
-err:
-	/* Things didn't work so remove my claim */
-	ctrl = readl(&ehci_debug->control);
-	ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
-	writel(ctrl, &ehci_debug->control);
-	return -1;
-
-next_debug_port:
-	port_map_tried |= (1<<(debug_port - 1));
-	new_debug_port = ((debug_port-1+1)%n_ports) + 1;
-	if (port_map_tried != ((1<<n_ports) - 1)) {
-		set_debug_port(new_debug_port);
-		goto try_next_port;
-	}
-	if (--playtimes) {
-		set_debug_port(new_debug_port);
-		goto try_next_time;
-	}
-
-	return -1;
-}
-
-static int __init early_dbgp_init(char *s)
-{
-	u32 debug_port, bar, offset;
-	u32 bus, slot, func, cap;
-	void __iomem *ehci_bar;
-	u32 dbgp_num;
-	u32 bar_val;
-	char *e;
-	int ret;
-	u8 byte;
-
-	if (!early_pci_allowed())
-		return -1;
-
-	dbgp_num = 0;
-	if (*s)
-		dbgp_num = simple_strtoul(s, &e, 10);
-	dbgp_printk("dbgp_num: %d\n", dbgp_num);
-
-	cap = find_dbgp(dbgp_num, &bus, &slot, &func);
-	if (!cap)
-		return -1;
-
-	dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
-			 func);
-
-	debug_port = read_pci_config(bus, slot, func, cap);
-	bar = (debug_port >> 29) & 0x7;
-	bar = (bar * 4) + 0xc;
-	offset = (debug_port >> 16) & 0xfff;
-	dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
-	if (bar != PCI_BASE_ADDRESS_0) {
-		dbgp_printk("only debug ports on bar 1 handled.\n");
-
-		return -1;
-	}
-
-	bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
-	dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
-	if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
-		dbgp_printk("only simple 32bit mmio bars supported\n");
-
-		return -1;
-	}
-
-	/* double check if the mem space is enabled */
-	byte = read_pci_config_byte(bus, slot, func, 0x04);
-	if (!(byte & 0x2)) {
-		byte  |= 0x02;
-		write_pci_config_byte(bus, slot, func, 0x04, byte);
-		dbgp_printk("mmio for ehci enabled\n");
-	}
-
-	/*
-	 * FIXME I don't have the bar size so just guess PAGE_SIZE is more
-	 * than enough.  1K is the biggest I have seen.
-	 */
-	set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
-	ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
-	ehci_bar += bar_val & ~PAGE_MASK;
-	dbgp_printk("ehci_bar: %p\n", ehci_bar);
-
-	ehci_caps  = ehci_bar;
-	ehci_regs  = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
-	ehci_debug = ehci_bar + offset;
-	ehci_dev.bus = bus;
-	ehci_dev.slot = slot;
-	ehci_dev.func = func;
-
-	detect_set_debug_port();
-
-	ret = ehci_setup();
-	if (ret < 0) {
-		dbgp_printk("ehci_setup failed\n");
-		ehci_debug = NULL;
-
-		return -1;
-	}
-
-	return 0;
-}
-
-static void early_dbgp_write(struct console *con, const char *str, u32 n)
-{
-	int chunk, ret;
-
-	if (!ehci_debug)
-		return;
-	while (n > 0) {
-		chunk = n;
-		if (chunk > DBGP_MAX_PACKET)
-			chunk = DBGP_MAX_PACKET;
-		ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
-			dbgp_endpoint_out, str, chunk);
-		str += chunk;
-		n -= chunk;
-	}
-}
-
-static struct console early_dbgp_console = {
-	.name =		"earlydbg",
-	.write =	early_dbgp_write,
-	.flags =	CON_PRINTBUFFER,
-	.index =	-1,
-};
-#endif
-
 /* Direct interface for emergencies */
 static struct console *early_console = &early_vga_console;
 static int __initdata early_console_initialized;
diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index dbe4fb6..be3c9b8 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_USB_MICROTEK)	+= image/
 obj-$(CONFIG_USB_SERIAL)	+= serial/
 
 obj-$(CONFIG_USB)		+= misc/
+obj-y				+= early/
 
 obj-$(CONFIG_USB_ATM)		+= atm/
 obj-$(CONFIG_USB_SPEEDTOUCH)	+= atm/
diff --git a/drivers/usb/early/Makefile b/drivers/usb/early/Makefile
new file mode 100644
index 0000000..dfedee8
--- /dev/null
+++ b/drivers/usb/early/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for early USB devices
+#
+
+obj-$(CONFIG_EARLY_PRINTK_DBGP)        += ehci-dbgp.o
diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
new file mode 100644
index 0000000..821b7b2
--- /dev/null
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -0,0 +1,723 @@
+#include <linux/console.h>
+#include <linux/errno.h>
+#include <linux/pci_regs.h>
+#include <linux/pci_ids.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/ehci_def.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <asm/pci-direct.h>
+#include <asm/fixmap.h>
+
+static struct ehci_caps __iomem *ehci_caps;
+static struct ehci_regs __iomem *ehci_regs;
+static struct ehci_dbg_port __iomem *ehci_debug;
+static unsigned int dbgp_endpoint_out;
+
+struct ehci_dev {
+	u32 bus;
+	u32 slot;
+	u32 func;
+};
+
+static struct ehci_dev ehci_dev;
+
+#define USB_DEBUG_DEVNUM 127
+
+#define DBGP_DATA_TOGGLE	0x8800
+
+static inline u32 dbgp_pid_update(u32 x, u32 tok)
+{
+	return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
+}
+
+static inline u32 dbgp_len_update(u32 x, u32 len)
+{
+	return (x & ~0x0f) | (len & 0x0f);
+}
+
+/*
+ * USB Packet IDs (PIDs)
+ */
+
+/* token */
+#define USB_PID_OUT		0xe1
+#define USB_PID_IN		0x69
+#define USB_PID_SOF		0xa5
+#define USB_PID_SETUP		0x2d
+/* handshake */
+#define USB_PID_ACK		0xd2
+#define USB_PID_NAK		0x5a
+#define USB_PID_STALL		0x1e
+#define USB_PID_NYET		0x96
+/* data */
+#define USB_PID_DATA0		0xc3
+#define USB_PID_DATA1		0x4b
+#define USB_PID_DATA2		0x87
+#define USB_PID_MDATA		0x0f
+/* Special */
+#define USB_PID_PREAMBLE	0x3c
+#define USB_PID_ERR		0x3c
+#define USB_PID_SPLIT		0x78
+#define USB_PID_PING		0xb4
+#define USB_PID_UNDEF_0		0xf0
+
+#define USB_PID_DATA_TOGGLE	0x88
+#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
+
+#define PCI_CAP_ID_EHCI_DEBUG	0xa
+
+#define HUB_ROOT_RESET_TIME	50	/* times are in msec */
+#define HUB_SHORT_RESET_TIME	10
+#define HUB_LONG_RESET_TIME	200
+#define HUB_RESET_TIMEOUT	500
+
+#define DBGP_MAX_PACKET		8
+
+static int dbgp_wait_until_complete(void)
+{
+	u32 ctrl;
+	int loop = 0x100000;
+
+	do {
+		ctrl = readl(&ehci_debug->control);
+		/* Stop when the transaction is finished */
+		if (ctrl & DBGP_DONE)
+			break;
+	} while (--loop > 0);
+
+	if (!loop)
+		return -1;
+
+	/*
+	 * Now that we have observed the completed transaction,
+	 * clear the done bit.
+	 */
+	writel(ctrl | DBGP_DONE, &ehci_debug->control);
+	return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
+}
+
+static void __init dbgp_mdelay(int ms)
+{
+	int i;
+
+	while (ms--) {
+		for (i = 0; i < 1000; i++)
+			outb(0x1, 0x80);
+	}
+}
+
+static void dbgp_breath(void)
+{
+	/* Sleep to give the debug port a chance to breathe */
+}
+
+static int dbgp_wait_until_done(unsigned ctrl)
+{
+	u32 pids, lpid;
+	int ret;
+	int loop = 3;
+
+retry:
+	writel(ctrl | DBGP_GO, &ehci_debug->control);
+	ret = dbgp_wait_until_complete();
+	pids = readl(&ehci_debug->pids);
+	lpid = DBGP_PID_GET(pids);
+
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * If the port is getting full or it has dropped data
+	 * start pacing ourselves, not necessary but it's friendly.
+	 */
+	if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
+		dbgp_breath();
+
+	/* If I get a NACK reissue the transmission */
+	if (lpid == USB_PID_NAK) {
+		if (--loop > 0)
+			goto retry;
+	}
+
+	return ret;
+}
+
+static void dbgp_set_data(const void *buf, int size)
+{
+	const unsigned char *bytes = buf;
+	u32 lo, hi;
+	int i;
+
+	lo = hi = 0;
+	for (i = 0; i < 4 && i < size; i++)
+		lo |= bytes[i] << (8*i);
+	for (; i < 8 && i < size; i++)
+		hi |= bytes[i] << (8*(i - 4));
+	writel(lo, &ehci_debug->data03);
+	writel(hi, &ehci_debug->data47);
+}
+
+static void __init dbgp_get_data(void *buf, int size)
+{
+	unsigned char *bytes = buf;
+	u32 lo, hi;
+	int i;
+
+	lo = readl(&ehci_debug->data03);
+	hi = readl(&ehci_debug->data47);
+	for (i = 0; i < 4 && i < size; i++)
+		bytes[i] = (lo >> (8*i)) & 0xff;
+	for (; i < 8 && i < size; i++)
+		bytes[i] = (hi >> (8*(i - 4))) & 0xff;
+}
+
+static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
+			 const char *bytes, int size)
+{
+	u32 pids, addr, ctrl;
+	int ret;
+
+	if (size > DBGP_MAX_PACKET)
+		return -1;
+
+	addr = DBGP_EPADDR(devnum, endpoint);
+
+	pids = readl(&ehci_debug->pids);
+	pids = dbgp_pid_update(pids, USB_PID_OUT);
+
+	ctrl = readl(&ehci_debug->control);
+	ctrl = dbgp_len_update(ctrl, size);
+	ctrl |= DBGP_OUT;
+	ctrl |= DBGP_GO;
+
+	dbgp_set_data(bytes, size);
+	writel(addr, &ehci_debug->address);
+	writel(pids, &ehci_debug->pids);
+
+	ret = dbgp_wait_until_done(ctrl);
+	if (ret < 0)
+		return ret;
+
+	return ret;
+}
+
+static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
+				 int size)
+{
+	u32 pids, addr, ctrl;
+	int ret;
+
+	if (size > DBGP_MAX_PACKET)
+		return -1;
+
+	addr = DBGP_EPADDR(devnum, endpoint);
+
+	pids = readl(&ehci_debug->pids);
+	pids = dbgp_pid_update(pids, USB_PID_IN);
+
+	ctrl = readl(&ehci_debug->control);
+	ctrl = dbgp_len_update(ctrl, size);
+	ctrl &= ~DBGP_OUT;
+	ctrl |= DBGP_GO;
+
+	writel(addr, &ehci_debug->address);
+	writel(pids, &ehci_debug->pids);
+	ret = dbgp_wait_until_done(ctrl);
+	if (ret < 0)
+		return ret;
+
+	if (size > ret)
+		size = ret;
+	dbgp_get_data(data, size);
+	return ret;
+}
+
+static int __init dbgp_control_msg(unsigned devnum, int requesttype,
+	int request, int value, int index, void *data, int size)
+{
+	u32 pids, addr, ctrl;
+	struct usb_ctrlrequest req;
+	int read;
+	int ret;
+
+	read = (requesttype & USB_DIR_IN) != 0;
+	if (size > (read ? DBGP_MAX_PACKET:0))
+		return -1;
+
+	/* Compute the control message */
+	req.bRequestType = requesttype;
+	req.bRequest = request;
+	req.wValue = cpu_to_le16(value);
+	req.wIndex = cpu_to_le16(index);
+	req.wLength = cpu_to_le16(size);
+
+	pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
+	addr = DBGP_EPADDR(devnum, 0);
+
+	ctrl = readl(&ehci_debug->control);
+	ctrl = dbgp_len_update(ctrl, sizeof(req));
+	ctrl |= DBGP_OUT;
+	ctrl |= DBGP_GO;
+
+	/* Send the setup message */
+	dbgp_set_data(&req, sizeof(req));
+	writel(addr, &ehci_debug->address);
+	writel(pids, &ehci_debug->pids);
+	ret = dbgp_wait_until_done(ctrl);
+	if (ret < 0)
+		return ret;
+
+	/* Read the result */
+	return dbgp_bulk_read(devnum, 0, data, size);
+}
+
+
+/* Find a PCI capability */
+static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
+{
+	u8 pos;
+	int bytes;
+
+	if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
+		PCI_STATUS_CAP_LIST))
+		return 0;
+
+	pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
+	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
+		u8 id;
+
+		pos &= ~3;
+		id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+
+		pos = read_pci_config_byte(num, slot, func,
+						 pos+PCI_CAP_LIST_NEXT);
+	}
+	return 0;
+}
+
+static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
+{
+	u32 class;
+
+	class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
+	if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
+		return 0;
+
+	return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
+}
+
+static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
+{
+	u32 bus, slot, func;
+
+	for (bus = 0; bus < 256; bus++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < 8; func++) {
+				unsigned cap;
+
+				cap = __find_dbgp(bus, slot, func);
+
+				if (!cap)
+					continue;
+				if (ehci_num-- != 0)
+					continue;
+				*rbus = bus;
+				*rslot = slot;
+				*rfunc = func;
+				return cap;
+			}
+		}
+	}
+	return 0;
+}
+
+static int __init ehci_reset_port(int port)
+{
+	u32 portsc;
+	u32 delay_time, delay;
+	int loop;
+
+	/* Reset the usb debug port */
+	portsc = readl(&ehci_regs->port_status[port - 1]);
+	portsc &= ~PORT_PE;
+	portsc |= PORT_RESET;
+	writel(portsc, &ehci_regs->port_status[port - 1]);
+
+	delay = HUB_ROOT_RESET_TIME;
+	for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
+	     delay_time += delay) {
+		dbgp_mdelay(delay);
+
+		portsc = readl(&ehci_regs->port_status[port - 1]);
+		if (portsc & PORT_RESET) {
+			/* force reset to complete */
+			loop = 2;
+			writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
+				&ehci_regs->port_status[port - 1]);
+			do {
+				portsc = readl(&ehci_regs->port_status[port-1]);
+			} while ((portsc & PORT_RESET) && (--loop > 0));
+		}
+
+		/* Device went away? */
+		if (!(portsc & PORT_CONNECT))
+			return -ENOTCONN;
+
+		/* bomb out completely if something weird happend */
+		if ((portsc & PORT_CSC))
+			return -EINVAL;
+
+		/* If we've finished resetting, then break out of the loop */
+		if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
+			return 0;
+	}
+	return -EBUSY;
+}
+
+static int __init ehci_wait_for_port(int port)
+{
+	u32 status;
+	int ret, reps;
+
+	for (reps = 0; reps < 3; reps++) {
+		dbgp_mdelay(100);
+		status = readl(&ehci_regs->status);
+		if (status & STS_PCD) {
+			ret = ehci_reset_port(port);
+			if (ret == 0)
+				return 0;
+		}
+	}
+	return -ENOTCONN;
+}
+
+#ifdef DBGP_DEBUG
+# define dbgp_printk early_printk
+#else
+static inline void dbgp_printk(const char *fmt, ...) { }
+#endif
+
+typedef void (*set_debug_port_t)(int port);
+
+static void __init default_set_debug_port(int port)
+{
+}
+
+static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
+
+static void __init nvidia_set_debug_port(int port)
+{
+	u32 dword;
+	dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
+				 0x74);
+	dword &= ~(0x0f<<12);
+	dword |= ((port & 0x0f)<<12);
+	write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
+				 dword);
+	dbgp_printk("set debug port to %d\n", port);
+}
+
+static void __init detect_set_debug_port(void)
+{
+	u32 vendorid;
+
+	vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
+		 0x00);
+
+	if ((vendorid & 0xffff) == 0x10de) {
+		dbgp_printk("using nvidia set_debug_port\n");
+		set_debug_port = nvidia_set_debug_port;
+	}
+}
+
+static int __init ehci_setup(void)
+{
+	struct usb_debug_descriptor dbgp_desc;
+	u32 cmd, ctrl, status, portsc, hcs_params;
+	u32 debug_port, new_debug_port = 0, n_ports;
+	u32  devnum;
+	int ret, i;
+	int loop;
+	int port_map_tried;
+	int playtimes = 3;
+
+try_next_time:
+	port_map_tried = 0;
+
+try_next_port:
+
+	hcs_params = readl(&ehci_caps->hcs_params);
+	debug_port = HCS_DEBUG_PORT(hcs_params);
+	n_ports    = HCS_N_PORTS(hcs_params);
+
+	dbgp_printk("debug_port: %d\n", debug_port);
+	dbgp_printk("n_ports:    %d\n", n_ports);
+
+	for (i = 1; i <= n_ports; i++) {
+		portsc = readl(&ehci_regs->port_status[i-1]);
+		dbgp_printk("portstatus%d: %08x\n", i, portsc);
+	}
+
+	if (port_map_tried && (new_debug_port != debug_port)) {
+		if (--playtimes) {
+			set_debug_port(new_debug_port);
+			goto try_next_time;
+		}
+		return -1;
+	}
+
+	loop = 100000;
+	/* Reset the EHCI controller */
+	cmd = readl(&ehci_regs->command);
+	cmd |= CMD_RESET;
+	writel(cmd, &ehci_regs->command);
+	do {
+		cmd = readl(&ehci_regs->command);
+	} while ((cmd & CMD_RESET) && (--loop > 0));
+
+	if (!loop) {
+		dbgp_printk("can not reset ehci\n");
+		return -1;
+	}
+	dbgp_printk("ehci reset done\n");
+
+	/* Claim ownership, but do not enable yet */
+	ctrl = readl(&ehci_debug->control);
+	ctrl |= DBGP_OWNER;
+	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
+	writel(ctrl, &ehci_debug->control);
+
+	/* Start the ehci running */
+	cmd = readl(&ehci_regs->command);
+	cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
+	cmd |= CMD_RUN;
+	writel(cmd, &ehci_regs->command);
+
+	/* Ensure everything is routed to the EHCI */
+	writel(FLAG_CF, &ehci_regs->configured_flag);
+
+	/* Wait until the controller is no longer halted */
+	loop = 10;
+	do {
+		status = readl(&ehci_regs->status);
+	} while ((status & STS_HALT) && (--loop > 0));
+
+	if (!loop) {
+		dbgp_printk("ehci can be started\n");
+		return -1;
+	}
+	dbgp_printk("ehci started\n");
+
+	/* Wait for a device to show up in the debug port */
+	ret = ehci_wait_for_port(debug_port);
+	if (ret < 0) {
+		dbgp_printk("No device found in debug port\n");
+		goto next_debug_port;
+	}
+	dbgp_printk("ehci wait for port done\n");
+
+	/* Enable the debug port */
+	ctrl = readl(&ehci_debug->control);
+	ctrl |= DBGP_CLAIM;
+	writel(ctrl, &ehci_debug->control);
+	ctrl = readl(&ehci_debug->control);
+	if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
+		dbgp_printk("No device in debug port\n");
+		writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
+		goto err;
+	}
+	dbgp_printk("debug ported enabled\n");
+
+	/* Completely transfer the debug device to the debug controller */
+	portsc = readl(&ehci_regs->port_status[debug_port - 1]);
+	portsc &= ~PORT_PE;
+	writel(portsc, &ehci_regs->port_status[debug_port - 1]);
+
+	dbgp_mdelay(100);
+
+	/* Find the debug device and make it device number 127 */
+	for (devnum = 0; devnum <= 127; devnum++) {
+		ret = dbgp_control_msg(devnum,
+			USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
+			&dbgp_desc, sizeof(dbgp_desc));
+		if (ret > 0)
+			break;
+	}
+	if (devnum > 127) {
+		dbgp_printk("Could not find attached debug device\n");
+		goto err;
+	}
+	if (ret < 0) {
+		dbgp_printk("Attached device is not a debug device\n");
+		goto err;
+	}
+	dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
+
+	/* Move the device to 127 if it isn't already there */
+	if (devnum != USB_DEBUG_DEVNUM) {
+		ret = dbgp_control_msg(devnum,
+			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
+		if (ret < 0) {
+			dbgp_printk("Could not move attached device to %d\n",
+				USB_DEBUG_DEVNUM);
+			goto err;
+		}
+		devnum = USB_DEBUG_DEVNUM;
+		dbgp_printk("debug device renamed to 127\n");
+	}
+
+	/* Enable the debug interface */
+	ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
+		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+		USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
+	if (ret < 0) {
+		dbgp_printk(" Could not enable the debug device\n");
+		goto err;
+	}
+	dbgp_printk("debug interface enabled\n");
+
+	/* Perform a small write to get the even/odd data state in sync
+	 */
+	ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
+	if (ret < 0) {
+		dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
+		goto err;
+	}
+	dbgp_printk("small write doned\n");
+
+	return 0;
+err:
+	/* Things didn't work so remove my claim */
+	ctrl = readl(&ehci_debug->control);
+	ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
+	writel(ctrl, &ehci_debug->control);
+	return -1;
+
+next_debug_port:
+	port_map_tried |= (1<<(debug_port - 1));
+	new_debug_port = ((debug_port-1+1)%n_ports) + 1;
+	if (port_map_tried != ((1<<n_ports) - 1)) {
+		set_debug_port(new_debug_port);
+		goto try_next_port;
+	}
+	if (--playtimes) {
+		set_debug_port(new_debug_port);
+		goto try_next_time;
+	}
+
+	return -1;
+}
+
+int __init early_dbgp_init(char *s)
+{
+	u32 debug_port, bar, offset;
+	u32 bus, slot, func, cap;
+	void __iomem *ehci_bar;
+	u32 dbgp_num;
+	u32 bar_val;
+	char *e;
+	int ret;
+	u8 byte;
+
+	if (!early_pci_allowed())
+		return -1;
+
+	dbgp_num = 0;
+	if (*s)
+		dbgp_num = simple_strtoul(s, &e, 10);
+	dbgp_printk("dbgp_num: %d\n", dbgp_num);
+
+	cap = find_dbgp(dbgp_num, &bus, &slot, &func);
+	if (!cap)
+		return -1;
+
+	dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
+			 func);
+
+	debug_port = read_pci_config(bus, slot, func, cap);
+	bar = (debug_port >> 29) & 0x7;
+	bar = (bar * 4) + 0xc;
+	offset = (debug_port >> 16) & 0xfff;
+	dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
+	if (bar != PCI_BASE_ADDRESS_0) {
+		dbgp_printk("only debug ports on bar 1 handled.\n");
+
+		return -1;
+	}
+
+	bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
+	dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
+	if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
+		dbgp_printk("only simple 32bit mmio bars supported\n");
+
+		return -1;
+	}
+
+	/* double check if the mem space is enabled */
+	byte = read_pci_config_byte(bus, slot, func, 0x04);
+	if (!(byte & 0x2)) {
+		byte  |= 0x02;
+		write_pci_config_byte(bus, slot, func, 0x04, byte);
+		dbgp_printk("mmio for ehci enabled\n");
+	}
+
+	/*
+	 * FIXME I don't have the bar size so just guess PAGE_SIZE is more
+	 * than enough.  1K is the biggest I have seen.
+	 */
+	set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
+	ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
+	ehci_bar += bar_val & ~PAGE_MASK;
+	dbgp_printk("ehci_bar: %p\n", ehci_bar);
+
+	ehci_caps  = ehci_bar;
+	ehci_regs  = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
+	ehci_debug = ehci_bar + offset;
+	ehci_dev.bus = bus;
+	ehci_dev.slot = slot;
+	ehci_dev.func = func;
+
+	detect_set_debug_port();
+
+	ret = ehci_setup();
+	if (ret < 0) {
+		dbgp_printk("ehci_setup failed\n");
+		ehci_debug = NULL;
+
+		return -1;
+	}
+
+	return 0;
+}
+
+static void early_dbgp_write(struct console *con, const char *str, u32 n)
+{
+	int chunk, ret;
+
+	if (!ehci_debug)
+		return;
+	while (n > 0) {
+		chunk = n;
+		if (chunk > DBGP_MAX_PACKET)
+			chunk = DBGP_MAX_PACKET;
+		ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
+			dbgp_endpoint_out, str, chunk);
+		str += chunk;
+		n -= chunk;
+	}
+}
+
+struct console early_dbgp_console = {
+	.name =		"earlydbg",
+	.write =	early_dbgp_write,
+	.flags =	CON_PRINTBUFFER,
+	.index =	-1,
+};
+
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 4c4b701..1deac2a 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -170,4 +170,10 @@ struct ehci_dbg_port {
 #define DBGP_EPADDR(dev, ep)	(((dev)<<8)|(ep))
 } __attribute__ ((packed));
 
+#ifdef CONFIG_EARLY_PRINTK_DBGP
+#include <linux/init.h>
+extern int __init early_dbgp_init(char *s);
+extern struct console early_dbgp_console;
+#endif /* CONFIG_EARLY_PRINTK_DBGP */
+
 #endif /* __LINUX_USB_EHCI_DEF_H */
-- 
cgit v0.10.2


From 87a5d15154ae2389251e6ad99216a846b905375c Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:49 -0500
Subject: USB: dbgp: insert cr prior to nl as needed

The rs232 drivers send a carriage return prior to a new line in the
early printk code.

The usb debug driver should do the same because you want to be able to
use the same terminal programs and tools for analysis of early printk
data.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index 821b7b2..8de709a 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -700,17 +700,27 @@ int __init early_dbgp_init(char *s)
 static void early_dbgp_write(struct console *con, const char *str, u32 n)
 {
 	int chunk, ret;
+	char buf[DBGP_MAX_PACKET];
+	int use_cr = 0;
 
 	if (!ehci_debug)
 		return;
 	while (n > 0) {
-		chunk = n;
-		if (chunk > DBGP_MAX_PACKET)
-			chunk = DBGP_MAX_PACKET;
+		for (chunk = 0; chunk < DBGP_MAX_PACKET && n > 0;
+		     str++, chunk++, n--) {
+			if (!use_cr && *str == '\n') {
+				use_cr = 1;
+				buf[chunk] = '\r';
+				str--;
+				n++;
+				continue;
+			}
+			if (use_cr)
+				use_cr = 0;
+			buf[chunk] = *str;
+		}
 		ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
-			dbgp_endpoint_out, str, chunk);
-		str += chunk;
-		n -= chunk;
+			dbgp_endpoint_out, buf, chunk);
 	}
 }
 
-- 
cgit v0.10.2


From 093344e1362cbf9525a5da09a565f357d8102f3b Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:50 -0500
Subject: USB: ehci-dbgp: Execute early BIOS hand off

The PCI quirk code executes a BIOS hand off to obtain full control of
the EHCI host controller, the self contained ehci-dbgp driver must do
the same thing using the early PCI API, else the BIOS can cause a
fatal fault.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: dbrownell@users.sourceforge.net
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index 8de709a..7deae97 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -435,6 +435,53 @@ static void __init detect_set_debug_port(void)
 	}
 }
 
+/* The code in early_ehci_bios_handoff() is derived from the usb pci
+ * quirk initialization, but altered so as to use the early PCI
+ * routines. */
+#define EHCI_USBLEGSUP_BIOS	(1 << 16)	/* BIOS semaphore */
+#define EHCI_USBLEGCTLSTS	4		/* legacy control/status */
+static void __init early_ehci_bios_handoff(void)
+{
+	u32 hcc_params = readl(&ehci_caps->hcc_params);
+	int offset = (hcc_params >> 8) & 0xff;
+	u32 cap;
+	int msec;
+
+	if (!offset)
+		return;
+
+	cap = read_pci_config(ehci_dev.bus, ehci_dev.slot,
+			      ehci_dev.func, offset);
+	dbgp_printk("dbgp: ehci BIOS state %08x\n", cap);
+
+	if ((cap & 0xff) == 1 && (cap & EHCI_USBLEGSUP_BIOS)) {
+		dbgp_printk("dbgp: BIOS handoff\n");
+		write_pci_config_byte(ehci_dev.bus, ehci_dev.slot,
+				      ehci_dev.func, offset + 3, 1);
+	}
+
+	/* if boot firmware now owns EHCI, spin till it hands it over. */
+	msec = 1000;
+	while ((cap & EHCI_USBLEGSUP_BIOS) && (msec > 0)) {
+		mdelay(10);
+		msec -= 10;
+		cap = read_pci_config(ehci_dev.bus, ehci_dev.slot,
+				      ehci_dev.func, offset);
+	}
+
+	if (cap & EHCI_USBLEGSUP_BIOS) {
+		/* well, possibly buggy BIOS... try to shut it down,
+		 * and hope nothing goes too wrong */
+		dbgp_printk("dbgp: BIOS handoff failed: %08x\n", cap);
+		write_pci_config_byte(ehci_dev.bus, ehci_dev.slot,
+				      ehci_dev.func, offset + 2, 0);
+	}
+
+	/* just in case, always disable EHCI SMIs */
+	write_pci_config_byte(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
+			      offset + EHCI_USBLEGCTLSTS, 0);
+}
+
 static int __init ehci_setup(void)
 {
 	struct usb_debug_descriptor dbgp_desc;
@@ -446,6 +493,8 @@ static int __init ehci_setup(void)
 	int port_map_tried;
 	int playtimes = 3;
 
+	early_ehci_bios_handoff();
+
 try_next_time:
 	port_map_tried = 0;
 
-- 
cgit v0.10.2


From 56faf0f98fd53e4a27cec331a3ff6d4aa55b1213 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:51 -0500
Subject: USB: dbgp: EHCI debug controller initialization delays

When using the EHCI host controller as a polled device, a bit more
tolerance is required in terms of delays.  On some 3+ghz systems the
cpu loops were faster than the EHCI device mmio and resulted in the
controller failing to initialize.

On at least one first generation EHCI controller when it was not
operating in interrupt mode, it would fail to report a port change
status, but executing the port reset allowed the debug controller to
work correctly anyway.  This errata causes a one time 300ms delay in
the boot time, where as the typical delay is 1-5ms for an EHCI
controller that does not have this errata.

The debug printk's were fixed to have the correct state messages, and
there was a conversion from using early_printk to printk to avoid
calling the dbgp driver while debugging the initialization.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index 7deae97..6198ebd 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -9,6 +9,12 @@
 #include <asm/pci-direct.h>
 #include <asm/fixmap.h>
 
+#ifdef DBGP_DEBUG
+# define dbgp_printk printk
+#else
+static inline void dbgp_printk(const char *fmt, ...) { }
+#endif
+
 static struct ehci_caps __iomem *ehci_caps;
 static struct ehci_regs __iomem *ehci_regs;
 static struct ehci_dbg_port __iomem *ehci_debug;
@@ -342,6 +348,7 @@ static int __init ehci_reset_port(int port)
 	u32 delay_time, delay;
 	int loop;
 
+	dbgp_printk("ehci_reset_port %i\n", port);
 	/* Reset the usb debug port */
 	portsc = readl(&ehci_regs->port_status[port - 1]);
 	portsc &= ~PORT_PE;
@@ -352,14 +359,17 @@ static int __init ehci_reset_port(int port)
 	for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
 	     delay_time += delay) {
 		dbgp_mdelay(delay);
-
 		portsc = readl(&ehci_regs->port_status[port - 1]);
+		if (!(portsc & PORT_RESET))
+			break;
+	}
 		if (portsc & PORT_RESET) {
 			/* force reset to complete */
-			loop = 2;
+			loop = 100 * 1000;
 			writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
 				&ehci_regs->port_status[port - 1]);
 			do {
+				udelay(1);
 				portsc = readl(&ehci_regs->port_status[port-1]);
 			} while ((portsc & PORT_RESET) && (--loop > 0));
 		}
@@ -375,7 +385,6 @@ static int __init ehci_reset_port(int port)
 		/* If we've finished resetting, then break out of the loop */
 		if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
 			return 0;
-	}
 	return -EBUSY;
 }
 
@@ -384,24 +393,18 @@ static int __init ehci_wait_for_port(int port)
 	u32 status;
 	int ret, reps;
 
-	for (reps = 0; reps < 3; reps++) {
-		dbgp_mdelay(100);
+	for (reps = 0; reps < 300; reps++) {
 		status = readl(&ehci_regs->status);
-		if (status & STS_PCD) {
-			ret = ehci_reset_port(port);
-			if (ret == 0)
-				return 0;
-		}
+		if (status & STS_PCD)
+			break;
+		dbgp_mdelay(1);
 	}
+	ret = ehci_reset_port(port);
+	if (ret == 0)
+		return 0;
 	return -ENOTCONN;
 }
 
-#ifdef DBGP_DEBUG
-# define dbgp_printk early_printk
-#else
-static inline void dbgp_printk(const char *fmt, ...) { }
-#endif
-
 typedef void (*set_debug_port_t)(int port);
 
 static void __init default_set_debug_port(int port)
@@ -520,7 +523,7 @@ try_next_port:
 		return -1;
 	}
 
-	loop = 100000;
+	loop = 250 * 1000;
 	/* Reset the EHCI controller */
 	cmd = readl(&ehci_regs->command);
 	cmd |= CMD_RESET;
@@ -540,6 +543,7 @@ try_next_port:
 	ctrl |= DBGP_OWNER;
 	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
 	writel(ctrl, &ehci_debug->control);
+	udelay(1);
 
 	/* Start the ehci running */
 	cmd = readl(&ehci_regs->command);
@@ -554,10 +558,13 @@ try_next_port:
 	loop = 10;
 	do {
 		status = readl(&ehci_regs->status);
-	} while ((status & STS_HALT) && (--loop > 0));
+		if (!(status & STS_HALT))
+			break;
+		udelay(1);
+	} while (--loop > 0);
 
 	if (!loop) {
-		dbgp_printk("ehci can be started\n");
+		dbgp_printk("ehci can not be started\n");
 		return -1;
 	}
 	dbgp_printk("ehci started\n");
-- 
cgit v0.10.2


From c9530948bc626c8b638015c0b32abb9615659ec6 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:52 -0500
Subject: early_printk: Allow more than one early console

It is desirable to be able to use one early boot device to debug
another or to have multiple places you can see the early boot
diagnostics, such as the vga screen or serial device.

This patch changes the early_printk console device registration to
allow more than one early printk device to get registered via
register_console().

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 519a5e1..2acfd3f 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -176,10 +176,19 @@ asmlinkage void early_printk(const char *fmt, ...)
 	va_end(ap);
 }
 
+static inline void early_console_register(struct console *con, int keep_early)
+{
+	early_console = con;
+	if (keep_early)
+		early_console->flags &= ~CON_BOOT;
+	else
+		early_console->flags |= CON_BOOT;
+	register_console(early_console);
+}
 
 static int __init setup_early_printk(char *buf)
 {
-	int keep_early;
+	int keep;
 
 	if (!buf)
 		return 0;
@@ -188,42 +197,34 @@ static int __init setup_early_printk(char *buf)
 		return 0;
 	early_console_initialized = 1;
 
-	keep_early = (strstr(buf, "keep") != NULL);
-
-	if (!strncmp(buf, "serial", 6)) {
-		early_serial_init(buf + 6);
-		early_console = &early_serial_console;
-	} else if (!strncmp(buf, "ttyS", 4)) {
-		early_serial_init(buf);
-		early_console = &early_serial_console;
-	} else if (!strncmp(buf, "vga", 3)
-		&& boot_params.screen_info.orig_video_isVGA == 1) {
-		max_xpos = boot_params.screen_info.orig_video_cols;
-		max_ypos = boot_params.screen_info.orig_video_lines;
-		current_ypos = boot_params.screen_info.orig_y;
-		early_console = &early_vga_console;
+	keep = (strstr(buf, "keep") != NULL);
+
+	while (*buf != '\0') {
+		if (!strncmp(buf, "serial", 6)) {
+			early_serial_init(buf + 6);
+			early_console_register(&early_serial_console, keep);
+		}
+		if (!strncmp(buf, "ttyS", 4)) {
+			early_serial_init(buf + 4);
+			early_console_register(&early_serial_console, keep);
+		}
+		if (!strncmp(buf, "vga", 3) &&
+		    boot_params.screen_info.orig_video_isVGA == 1) {
+			max_xpos = boot_params.screen_info.orig_video_cols;
+			max_ypos = boot_params.screen_info.orig_video_lines;
+			current_ypos = boot_params.screen_info.orig_y;
+			early_console_register(&early_vga_console, keep);
+		}
 #ifdef CONFIG_EARLY_PRINTK_DBGP
-	} else if (!strncmp(buf, "dbgp", 4)) {
-		if (early_dbgp_init(buf+4) < 0)
-			return 0;
-		early_console = &early_dbgp_console;
-		/*
-		 * usb subsys will reset ehci controller, so don't keep
-		 * that early console
-		 */
-		keep_early = 0;
+		if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4))
+			early_console_register(&early_dbgp_console, keep);
 #endif
 #ifdef CONFIG_HVC_XEN
-	} else if (!strncmp(buf, "xen", 3)) {
-		early_console = &xenboot_console;
+		if (!strncmp(buf, "xen", 3))
+			early_console_register(&xenboot_console, keep);
 #endif
+		buf++;
 	}
-
-	if (keep_early)
-		early_console->flags &= ~CON_BOOT;
-	else
-		early_console->flags |= CON_BOOT;
-	register_console(early_console);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 917778267fbe67703ab7d5c6f0b7a05d4c3df485 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:53 -0500
Subject: USB: ehci-dbgp: stability improvements and external re-init

This patch implements several changes:

1) Improve the capability to debug the dbgp driver

   The dbgp_ehci_status() was added in a number of places to report
   the critical ehci registers to diagnose the cause of a failure of
   the ehci-dbgp driver.

2) Capability to survive the host controller initialization

   The dbgp_external_startup(), dbgp_not_safe, and dbgp_phys_port were
   added so as to allow the ehci-dbgp to re-initialize after the ehci
   host controller is reset by the standard host controller driver.
   This same routine is common for the early startup or
   re-initialization.

   This resulted in the need to move some of the initialization code
   out of the __init section because the ehci driver has the
   possibility to be loaded later on as a kernel module.

3) Stability improvements for device initialization

   The device enumeration from 0 to 127 has the possibility to fail
   the first time after a warm reset on some older EHCI debug
   controllers.  The enumeration will be tried up to 3 times to
   account for this failure case.

   The dbg_wait_until_complete() was changed to wait up to 250 ms
   before failing which only comes into play during device
   initialization. The maximum delay will never get hit during the
   course of normal operation of the driver, unless the device got
   unplugged or there was a ehci controller failure, in which case the
   dbgp device driver will shut itself down.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: dbrownell@users.sourceforge.net
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index 6198ebd..06e05ea 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -1,5 +1,19 @@
+/*
+ * Standalone EHCI usb debug driver
+ *
+ * Originally written by:
+ *  Eric W. Biederman" <ebiederm@xmission.com> and
+ *  Yinghai Lu <yhlu.kernel@gmail.com>
+ *
+ * Changes for early/late printk and HW errata:
+ *  Jason Wessel <jason.wessel@windriver.com>
+ *  Copyright (C) 2009 Wind River Systems, Inc.
+ *
+ */
+
 #include <linux/console.h>
 #include <linux/errno.h>
+#include <linux/module.h>
 #include <linux/pci_regs.h>
 #include <linux/pci_ids.h>
 #include <linux/usb/ch9.h>
@@ -9,15 +23,37 @@
 #include <asm/pci-direct.h>
 #include <asm/fixmap.h>
 
-#ifdef DBGP_DEBUG
-# define dbgp_printk printk
-#else
-static inline void dbgp_printk(const char *fmt, ...) { }
-#endif
+/* The code here is intended to talk directly to the EHCI debug port
+ * and does not require that you have any kind of USB host controller
+ * drivers or USB device drivers compiled into the kernel.
+ *
+ * If you make a change to anything in here, the following test cases
+ * need to pass where a USB debug device works in the following
+ * configurations.
+ *
+ * 1. boot args:  earlyprintk=dbgp
+ *     o kernel compiled with # CONFIG_USB_EHCI_HCD is not set
+ *     o kernel compiled with CONFIG_USB_EHCI_HCD=y
+ * 2. boot args: earlyprintk=dbgp,keep
+ *     o kernel compiled with # CONFIG_USB_EHCI_HCD is not set
+ *     o kernel compiled with CONFIG_USB_EHCI_HCD=y
+ * 3. boot args: earlyprintk=dbgp console=ttyUSB0
+ *     o kernel has CONFIG_USB_EHCI_HCD=y and
+ *       CONFIG_USB_SERIAL_DEBUG=y
+ * 4. boot args: earlyprintk=vga,dbgp
+ *     o kernel compiled with # CONFIG_USB_EHCI_HCD is not set
+ *     o kernel compiled with CONFIG_USB_EHCI_HCD=y
+ *
+ * For the 4th configuration you can turn on or off the DBGP_DEBUG
+ * such that you can debug the dbgp device's driver code.
+ */
+
+static int dbgp_phys_port = 1;
 
 static struct ehci_caps __iomem *ehci_caps;
 static struct ehci_regs __iomem *ehci_regs;
 static struct ehci_dbg_port __iomem *ehci_debug;
+static int dbgp_not_safe; /* Cannot use debug device during ehci reset */
 static unsigned int dbgp_endpoint_out;
 
 struct ehci_dev {
@@ -32,6 +68,26 @@ static struct ehci_dev ehci_dev;
 
 #define DBGP_DATA_TOGGLE	0x8800
 
+#ifdef DBGP_DEBUG
+#define dbgp_printk printk
+static void dbgp_ehci_status(char *str)
+{
+	if (!ehci_debug)
+		return;
+	dbgp_printk("dbgp: %s\n", str);
+	dbgp_printk("  Debug control: %08x", readl(&ehci_debug->control));
+	dbgp_printk("  ehci cmd     : %08x", readl(&ehci_regs->command));
+	dbgp_printk("  ehci conf flg: %08x\n",
+		    readl(&ehci_regs->configured_flag));
+	dbgp_printk("  ehci status  : %08x", readl(&ehci_regs->status));
+	dbgp_printk("  ehci portsc  : %08x\n",
+		    readl(&ehci_regs->port_status[dbgp_phys_port - 1]));
+}
+#else
+static inline void dbgp_ehci_status(char *str) { }
+static inline void dbgp_printk(const char *fmt, ...) { }
+#endif
+
 static inline u32 dbgp_pid_update(u32 x, u32 tok)
 {
 	return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
@@ -79,21 +135,23 @@ static inline u32 dbgp_len_update(u32 x, u32 len)
 #define HUB_RESET_TIMEOUT	500
 
 #define DBGP_MAX_PACKET		8
+#define DBGP_TIMEOUT		(250 * 1000)
 
 static int dbgp_wait_until_complete(void)
 {
 	u32 ctrl;
-	int loop = 0x100000;
+	int loop = DBGP_TIMEOUT;
 
 	do {
 		ctrl = readl(&ehci_debug->control);
 		/* Stop when the transaction is finished */
 		if (ctrl & DBGP_DONE)
 			break;
+		udelay(1);
 	} while (--loop > 0);
 
 	if (!loop)
-		return -1;
+		return -DBGP_TIMEOUT;
 
 	/*
 	 * Now that we have observed the completed transaction,
@@ -103,7 +161,7 @@ static int dbgp_wait_until_complete(void)
 	return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
 }
 
-static void __init dbgp_mdelay(int ms)
+static inline void dbgp_mdelay(int ms)
 {
 	int i;
 
@@ -130,8 +188,17 @@ retry:
 	pids = readl(&ehci_debug->pids);
 	lpid = DBGP_PID_GET(pids);
 
-	if (ret < 0)
+	if (ret < 0) {
+		/* A -DBGP_TIMEOUT failure here means the device has
+		 * failed, perhaps because it was unplugged, in which
+		 * case we do not want to hang the system so the dbgp
+		 * will be marked as unsafe to use.  EHCI reset is the
+		 * only way to recover if you unplug the dbgp device.
+		 */
+		if (ret == -DBGP_TIMEOUT && !dbgp_not_safe)
+			dbgp_not_safe = 1;
 		return ret;
+	}
 
 	/*
 	 * If the port is getting full or it has dropped data
@@ -149,7 +216,7 @@ retry:
 	return ret;
 }
 
-static void dbgp_set_data(const void *buf, int size)
+static inline void dbgp_set_data(const void *buf, int size)
 {
 	const unsigned char *bytes = buf;
 	u32 lo, hi;
@@ -164,7 +231,7 @@ static void dbgp_set_data(const void *buf, int size)
 	writel(hi, &ehci_debug->data47);
 }
 
-static void __init dbgp_get_data(void *buf, int size)
+static inline void dbgp_get_data(void *buf, int size)
 {
 	unsigned char *bytes = buf;
 	u32 lo, hi;
@@ -208,7 +275,7 @@ static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
 	return ret;
 }
 
-static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
+static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
 				 int size)
 {
 	u32 pids, addr, ctrl;
@@ -239,7 +306,7 @@ static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
 	return ret;
 }
 
-static int __init dbgp_control_msg(unsigned devnum, int requesttype,
+static int dbgp_control_msg(unsigned devnum, int requesttype,
 	int request, int value, int index, void *data, int size)
 {
 	u32 pids, addr, ctrl;
@@ -342,13 +409,179 @@ static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
 	return 0;
 }
 
+static int dbgp_ehci_startup(void)
+{
+	u32 ctrl, cmd, status;
+	int loop;
+
+	/* Claim ownership, but do not enable yet */
+	ctrl = readl(&ehci_debug->control);
+	ctrl |= DBGP_OWNER;
+	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
+	writel(ctrl, &ehci_debug->control);
+	udelay(1);
+
+	dbgp_ehci_status("EHCI startup");
+	/* Start the ehci running */
+	cmd = readl(&ehci_regs->command);
+	cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
+	cmd |= CMD_RUN;
+	writel(cmd, &ehci_regs->command);
+
+	/* Ensure everything is routed to the EHCI */
+	writel(FLAG_CF, &ehci_regs->configured_flag);
+
+	/* Wait until the controller is no longer halted */
+	loop = 10;
+	do {
+		status = readl(&ehci_regs->status);
+		if (!(status & STS_HALT))
+			break;
+		udelay(1);
+	} while (--loop > 0);
+
+	if (!loop) {
+		dbgp_printk("ehci can not be started\n");
+		return -ENODEV;
+	}
+	dbgp_printk("ehci started\n");
+	return 0;
+}
+
+static int dbgp_ehci_controller_reset(void)
+{
+	int loop = 250 * 1000;
+	u32 cmd;
+
+	/* Reset the EHCI controller */
+	cmd = readl(&ehci_regs->command);
+	cmd |= CMD_RESET;
+	writel(cmd, &ehci_regs->command);
+	do {
+		cmd = readl(&ehci_regs->command);
+	} while ((cmd & CMD_RESET) && (--loop > 0));
+
+	if (!loop) {
+		dbgp_printk("can not reset ehci\n");
+		return -1;
+	}
+	dbgp_ehci_status("ehci reset done");
+	return 0;
+}
+static int ehci_wait_for_port(int port);
+/* Return 0 on success
+ * Return -ENODEV for any general failure
+ * Return -EIO if wait for port fails
+ */
+int dbgp_external_startup(void)
+{
+	int devnum;
+	struct usb_debug_descriptor dbgp_desc;
+	int ret;
+	u32 ctrl, portsc;
+	int dbg_port = dbgp_phys_port;
+	int tries = 3;
+
+	ret = dbgp_ehci_startup();
+	if (ret)
+		return ret;
+
+	/* Wait for a device to show up in the debug port */
+	ret = ehci_wait_for_port(dbg_port);
+	if (ret < 0) {
+		portsc = readl(&ehci_regs->port_status[dbg_port - 1]);
+		dbgp_printk("No device found in debug port\n");
+		return -EIO;
+	}
+	dbgp_ehci_status("wait for port done");
+
+	/* Enable the debug port */
+	ctrl = readl(&ehci_debug->control);
+	ctrl |= DBGP_CLAIM;
+	writel(ctrl, &ehci_debug->control);
+	ctrl = readl(&ehci_debug->control);
+	if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
+		dbgp_printk("No device in debug port\n");
+		writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
+		return -ENODEV;
+	}
+	dbgp_ehci_status("debug ported enabled");
+
+	/* Completely transfer the debug device to the debug controller */
+	portsc = readl(&ehci_regs->port_status[dbg_port - 1]);
+	portsc &= ~PORT_PE;
+	writel(portsc, &ehci_regs->port_status[dbg_port - 1]);
+
+	dbgp_mdelay(100);
+
+try_again:
+	/* Find the debug device and make it device number 127 */
+	for (devnum = 0; devnum <= 127; devnum++) {
+		ret = dbgp_control_msg(devnum,
+			USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
+			&dbgp_desc, sizeof(dbgp_desc));
+		if (ret > 0)
+			break;
+	}
+	if (devnum > 127) {
+		dbgp_printk("Could not find attached debug device\n");
+		goto err;
+	}
+	if (ret < 0) {
+		dbgp_printk("Attached device is not a debug device\n");
+		goto err;
+	}
+	dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
+
+	/* Move the device to 127 if it isn't already there */
+	if (devnum != USB_DEBUG_DEVNUM) {
+		ret = dbgp_control_msg(devnum,
+			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
+		if (ret < 0) {
+			dbgp_printk("Could not move attached device to %d\n",
+				USB_DEBUG_DEVNUM);
+			goto err;
+		}
+		devnum = USB_DEBUG_DEVNUM;
+		dbgp_printk("debug device renamed to 127\n");
+	}
+
+	/* Enable the debug interface */
+	ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
+		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+		USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
+	if (ret < 0) {
+		dbgp_printk(" Could not enable the debug device\n");
+		goto err;
+	}
+	dbgp_printk("debug interface enabled\n");
+	/* Perform a small write to get the even/odd data state in sync
+	 */
+	ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
+	if (ret < 0) {
+		dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
+		goto err;
+	}
+	dbgp_printk("small write doned\n");
+	dbgp_not_safe = 0;
+
+	return 0;
+err:
+	if (tries--)
+		goto try_again;
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(dbgp_external_startup);
+
 static int __init ehci_reset_port(int port)
 {
 	u32 portsc;
 	u32 delay_time, delay;
 	int loop;
 
-	dbgp_printk("ehci_reset_port %i\n", port);
+	dbgp_ehci_status("reset port");
 	/* Reset the usb debug port */
 	portsc = readl(&ehci_regs->port_status[port - 1]);
 	portsc &= ~PORT_PE;
@@ -388,7 +621,7 @@ static int __init ehci_reset_port(int port)
 	return -EBUSY;
 }
 
-static int __init ehci_wait_for_port(int port)
+static int ehci_wait_for_port(int port)
 {
 	u32 status;
 	int ret, reps;
@@ -487,12 +720,9 @@ static void __init early_ehci_bios_handoff(void)
 
 static int __init ehci_setup(void)
 {
-	struct usb_debug_descriptor dbgp_desc;
-	u32 cmd, ctrl, status, portsc, hcs_params;
+	u32 ctrl, portsc, hcs_params;
 	u32 debug_port, new_debug_port = 0, n_ports;
-	u32  devnum;
 	int ret, i;
-	int loop;
 	int port_map_tried;
 	int playtimes = 3;
 
@@ -505,10 +735,12 @@ try_next_port:
 
 	hcs_params = readl(&ehci_caps->hcs_params);
 	debug_port = HCS_DEBUG_PORT(hcs_params);
+	dbgp_phys_port = debug_port;
 	n_ports    = HCS_N_PORTS(hcs_params);
 
 	dbgp_printk("debug_port: %d\n", debug_port);
 	dbgp_printk("n_ports:    %d\n", n_ports);
+	dbgp_ehci_status("");
 
 	for (i = 1; i <= n_ports; i++) {
 		portsc = readl(&ehci_regs->port_status[i-1]);
@@ -523,138 +755,27 @@ try_next_port:
 		return -1;
 	}
 
-	loop = 250 * 1000;
-	/* Reset the EHCI controller */
-	cmd = readl(&ehci_regs->command);
-	cmd |= CMD_RESET;
-	writel(cmd, &ehci_regs->command);
-	do {
-		cmd = readl(&ehci_regs->command);
-	} while ((cmd & CMD_RESET) && (--loop > 0));
-
-	if (!loop) {
-		dbgp_printk("can not reset ehci\n");
-		return -1;
-	}
-	dbgp_printk("ehci reset done\n");
-
-	/* Claim ownership, but do not enable yet */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_OWNER;
-	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
-	writel(ctrl, &ehci_debug->control);
-	udelay(1);
-
-	/* Start the ehci running */
-	cmd = readl(&ehci_regs->command);
-	cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
-	cmd |= CMD_RUN;
-	writel(cmd, &ehci_regs->command);
-
-	/* Ensure everything is routed to the EHCI */
-	writel(FLAG_CF, &ehci_regs->configured_flag);
-
-	/* Wait until the controller is no longer halted */
-	loop = 10;
-	do {
-		status = readl(&ehci_regs->status);
-		if (!(status & STS_HALT))
-			break;
-		udelay(1);
-	} while (--loop > 0);
-
-	if (!loop) {
-		dbgp_printk("ehci can not be started\n");
-		return -1;
+	/* Only reset the controller if it is not already in the
+	 * configured state */
+	if (!(readl(&ehci_regs->configured_flag) & FLAG_CF)) {
+		if (dbgp_ehci_controller_reset() != 0)
+			return -1;
+	} else {
+		dbgp_ehci_status("ehci skip - already configured");
 	}
-	dbgp_printk("ehci started\n");
 
-	/* Wait for a device to show up in the debug port */
-	ret = ehci_wait_for_port(debug_port);
-	if (ret < 0) {
-		dbgp_printk("No device found in debug port\n");
+	ret = dbgp_external_startup();
+	if (ret == -EIO)
 		goto next_debug_port;
-	}
-	dbgp_printk("ehci wait for port done\n");
-
-	/* Enable the debug port */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_CLAIM;
-	writel(ctrl, &ehci_debug->control);
-	ctrl = readl(&ehci_debug->control);
-	if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
-		dbgp_printk("No device in debug port\n");
-		writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
-		goto err;
-	}
-	dbgp_printk("debug ported enabled\n");
 
-	/* Completely transfer the debug device to the debug controller */
-	portsc = readl(&ehci_regs->port_status[debug_port - 1]);
-	portsc &= ~PORT_PE;
-	writel(portsc, &ehci_regs->port_status[debug_port - 1]);
-
-	dbgp_mdelay(100);
-
-	/* Find the debug device and make it device number 127 */
-	for (devnum = 0; devnum <= 127; devnum++) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
-			&dbgp_desc, sizeof(dbgp_desc));
-		if (ret > 0)
-			break;
-	}
-	if (devnum > 127) {
-		dbgp_printk("Could not find attached debug device\n");
-		goto err;
-	}
 	if (ret < 0) {
-		dbgp_printk("Attached device is not a debug device\n");
-		goto err;
-	}
-	dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
-
-	/* Move the device to 127 if it isn't already there */
-	if (devnum != USB_DEBUG_DEVNUM) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
-		if (ret < 0) {
-			dbgp_printk("Could not move attached device to %d\n",
-				USB_DEBUG_DEVNUM);
-			goto err;
-		}
-		devnum = USB_DEBUG_DEVNUM;
-		dbgp_printk("debug device renamed to 127\n");
-	}
-
-	/* Enable the debug interface */
-	ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
-		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
-	if (ret < 0) {
-		dbgp_printk(" Could not enable the debug device\n");
-		goto err;
-	}
-	dbgp_printk("debug interface enabled\n");
-
-	/* Perform a small write to get the even/odd data state in sync
-	 */
-	ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
-	if (ret < 0) {
-		dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
-		goto err;
+		/* Things didn't work so remove my claim */
+		ctrl = readl(&ehci_debug->control);
+		ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
+		writel(ctrl, &ehci_debug->control);
+		return -1;
 	}
-	dbgp_printk("small write doned\n");
-
 	return 0;
-err:
-	/* Things didn't work so remove my claim */
-	ctrl = readl(&ehci_debug->control);
-	ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
-	writel(ctrl, &ehci_debug->control);
-	return -1;
 
 next_debug_port:
 	port_map_tried |= (1<<(debug_port - 1));
@@ -749,6 +870,7 @@ int __init early_dbgp_init(char *s)
 
 		return -1;
 	}
+	dbgp_ehci_status("early_init_complete");
 
 	return 0;
 }
@@ -758,9 +880,27 @@ static void early_dbgp_write(struct console *con, const char *str, u32 n)
 	int chunk, ret;
 	char buf[DBGP_MAX_PACKET];
 	int use_cr = 0;
+	u32 cmd, ctrl;
+	int reset_run = 0;
 
-	if (!ehci_debug)
+	if (!ehci_debug || dbgp_not_safe)
 		return;
+
+	cmd = readl(&ehci_regs->command);
+	if (unlikely(!(cmd & CMD_RUN))) {
+		/* If the ehci controller is not in the run state do extended
+		 * checks to see if the acpi or some other initialization also
+		 * reset the ehci debug port */
+		ctrl = readl(&ehci_debug->control);
+		if (!(ctrl & DBGP_ENABLED)) {
+			dbgp_not_safe = 1;
+			dbgp_external_startup();
+		} else {
+			cmd |= CMD_RUN;
+			writel(cmd, &ehci_regs->command);
+			reset_run = 1;
+		}
+	}
 	while (n > 0) {
 		for (chunk = 0; chunk < DBGP_MAX_PACKET && n > 0;
 		     str++, chunk++, n--) {
@@ -775,8 +915,15 @@ static void early_dbgp_write(struct console *con, const char *str, u32 n)
 				use_cr = 0;
 			buf[chunk] = *str;
 		}
-		ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
-			dbgp_endpoint_out, buf, chunk);
+		if (chunk > 0) {
+			ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
+				      dbgp_endpoint_out, buf, chunk);
+		}
+	}
+	if (unlikely(reset_run)) {
+		cmd = readl(&ehci_regs->command);
+		cmd &= ~CMD_RUN;
+		writel(cmd, &ehci_regs->command);
 	}
 }
 
@@ -786,4 +933,3 @@ struct console early_dbgp_console = {
 	.flags =	CON_PRINTBUFFER,
 	.index =	-1,
 };
-
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 1deac2a..07851e0 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -176,4 +176,9 @@ extern int __init early_dbgp_init(char *s);
 extern struct console early_dbgp_console;
 #endif /* CONFIG_EARLY_PRINTK_DBGP */
 
+#ifdef CONFIG_EARLY_PRINTK_DBGP
+/* Call backs from ehci host driver to ehci debug driver */
+extern int dbgp_external_startup(void);
+#endif
+
 #endif /* __LINUX_USB_EHCI_DEF_H */
-- 
cgit v0.10.2


From 8d053c79f22462f55c02c8083580730b922cf7b4 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:54 -0500
Subject: USB: ehci-dbgp,ehci: Allow early or late use of the dbgp device

If the EHCI debug port is initialized and in use, the EHCI host
controller driver must follow two rules.

1) If the EHCI host driver issues a controller reset, the debug
   controller driver re-initialization must get called after the reset
   is completed.

2) The EHCI host driver should ignore any requests to the physical
   EHCI debug port when the EHCI debug port is in use.

The code to check for the debug port was moved from ehci_pci_reinit()
to ehci_pci_setup because it must get called prior to ehci_reset()
which will clear the debug port registers.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: dbrownell@users.sourceforge.net
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index 06e05ea..b88cb65 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -933,3 +933,26 @@ struct console early_dbgp_console = {
 	.flags =	CON_PRINTBUFFER,
 	.index =	-1,
 };
+
+int dbgp_reset_prep(void)
+{
+	u32 ctrl;
+
+	dbgp_not_safe = 1;
+	if (!ehci_debug)
+		return 0;
+
+	if (early_dbgp_console.index != -1 &&
+		!(early_dbgp_console.flags & CON_BOOT))
+		return 1;
+	/* This means the console is not initialized, or should get
+	 * shutdown so as to allow for reuse of the usb device, which
+	 * means it is time to shutdown the usb debug port. */
+	ctrl = readl(&ehci_debug->control);
+	if (ctrl & DBGP_ENABLED) {
+		ctrl &= ~(DBGP_CLAIM);
+		writel(ctrl, &ehci_debug->control);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dbgp_reset_prep);
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 4f89d7f..9835e07 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -240,6 +240,11 @@ static int ehci_reset (struct ehci_hcd *ehci)
 	int	retval;
 	u32	command = ehci_readl(ehci, &ehci->regs->command);
 
+	/* If the EHCI debug controller is active, special care must be
+	 * taken before and after a host controller reset */
+	if (ehci->debug && !dbgp_reset_prep())
+		ehci->debug = NULL;
+
 	command |= CMD_RESET;
 	dbg_cmd (ehci, "reset", command);
 	ehci_writel(ehci, command, &ehci->regs->command);
@@ -260,6 +265,9 @@ static int ehci_reset (struct ehci_hcd *ehci)
 	if (ehci_is_TDI(ehci))
 		tdi_reset (ehci);
 
+	if (ehci->debug)
+		dbgp_external_startup();
+
 	return retval;
 }
 
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index 818647c..6b5e4d1 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -855,6 +855,15 @@ static int ehci_hub_control (
 	case SetPortFeature:
 		selector = wIndex >> 8;
 		wIndex &= 0xff;
+		if (unlikely(ehci->debug)) {
+			/* If the debug port is active any port
+			 * feature requests should get denied */
+			if (wIndex == HCS_DEBUG_PORT(ehci->hcs_params) &&
+			    (readl(&ehci->debug->control) & DBGP_ENABLED)) {
+				retval = -ENODEV;
+				goto error_exit;
+			}
+		}
 		if (!wIndex || wIndex > ports)
 			goto error;
 		wIndex--;
@@ -951,6 +960,7 @@ error:
 		/* "stall" on error */
 		retval = -EPIPE;
 	}
+error_exit:
 	spin_unlock_irqrestore (&ehci->lock, flags);
 	return retval;
 }
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index a88ad51..378861b 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -27,28 +27,8 @@
 /* called after powerup, by probe or system-pm "wakeup" */
 static int ehci_pci_reinit(struct ehci_hcd *ehci, struct pci_dev *pdev)
 {
-	u32			temp;
 	int			retval;
 
-	/* optional debug port, normally in the first BAR */
-	temp = pci_find_capability(pdev, 0x0a);
-	if (temp) {
-		pci_read_config_dword(pdev, temp, &temp);
-		temp >>= 16;
-		if ((temp & (3 << 13)) == (1 << 13)) {
-			temp &= 0x1fff;
-			ehci->debug = ehci_to_hcd(ehci)->regs + temp;
-			temp = ehci_readl(ehci, &ehci->debug->control);
-			ehci_info(ehci, "debug port %d%s\n",
-				HCS_DEBUG_PORT(ehci->hcs_params),
-				(temp & DBGP_ENABLED)
-					? " IN USE"
-					: "");
-			if (!(temp & DBGP_ENABLED))
-				ehci->debug = NULL;
-		}
-	}
-
 	/* we expect static quirk code to handle the "extended capabilities"
 	 * (currently just BIOS handoff) allowed starting with EHCI 0.96
 	 */
@@ -195,6 +175,25 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
 		break;
 	}
 
+	/* optional debug port, normally in the first BAR */
+	temp = pci_find_capability(pdev, 0x0a);
+	if (temp) {
+		pci_read_config_dword(pdev, temp, &temp);
+		temp >>= 16;
+		if ((temp & (3 << 13)) == (1 << 13)) {
+			temp &= 0x1fff;
+			ehci->debug = ehci_to_hcd(ehci)->regs + temp;
+			temp = ehci_readl(ehci, &ehci->debug->control);
+			ehci_info(ehci, "debug port %d%s\n",
+				HCS_DEBUG_PORT(ehci->hcs_params),
+				(temp & DBGP_ENABLED)
+					? " IN USE"
+					: "");
+			if (!(temp & DBGP_ENABLED))
+				ehci->debug = NULL;
+		}
+	}
+
 	ehci_reset(ehci);
 
 	/* at least the Genesys GL880S needs fixup here */
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 07851e0..1909d92 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -179,6 +179,16 @@ extern struct console early_dbgp_console;
 #ifdef CONFIG_EARLY_PRINTK_DBGP
 /* Call backs from ehci host driver to ehci debug driver */
 extern int dbgp_external_startup(void);
+extern int dbgp_reset_prep(void);
+#else
+static inline int dbgp_reset_prep(void)
+{
+	return 1;
+}
+static inline int dbgp_external_startup(void)
+{
+	return -1;
+}
 #endif
 
 #endif /* __LINUX_USB_EHCI_DEF_H */
-- 
cgit v0.10.2


From aab2d4086a1876fcff282aa36e2d4a92aa9935c9 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:55 -0500
Subject: USB: ehci-dbgp: errata for EHCI debug controller initialization

On some EHCI usb debug controllers, the EHCI debug device will fail to
be seen after a port reset, after a warm reset.  Two options exist to
get the device to initialize correctly.

Option 1 is to unplug and plug in the device.

Option 2 is to use the EHCI port test to get the usb debug device to
start talking again.  At that point the debug controller port reset
will succeed.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
CC: dbrownell@users.sourceforge.net
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index b88cb65..f0a41c6 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -478,10 +478,13 @@ int dbgp_external_startup(void)
 	int devnum;
 	struct usb_debug_descriptor dbgp_desc;
 	int ret;
-	u32 ctrl, portsc;
+	u32 ctrl, portsc, cmd;
 	int dbg_port = dbgp_phys_port;
 	int tries = 3;
+	int reset_port_tries = 1;
+	int try_hard_once = 1;
 
+try_port_reset_again:
 	ret = dbgp_ehci_startup();
 	if (ret)
 		return ret;
@@ -490,6 +493,24 @@ int dbgp_external_startup(void)
 	ret = ehci_wait_for_port(dbg_port);
 	if (ret < 0) {
 		portsc = readl(&ehci_regs->port_status[dbg_port - 1]);
+		if (!(portsc & PORT_CONNECT) && try_hard_once) {
+			/* Last ditch effort to try to force enable
+			 * the debug device by using the packet test
+			 * ehci command to try and wake it up. */
+			try_hard_once = 0;
+			cmd = readl(&ehci_regs->command);
+			cmd &= ~CMD_RUN;
+			writel(cmd, &ehci_regs->command);
+			portsc = readl(&ehci_regs->port_status[dbg_port - 1]);
+			portsc |= PORT_TEST_PKT;
+			writel(portsc, &ehci_regs->port_status[dbg_port - 1]);
+			dbgp_ehci_status("Trying to force debug port online");
+			mdelay(50);
+			dbgp_ehci_controller_reset();
+			goto try_port_reset_again;
+		} else if (reset_port_tries--) {
+			goto try_port_reset_again;
+		}
 		dbgp_printk("No device found in debug port\n");
 		return -EIO;
 	}
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 1909d92..af4b86f 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -105,6 +105,7 @@ struct ehci_regs {
 #define PORT_WKDISC_E	(1<<21)		/* wake on disconnect (enable) */
 #define PORT_WKCONN_E	(1<<20)		/* wake on connect (enable) */
 /* 19:16 for port testing */
+#define PORT_TEST_PKT	(0x4<<16)	/* Port Test Control - packet test */
 #define PORT_LED_OFF	(0<<14)
 #define PORT_LED_AMBER	(1<<14)
 #define PORT_LED_GREEN	(2<<14)
-- 
cgit v0.10.2


From 68d2956a810b5c1b8213a1a9f59eacc54d7ce087 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:56 -0500
Subject: USB: ehci-dbgp: errata for EHCI debug/host controller synchronization

On some EHCI debug controllers after the host controller driver is
activated, the debug controller will occasionally fail to submit a
bulk write URB.  On controllers that exhibit this behavior a dummy
bulk write must get submitted to resynchronize the device.

The "dummy bulk write" does not get received by the host attached to
the other end of the usb debug device.  The usb debug device simply
acknowledges the "dummy bulk write" and returns to a usable state.

The behavior, without this patch is that you see missing text from a
complete kernel boot when using the keep option to the earlyprintk
kernel argument.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index f0a41c6..1206a26 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -245,16 +245,9 @@ static inline void dbgp_get_data(void *buf, int size)
 		bytes[i] = (hi >> (8*(i - 4))) & 0xff;
 }
 
-static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
-			 const char *bytes, int size)
+static int dbgp_out(u32 addr, const char *bytes, int size)
 {
-	u32 pids, addr, ctrl;
-	int ret;
-
-	if (size > DBGP_MAX_PACKET)
-		return -1;
-
-	addr = DBGP_EPADDR(devnum, endpoint);
+	u32 pids, ctrl;
 
 	pids = readl(&ehci_debug->pids);
 	pids = dbgp_pid_update(pids, USB_PID_OUT);
@@ -267,10 +260,34 @@ static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
 	dbgp_set_data(bytes, size);
 	writel(addr, &ehci_debug->address);
 	writel(pids, &ehci_debug->pids);
+	return dbgp_wait_until_done(ctrl);
+}
 
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
+static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
+			 const char *bytes, int size)
+{
+	int ret;
+	int loops = 5;
+	u32 addr;
+	if (size > DBGP_MAX_PACKET)
+		return -1;
+
+	addr = DBGP_EPADDR(devnum, endpoint);
+try_again:
+	if (loops--) {
+		ret = dbgp_out(addr, bytes, size);
+		if (ret == -DBGP_ERR_BAD) {
+			int try_loops = 3;
+			do {
+				/* Emit a dummy packet to re-sync communication
+				 * with the debug device */
+				if (dbgp_out(addr, "12345678", 8) >= 0) {
+					udelay(2);
+					goto try_again;
+				}
+			} while (try_loops--);
+		}
+	}
 
 	return ret;
 }
-- 
cgit v0.10.2


From 9780bc41dca728f9b082a42d9e1f1716d5057081 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:57 -0500
Subject: USB: ehci-dbgp,documentation: Documentation updates for ehci-dbgp

Add missing information about requirements of using the EHCI usb debug
controller as well as to mention you can use a debug controller other
than the first one in the system.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Sarah Sharp <sarah.a.sharp@intel.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c363840..6fa7292 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -671,7 +671,7 @@ and is between 256 and 4096 characters. It is defined in the file
 	earlyprintk=	[X86,SH,BLACKFIN]
 			earlyprintk=vga
 			earlyprintk=serial[,ttySn[,baudrate]]
-			earlyprintk=dbgp
+			earlyprintk=dbgp[debugController#]
 
 			Append ",keep" to not disable it when the real console
 			takes over.
diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.txt
index 607b1a0..f19802c 100644
--- a/Documentation/x86/earlyprintk.txt
+++ b/Documentation/x86/earlyprintk.txt
@@ -7,7 +7,7 @@ and two USB cables, connected like this:
 
   [host/target] <-------> [USB debug key] <-------> [client/console]
 
-1. There are three specific hardware requirements:
+1. There are a number of specific hardware requirements:
 
  a.) Host/target system needs to have USB debug port capability.
 
@@ -42,7 +42,35 @@ and two USB cables, connected like this:
      This is a small blue plastic connector with two USB connections,
      it draws power from its USB connections.
 
- c.) Thirdly, you need a second client/console system with a regular USB port.
+ c.) You need a second client/console system with a high speed USB 2.0
+     port.
+
+ d.) The Netchip device must be plugged directly into the physical
+     debug port on the "host/target" system.  You cannot use a USB hub in
+     between the physical debug port and the "host/target" system.
+
+     The EHCI debug controller is bound to a specific physical USB
+     port and the Netchip device will only work as an early printk
+     device in this port.  The EHCI host controllers are electrically
+     wired such that the EHCI debug controller is hooked up to the
+     first physical and there is no way to change this via software.
+     You can find the physical port through experimentation by trying
+     each physical port on the system and rebooting.  Or you can try
+     and use lsusb or look at the kernel info messages emitted by the
+     usb stack when you plug a usb device into various ports on the
+     "host/target" system.
+
+     Some hardware vendors do not expose the usb debug port with a
+     physical connector and if you find such a device send a complaint
+     to the hardware vendor, because there is no reason not to wire
+     this port into one of the physically accessible ports.
+
+ e.) It is also important to note, that many versions of the Netchip
+     device require the "client/console" system to be plugged into the
+     right and side of the device (with the product logo facing up and
+     readable left to right).  The reason being is that the 5 volt
+     power supply is taken from only one side of the device and it
+     must be the side that does not get rebooted.
 
 2. Software requirements:
 
@@ -56,6 +84,13 @@ and two USB cables, connected like this:
     (If you are using Grub, append it to the 'kernel' line in
      /etc/grub.conf)
 
+    On systems with more than one EHCI debug controller you must
+    specify the correct EHCI debug controller number.  The ordering
+    comes from the PCI bus enumeration of the EHCI controllers.  The
+    default with no number argument is "0" the first EHCI debug
+    controller.  To use the second EHCI debug controller, you would
+    use the command line: "earlyprintk=dbgp1"
+
     NOTE: normally earlyprintk console gets turned off once the
     regular console is alive - use "earlyprintk=dbgp,keep" to keep
     this channel open beyond early bootup. This can be useful for
-- 
cgit v0.10.2


From ad45f1dc836cb175e9aeea927837dd48039d652c Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 Aug 2009 15:39:58 -0500
Subject: USB: ehci-dbgp,ehci: Allow dbpg to work with suspend/resume

In order for the dbgp driver to survive suspend/resume, on every ehci
resume operation the debug controller must get re-initialized.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: dbrownell@users.sourceforge.net
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index 6b5e4d1..1b6f1c0 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -235,6 +235,13 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
 		return -ESHUTDOWN;
 	}
 
+	if (unlikely(ehci->debug)) {
+		if (ehci->debug && !dbgp_reset_prep())
+			ehci->debug = NULL;
+		else
+			dbgp_external_startup();
+	}
+
 	/* Ideally and we've got a real resume here, and no port's power
 	 * was lost.  (For PCI, that means Vaux was maintained.)  But we
 	 * could instead be restoring a swsusp snapshot -- so that BIOS was
-- 
cgit v0.10.2


From e6929a9020acbeb04d9a3ad9a88234c15be808fd Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Fri, 4 Sep 2009 23:19:53 +0200
Subject: USB: support for autosuspend in sierra while online

This implements support for autosuspend in the sierra driver while online.
Remote wakeup is used for reception. Transmission is facilitated with a queue
and the asynchronous autopm mechanism. To prevent races a private flag
for opened ports and a counter of running transmissions needs to be added.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Tested-by: Elina Pasheva <epasheva@sierrawireless.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 55391bb..68fa0e4 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -51,6 +51,12 @@ struct sierra_iface_info {
 	const u8  *ifaceinfo;	/* pointer to the array holding the numbers */
 };
 
+struct sierra_intf_private {
+	spinlock_t susp_lock;
+	unsigned int suspended:1;
+	int in_flight;
+};
+
 static int sierra_set_power_state(struct usb_device *udev, __u16 swiState)
 {
 	int result;
@@ -144,6 +150,7 @@ static int sierra_probe(struct usb_serial *serial,
 {
 	int result = 0;
 	struct usb_device *udev;
+	struct sierra_intf_private *data;
 	u8 ifnum;
 
 	udev = serial->dev;
@@ -171,6 +178,11 @@ static int sierra_probe(struct usb_serial *serial,
 		return -ENODEV;
 	}
 
+	data = serial->private = kzalloc(sizeof(struct sierra_intf_private), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	spin_lock_init(&data->susp_lock);
+
 	return result;
 }
 
@@ -261,13 +273,18 @@ static struct usb_driver sierra_driver = {
 	.name       = "sierra",
 	.probe      = usb_serial_probe,
 	.disconnect = usb_serial_disconnect,
+	.suspend    = usb_serial_suspend,
+	.resume     = usb_serial_resume,
 	.id_table   = id_table,
 	.no_dynamic_id = 	1,
+	.supports_autosuspend =	1,
 };
 
 struct sierra_port_private {
 	spinlock_t lock;	/* lock the structure */
 	int outstanding_urbs;	/* number of out urbs in flight */
+	struct usb_anchor active;
+	struct usb_anchor delayed;
 
 	/* Input endpoints and buffers for this port */
 	struct urb *in_urbs[N_IN_URB];
@@ -279,6 +296,8 @@ struct sierra_port_private {
 	int dsr_state;
 	int dcd_state;
 	int ri_state;
+
+	unsigned int opened:1;
 };
 
 static int sierra_send_setup(struct usb_serial_port *port)
@@ -390,21 +409,25 @@ static void sierra_outdat_callback(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
+	struct sierra_intf_private *intfdata;
 	int status = urb->status;
-	unsigned long flags;
 
 	dev_dbg(&port->dev, "%s - port %d\n", __func__, port->number);
+	intfdata = port->serial->private;
 
 	/* free up the transfer buffer, as usb_free_urb() does not do this */
 	kfree(urb->transfer_buffer);
-
+	usb_autopm_put_interface_async(port->serial->interface);
 	if (status)
 		dev_dbg(&port->dev, "%s - nonzero write bulk status "
 		    "received: %d\n", __func__, status);
 
-	spin_lock_irqsave(&portdata->lock, flags);
+	spin_lock(&portdata->lock);
 	--portdata->outstanding_urbs;
-	spin_unlock_irqrestore(&portdata->lock, flags);
+	spin_unlock(&portdata->lock);
+	spin_lock(&intfdata->susp_lock);
+	--intfdata->in_flight;
+	spin_unlock(&intfdata->susp_lock);
 
 	usb_serial_port_softint(port);
 }
@@ -414,6 +437,7 @@ static int sierra_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count)
 {
 	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
+	struct sierra_intf_private *intfdata;
 	struct usb_serial *serial = port->serial;
 	unsigned long flags;
 	unsigned char *buffer;
@@ -426,9 +450,9 @@ static int sierra_write(struct tty_struct *tty, struct usb_serial_port *port,
 		return 0;
 
 	portdata = usb_get_serial_port_data(port);
+	intfdata = serial->private;
 
 	dev_dbg(&port->dev, "%s: write (%zd bytes)\n", __func__, writesize);
-
 	spin_lock_irqsave(&portdata->lock, flags);
 	dev_dbg(&port->dev, "%s - outstanding_urbs: %d\n", __func__,
 		portdata->outstanding_urbs);
@@ -442,6 +466,14 @@ static int sierra_write(struct tty_struct *tty, struct usb_serial_port *port,
 		portdata->outstanding_urbs);
 	spin_unlock_irqrestore(&portdata->lock, flags);
 
+	retval = usb_autopm_get_interface_async(serial->interface);
+	if (retval < 0) {
+		spin_lock_irqsave(&portdata->lock, flags);
+		portdata->outstanding_urbs--;
+		spin_unlock_irqrestore(&portdata->lock, flags);
+		goto error_simple;
+	}
+
 	buffer = kmalloc(writesize, GFP_ATOMIC);
 	if (!buffer) {
 		dev_err(&port->dev, "out of memory\n");
@@ -468,14 +500,29 @@ static int sierra_write(struct tty_struct *tty, struct usb_serial_port *port,
 	/* Handle the need to send a zero length packet */
 	urb->transfer_flags |= URB_ZERO_PACKET;
 
+	spin_lock_irqsave(&intfdata->susp_lock, flags);
+
+	if (intfdata->suspended) {
+		usb_anchor_urb(urb, &portdata->delayed);
+		spin_unlock_irqrestore(&intfdata->susp_lock, flags);
+		goto skip_power;
+	} else {
+		usb_anchor_urb(urb, &portdata->active);
+	}
 	/* send it down the pipe */
 	retval = usb_submit_urb(urb, GFP_ATOMIC);
 	if (retval) {
+		usb_unanchor_urb(urb);
+		spin_unlock_irqrestore(&intfdata->susp_lock, flags);
 		dev_err(&port->dev, "%s - usb_submit_urb(write bulk) failed "
 			"with status = %d\n", __func__, retval);
 		goto error;
+	} else {
+		intfdata->in_flight++;
+		spin_unlock_irqrestore(&intfdata->susp_lock, flags);
 	}
 
+skip_power:
 	/* we are done with this urb, so let the host driver
 	 * really free it when it is finished with it */
 	usb_free_urb(urb);
@@ -491,6 +538,8 @@ error_no_buffer:
 	dev_dbg(&port->dev, "%s - 2. outstanding_urbs: %d\n", __func__,
 		portdata->outstanding_urbs);
 	spin_unlock_irqrestore(&portdata->lock, flags);
+	usb_autopm_put_interface_async(serial->interface);
+error_simple:
 	return retval;
 }
 
@@ -530,6 +579,7 @@ static void sierra_indat_callback(struct urb *urb)
 
 	/* Resubmit urb so we continue receiving */
 	if (port->port.count && status != -ESHUTDOWN && status != -EPERM) {
+		usb_mark_last_busy(port->serial->dev);
 		err = usb_submit_urb(urb, GFP_ATOMIC);
 		if (err)
 			dev_err(&port->dev, "resubmit read urb failed."
@@ -591,6 +641,7 @@ static void sierra_instat_callback(struct urb *urb)
 
 	/* Resubmit urb so we continue receiving IRQ data */
 	if (port->port.count && status != -ESHUTDOWN && status != -ENOENT) {
+		usb_mark_last_busy(serial->dev);
 		urb->dev = serial->dev;
 		err = usb_submit_urb(urb, GFP_ATOMIC);
 		if (err)
@@ -711,6 +762,8 @@ static void sierra_close(struct usb_serial_port *port)
 	int i;
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
+	struct sierra_intf_private *intfdata = port->serial->private;
+
 
 	dev_dbg(&port->dev, "%s\n", __func__);
 	portdata = usb_get_serial_port_data(port);
@@ -723,6 +776,10 @@ static void sierra_close(struct usb_serial_port *port)
 		if (!serial->disconnected)
 			sierra_send_setup(port);
 		mutex_unlock(&serial->disc_mutex);
+		spin_lock_irq(&intfdata->susp_lock);
+		portdata->opened = 0;
+		spin_unlock_irq(&intfdata->susp_lock);
+
 
 		/* Stop reading urbs */
 		sierra_stop_rx_urbs(port);
@@ -731,6 +788,8 @@ static void sierra_close(struct usb_serial_port *port)
 			sierra_release_urb(portdata->in_urbs[i]);
 			portdata->in_urbs[i] = NULL;
 		}
+		usb_autopm_get_interface(serial->interface);
+		serial->interface->needs_remote_wakeup = 0;
 	}
 }
 
@@ -738,6 +797,7 @@ static int sierra_open(struct tty_struct *tty, struct usb_serial_port *port)
 {
 	struct sierra_port_private *portdata;
 	struct usb_serial *serial = port->serial;
+	struct sierra_intf_private *intfdata = serial->private;
 	int i;
 	int err;
 	int endpoint;
@@ -771,6 +831,12 @@ static int sierra_open(struct tty_struct *tty, struct usb_serial_port *port)
 	}
 	sierra_send_setup(port);
 
+	serial->interface->needs_remote_wakeup = 1;
+	spin_lock_irq(&intfdata->susp_lock);
+	portdata->opened = 1;
+	spin_unlock_irq(&intfdata->susp_lock);
+	usb_autopm_put_interface(serial->interface);
+
 	return 0;
 }
 
@@ -818,6 +884,8 @@ static int sierra_startup(struct usb_serial *serial)
 			return -ENOMEM;
 		}
 		spin_lock_init(&portdata->lock);
+		init_usb_anchor(&portdata->active);
+		init_usb_anchor(&portdata->delayed);
 		/* Set the port private data pointer */
 		usb_set_serial_port_data(port, portdata);
 	}
@@ -844,6 +912,83 @@ static void sierra_release(struct usb_serial *serial)
 	}
 }
 
+static void stop_read_write_urbs(struct usb_serial *serial)
+{
+	int i, j;
+	struct usb_serial_port *port;
+	struct sierra_port_private *portdata;
+
+	/* Stop reading/writing urbs */
+	for (i = 0; i < serial->num_ports; ++i) {
+		port = serial->port[i];
+		portdata = usb_get_serial_port_data(port);
+		for (j = 0; j < N_IN_URB; j++)
+			usb_kill_urb(portdata->in_urbs[j]);
+		usb_kill_anchored_urbs(&portdata->active);
+	}
+}
+
+static int sierra_suspend(struct usb_serial *serial, pm_message_t message)
+{
+	struct sierra_intf_private *intfdata;
+	int b;
+
+	if (serial->dev->auto_pm) {
+		intfdata = serial->private;
+		spin_lock_irq(&intfdata->susp_lock);
+		b = intfdata->in_flight;
+
+		if (b) {
+			spin_unlock_irq(&intfdata->susp_lock);
+			return -EBUSY;
+		} else {
+			intfdata->suspended = 1;
+			spin_unlock_irq(&intfdata->susp_lock);
+		}
+	}
+	stop_read_write_urbs(serial);
+
+	return 0;
+}
+
+static int sierra_resume(struct usb_serial *serial)
+{
+	struct usb_serial_port *port;
+	struct sierra_intf_private *intfdata = serial->private;
+	struct sierra_port_private *portdata;
+	struct urb *urb;
+	int ec = 0;
+	int i, err;
+
+	spin_lock_irq(&intfdata->susp_lock);
+	for (i = 0; i < serial->num_ports; i++) {
+		port = serial->port[i];
+		portdata = usb_get_serial_port_data(port);
+
+		while ((urb = usb_get_from_anchor(&portdata->delayed))) {
+			usb_anchor_urb(urb, &portdata->active);
+			intfdata->in_flight++;
+			err = usb_submit_urb(urb, GFP_ATOMIC);
+			if (err < 0) {
+				intfdata->in_flight--;
+				usb_unanchor_urb(urb);
+				usb_scuttle_anchored_urbs(&portdata->delayed);
+				break;
+			}
+		}
+
+		if (portdata->opened) {
+			err = sierra_submit_rx_urbs(port, GFP_ATOMIC);
+			if (err)
+				ec++;
+		}
+	}
+	intfdata->suspended = 0;
+	spin_unlock_irq(&intfdata->susp_lock);
+
+	return ec ? -EIO : 0;
+}
+
 static struct usb_serial_driver sierra_device = {
 	.driver = {
 		.owner =	THIS_MODULE,
@@ -864,6 +1009,8 @@ static struct usb_serial_driver sierra_device = {
 	.tiocmset          = sierra_tiocmset,
 	.attach            = sierra_startup,
 	.release           = sierra_release,
+	.suspend	   = sierra_suspend,
+	.resume		   = sierra_resume,
 	.read_int_callback = sierra_instat_callback,
 };
 
-- 
cgit v0.10.2


From 01c6460f968d7b57fc6f98adb587952628c6e099 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 1 Sep 2009 11:09:56 -0400
Subject: USB: usbfs: add USBDEVFS_URB_BULK_CONTINUATION flag

This patch (as1283) adds a new flag, USBDEVFS_URB_BULK_CONTINUATION,
to usbfs.  It is intended for userspace libraries such as libusb and
openusb.  When they have to break up a single usbfs bulk transfer into
multiple URBs, they will set the flag on all but the first URB of the
series.

If an error other than an unlink occurs, the kernel will automatically
cancel all the following URBs for the same endpoint and refuse to
accept new submissions, until an URB is encountered that is not marked
as a BULK_CONTINUATION.  Such an URB would indicate the start of a new
transfer or the presence of an older library, so the kernel returns to
normal operation.

This enables libraries to delimit bulk transfers correctly, even in
the presence of early termination as indicated by short packets.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 71514be..181f78c 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -74,6 +74,7 @@ struct dev_state {
 	void __user *disccontext;
 	unsigned long ifclaimed;
 	u32 secid;
+	u32 disabled_bulk_eps;
 };
 
 struct async {
@@ -88,6 +89,8 @@ struct async {
 	struct urb *urb;
 	int status;
 	u32 secid;
+	u8 bulk_addr;
+	u8 bulk_status;
 };
 
 static int usbfs_snoop;
@@ -343,6 +346,43 @@ static void snoop_urb(struct usb_device *udev,
 	}
 }
 
+#define AS_CONTINUATION	1
+#define AS_UNLINK	2
+
+static void cancel_bulk_urbs(struct dev_state *ps, unsigned bulk_addr)
+__releases(ps->lock)
+__acquires(ps->lock)
+{
+	struct async *as;
+
+	/* Mark all the pending URBs that match bulk_addr, up to but not
+	 * including the first one without AS_CONTINUATION.  If such an
+	 * URB is encountered then a new transfer has already started so
+	 * the endpoint doesn't need to be disabled; otherwise it does.
+	 */
+	list_for_each_entry(as, &ps->async_pending, asynclist) {
+		if (as->bulk_addr == bulk_addr) {
+			if (as->bulk_status != AS_CONTINUATION)
+				goto rescan;
+			as->bulk_status = AS_UNLINK;
+			as->bulk_addr = 0;
+		}
+	}
+	ps->disabled_bulk_eps |= (1 << bulk_addr);
+
+	/* Now carefully unlink all the marked pending URBs */
+ rescan:
+	list_for_each_entry(as, &ps->async_pending, asynclist) {
+		if (as->bulk_status == AS_UNLINK) {
+			as->bulk_status = 0;		/* Only once */
+			spin_unlock(&ps->lock);		/* Allow completions */
+			usb_unlink_urb(as->urb);
+			spin_lock(&ps->lock);
+			goto rescan;
+		}
+	}
+}
+
 static void async_completed(struct urb *urb)
 {
 	struct async *as = urb->context;
@@ -371,6 +411,9 @@ static void async_completed(struct urb *urb)
 	snoop(&urb->dev->dev, "urb complete\n");
 	snoop_urb(urb->dev, as->userurb, urb->pipe, urb->actual_length,
 			as->status, COMPLETE);
+	if (as->status < 0 && as->bulk_addr && as->status != -ECONNRESET &&
+			as->status != -ENOENT)
+		cancel_bulk_urbs(ps, as->bulk_addr);
 	spin_unlock(&ps->lock);
 
 	if (signr)
@@ -993,6 +1036,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 
 	if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP |
 				USBDEVFS_URB_SHORT_NOT_OK |
+				USBDEVFS_URB_BULK_CONTINUATION |
 				USBDEVFS_URB_NO_FSBR |
 				USBDEVFS_URB_ZERO_PACKET |
 				USBDEVFS_URB_NO_INTERRUPT))
@@ -1194,7 +1238,39 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 	snoop_urb(ps->dev, as->userurb, as->urb->pipe,
 			as->urb->transfer_buffer_length, 0, SUBMIT);
 	async_newpending(as);
-	if ((ret = usb_submit_urb(as->urb, GFP_KERNEL))) {
+
+	if (usb_endpoint_xfer_bulk(&ep->desc)) {
+		spin_lock_irq(&ps->lock);
+
+		/* Not exactly the endpoint address; the direction bit is
+		 * shifted to the 0x10 position so that the value will be
+		 * between 0 and 31.
+		 */
+		as->bulk_addr = usb_endpoint_num(&ep->desc) |
+			((ep->desc.bEndpointAddress & USB_ENDPOINT_DIR_MASK)
+				>> 3);
+
+		/* If this bulk URB is the start of a new transfer, re-enable
+		 * the endpoint.  Otherwise mark it as a continuation URB.
+		 */
+		if (uurb->flags & USBDEVFS_URB_BULK_CONTINUATION)
+			as->bulk_status = AS_CONTINUATION;
+		else
+			ps->disabled_bulk_eps &= ~(1 << as->bulk_addr);
+
+		/* Don't accept continuation URBs if the endpoint is
+		 * disabled because of an earlier error.
+		 */
+		if (ps->disabled_bulk_eps & (1 << as->bulk_addr))
+			ret = -EREMOTEIO;
+		else
+			ret = usb_submit_urb(as->urb, GFP_ATOMIC);
+		spin_unlock_irq(&ps->lock);
+	} else {
+		ret = usb_submit_urb(as->urb, GFP_KERNEL);
+	}
+
+	if (ret) {
 		dev_printk(KERN_DEBUG, &ps->dev->dev,
 			   "usbfs: usb_submit_urb returned %d\n", ret);
 		snoop_urb(ps->dev, as->userurb, as->urb->pipe,
diff --git a/include/linux/usbdevice_fs.h b/include/linux/usbdevice_fs.h
index 00ceebe..b2a7d8b 100644
--- a/include/linux/usbdevice_fs.h
+++ b/include/linux/usbdevice_fs.h
@@ -77,6 +77,7 @@ struct usbdevfs_connectinfo {
 
 #define USBDEVFS_URB_SHORT_NOT_OK	0x01
 #define USBDEVFS_URB_ISO_ASAP		0x02
+#define USBDEVFS_URB_BULK_CONTINUATION	0x04
 #define USBDEVFS_URB_NO_FSBR		0x20
 #define USBDEVFS_URB_ZERO_PACKET	0x40
 #define USBDEVFS_URB_NO_INTERRUPT	0x80
-- 
cgit v0.10.2


From 1e5ea5e32043094d96ca1e501110c1fbb631f693 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 27 Aug 2009 16:46:56 +0200
Subject: USB: fix missing error check in probing

usb: check for IO errors usb_set_interface can return

if they happen while unbinding a flag is set to retry upon probe
if they happen during probe they are handled as probe errors

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 1c976c1..4f86447 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -239,24 +239,31 @@ static int usb_probe_interface(struct device *dev)
 
 		/* Carry out a deferred switch to altsetting 0 */
 		if (intf->needs_altsetting0) {
-			usb_set_interface(udev, intf->altsetting[0].
+			error = usb_set_interface(udev, intf->altsetting[0].
 					desc.bInterfaceNumber, 0);
+			if (error < 0)
+				goto err;
+
 			intf->needs_altsetting0 = 0;
 		}
 
 		error = driver->probe(intf, id);
-		if (error) {
-			mark_quiesced(intf);
-			intf->needs_remote_wakeup = 0;
-			intf->condition = USB_INTERFACE_UNBOUND;
-			usb_cancel_queued_reset(intf);
-		} else
-			intf->condition = USB_INTERFACE_BOUND;
+		if (error)
+			goto err;
 
+		intf->condition = USB_INTERFACE_BOUND;
 		usb_autosuspend_device(udev);
 	}
 
 	return error;
+
+err:
+	mark_quiesced(intf);
+	intf->needs_remote_wakeup = 0;
+	intf->condition = USB_INTERFACE_UNBOUND;
+	usb_cancel_queued_reset(intf);
+	usb_autosuspend_device(udev);
+	return error;
 }
 
 /* called from driver core with dev locked */
@@ -265,7 +272,7 @@ static int usb_unbind_interface(struct device *dev)
 	struct usb_driver *driver = to_usb_driver(dev->driver);
 	struct usb_interface *intf = to_usb_interface(dev);
 	struct usb_device *udev;
-	int error;
+	int error, r;
 
 	intf->condition = USB_INTERFACE_UNBINDING;
 
@@ -293,11 +300,14 @@ static int usb_unbind_interface(struct device *dev)
 		 * Just re-enable it without affecting the endpoint toggles.
 		 */
 		usb_enable_interface(udev, intf, false);
-	} else if (!error && intf->dev.power.status == DPM_ON)
-		usb_set_interface(udev, intf->altsetting[0].
+	} else if (!error && intf->dev.power.status == DPM_ON) {
+		r = usb_set_interface(udev, intf->altsetting[0].
 				desc.bInterfaceNumber, 0);
-	else
+		if (r < 0)
+			intf->needs_altsetting0 = 1;
+	} else {
 		intf->needs_altsetting0 = 1;
+	}
 	usb_set_intfdata(intf, NULL);
 
 	intf->condition = USB_INTERFACE_UNBOUND;
-- 
cgit v0.10.2


From d0a38365d9585bf3fb71f7c57fd532441a14f3e8 Mon Sep 17 00:00:00 2001
From: Gergely Imreh <imrehg@gmail.com>
Date: Mon, 7 Sep 2009 10:47:01 +0800
Subject: USB: fix USBTMC get_capabilities success handling

In order:
Add reference to relevant section of USBTMC usb488 subclass specs.
Print debug output of capabilities only when it was retrieved successfully.
Clear return value on success, otherwise driver always reports failure.

Signed-off-by: Gergely Imreh <imrehg@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 6395f22..333ee02 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -57,7 +57,9 @@ MODULE_DEVICE_TABLE(usb, usbtmc_devices);
 
 /*
  * This structure is the capabilities for the device
- * See section 4.2.1.8 of the USBTMC specification for details.
+ * See section 4.2.1.8 of the USBTMC specification,
+ * and section 4.2.2 of the USBTMC usb488 subclass
+ * specification for details.
  */
 struct usbtmc_dev_capabilities {
 	__u8 interface_capabilities;
@@ -796,20 +798,21 @@ static int get_capabilities(struct usbtmc_device_data *data)
 	}
 
 	dev_dbg(dev, "GET_CAPABILITIES returned %x\n", buffer[0]);
-	dev_dbg(dev, "Interface capabilities are %x\n", buffer[4]);
-	dev_dbg(dev, "Device capabilities are %x\n", buffer[5]);
-	dev_dbg(dev, "USB488 interface capabilities are %x\n", buffer[14]);
-	dev_dbg(dev, "USB488 device capabilities are %x\n", buffer[15]);
 	if (buffer[0] != USBTMC_STATUS_SUCCESS) {
 		dev_err(dev, "GET_CAPABILITIES returned %x\n", buffer[0]);
 		rv = -EPERM;
 		goto err_out;
 	}
+	dev_dbg(dev, "Interface capabilities are %x\n", buffer[4]);
+	dev_dbg(dev, "Device capabilities are %x\n", buffer[5]);
+	dev_dbg(dev, "USB488 interface capabilities are %x\n", buffer[14]);
+	dev_dbg(dev, "USB488 device capabilities are %x\n", buffer[15]);
 
 	data->capabilities.interface_capabilities = buffer[4];
 	data->capabilities.device_capabilities = buffer[5];
 	data->capabilities.usb488_interface_capabilities = buffer[14];
 	data->capabilities.usb488_device_capabilities = buffer[15];
+	rv = 0;
 
 err_out:
 	kfree(buffer);
-- 
cgit v0.10.2


From 9e221be815cd263480928248bfd4541497017a1b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 7 Sep 2009 17:08:39 -0700
Subject: USB: gadget: ether needs to select CRC32

Fix build error, ether uses/needs to select CRC32 config symbol:

ether.c:(.text+0x271480): undefined reference to `crc32_le'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 4d8ab47..3335131 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -627,6 +627,7 @@ config USB_AUDIO
 config USB_ETH
 	tristate "Ethernet Gadget (with CDC Ethernet support)"
 	depends on NET
+	select CRC32
 	help
 	  This driver implements Ethernet style communication, in one of
 	  several ways:
-- 
cgit v0.10.2


From 63a0d9abd18cdcf5a985029c266c6bfe0511768f Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:09 -0700
Subject: USB: xhci: Endpoint representation refactoring.

The xhci_ring structure contained information that is really related to an
endpoint, not a ring.  This will cause problems later when endpoint
streams are supported and there are multiple rings per endpoint.

Move the endpoint state and cancellation information into a new virtual
endpoint structure, xhci_virt_ep.  The list of TRBs to be cancelled should
be per endpoint, not per ring, for easy access.  There can be only one TRB
that the endpoint stopped on after a stop endpoint command (even with
streams enabled); move the stopped TRB information into the new virtual
endpoint structure.  Also move the 31 endpoint rings and temporary ring
storage from the virtual device structure (xhci_virt_device) into the
virtual endpoint structure (xhci_virt_ep).

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index e478a63..3ab9090 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -351,13 +351,14 @@ void xhci_event_ring_work(unsigned long arg)
 	xhci_dbg_ring_ptrs(xhci, xhci->cmd_ring);
 	xhci_dbg_cmd_ptrs(xhci);
 	for (i = 0; i < MAX_HC_SLOTS; ++i) {
-		if (xhci->devs[i]) {
-			for (j = 0; j < 31; ++j) {
-				if (xhci->devs[i]->ep_rings[j]) {
-					xhci_dbg(xhci, "Dev %d endpoint ring %d:\n", i, j);
-					xhci_debug_segment(xhci, xhci->devs[i]->ep_rings[j]->deq_seg);
-				}
-			}
+		if (!xhci->devs[i])
+			continue;
+		for (j = 0; j < 31; ++j) {
+			struct xhci_ring *ring = xhci->devs[i]->eps[j].ring;
+			if (!ring)
+				continue;
+			xhci_dbg(xhci, "Dev %d endpoint ring %d:\n", i, j);
+			xhci_debug_segment(xhci, ring->deq_seg);
 		}
 	}
 
@@ -778,6 +779,7 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 	struct xhci_td *td;
 	unsigned int ep_index;
 	struct xhci_ring *ep_ring;
+	struct xhci_virt_ep *ep;
 
 	xhci = hcd_to_xhci(hcd);
 	spin_lock_irqsave(&xhci->lock, flags);
@@ -790,17 +792,18 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 	xhci_dbg(xhci, "Event ring:\n");
 	xhci_debug_ring(xhci, xhci->event_ring);
 	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
-	ep_ring = xhci->devs[urb->dev->slot_id]->ep_rings[ep_index];
+	ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index];
+	ep_ring = ep->ring;
 	xhci_dbg(xhci, "Endpoint ring:\n");
 	xhci_debug_ring(xhci, ep_ring);
 	td = (struct xhci_td *) urb->hcpriv;
 
-	ep_ring->cancels_pending++;
-	list_add_tail(&td->cancelled_td_list, &ep_ring->cancelled_td_list);
+	ep->cancels_pending++;
+	list_add_tail(&td->cancelled_td_list, &ep->cancelled_td_list);
 	/* Queue a stop endpoint command, but only if this is
 	 * the first cancellation to be handled.
 	 */
-	if (ep_ring->cancels_pending == 1) {
+	if (ep->cancels_pending == 1) {
 		xhci_queue_stop_endpoint(xhci, urb->dev->slot_id, ep_index);
 		xhci_ring_cmd_db(xhci);
 	}
@@ -1206,10 +1209,10 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	xhci_zero_in_ctx(xhci, virt_dev);
 	/* Free any old rings */
 	for (i = 1; i < 31; ++i) {
-		if (virt_dev->new_ep_rings[i]) {
-			xhci_ring_free(xhci, virt_dev->ep_rings[i]);
-			virt_dev->ep_rings[i] = virt_dev->new_ep_rings[i];
-			virt_dev->new_ep_rings[i] = NULL;
+		if (virt_dev->eps[i].new_ring) {
+			xhci_ring_free(xhci, virt_dev->eps[i].ring);
+			virt_dev->eps[i].ring = virt_dev->eps[i].new_ring;
+			virt_dev->eps[i].new_ring = NULL;
 		}
 	}
 
@@ -1236,9 +1239,9 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	virt_dev = xhci->devs[udev->slot_id];
 	/* Free any rings allocated for added endpoints */
 	for (i = 0; i < 31; ++i) {
-		if (virt_dev->new_ep_rings[i]) {
-			xhci_ring_free(xhci, virt_dev->new_ep_rings[i]);
-			virt_dev->new_ep_rings[i] = NULL;
+		if (virt_dev->eps[i].new_ring) {
+			xhci_ring_free(xhci, virt_dev->eps[i].new_ring);
+			virt_dev->eps[i].new_ring = NULL;
 		}
 	}
 	xhci_zero_in_ctx(xhci, virt_dev);
@@ -1281,17 +1284,18 @@ void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
 }
 
 void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
-		struct usb_device *udev,
-		unsigned int ep_index, struct xhci_ring *ep_ring)
+		struct usb_device *udev, unsigned int ep_index)
 {
 	struct xhci_dequeue_state deq_state;
+	struct xhci_virt_ep *ep;
 
 	xhci_dbg(xhci, "Cleaning up stalled endpoint ring\n");
+	ep = &xhci->devs[udev->slot_id]->eps[ep_index];
 	/* We need to move the HW's dequeue pointer past this TD,
 	 * or it will attempt to resend it on the next doorbell ring.
 	 */
 	xhci_find_new_dequeue_state(xhci, udev->slot_id,
-			ep_index, ep_ring->stopped_td,
+			ep_index, ep->stopped_td,
 			&deq_state);
 
 	/* HW with the reset endpoint quirk will use the saved dequeue state to
@@ -1299,8 +1303,7 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
 	 */
 	if (!(xhci->quirks & XHCI_RESET_EP_QUIRK)) {
 		xhci_dbg(xhci, "Queueing new dequeue state\n");
-		xhci_queue_new_dequeue_state(xhci, ep_ring,
-				udev->slot_id,
+		xhci_queue_new_dequeue_state(xhci, udev->slot_id,
 				ep_index, &deq_state);
 	} else {
 		/* Better hope no one uses the input context between now and the
@@ -1327,7 +1330,7 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 	unsigned int ep_index;
 	unsigned long flags;
 	int ret;
-	struct xhci_ring *ep_ring;
+	struct xhci_virt_ep *virt_ep;
 
 	xhci = hcd_to_xhci(hcd);
 	udev = (struct usb_device *) ep->hcpriv;
@@ -1337,8 +1340,8 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 	if (!ep->hcpriv)
 		return;
 	ep_index = xhci_get_endpoint_index(&ep->desc);
-	ep_ring = xhci->devs[udev->slot_id]->ep_rings[ep_index];
-	if (!ep_ring->stopped_td) {
+	virt_ep = &xhci->devs[udev->slot_id]->eps[ep_index];
+	if (!virt_ep->stopped_td) {
 		xhci_dbg(xhci, "Endpoint 0x%x not halted, refusing to reset.\n",
 				ep->desc.bEndpointAddress);
 		return;
@@ -1357,8 +1360,8 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 	 * command.  Better hope that last command worked!
 	 */
 	if (!ret) {
-		xhci_cleanup_stalled_ring(xhci, udev, ep_index, ep_ring);
-		kfree(ep_ring->stopped_td);
+		xhci_cleanup_stalled_ring(xhci, udev, ep_index);
+		kfree(virt_ep->stopped_td);
 		xhci_ring_cmd_db(xhci);
 	}
 	spin_unlock_irqrestore(&xhci->lock, flags);
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 55920b3..75458ec 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -144,7 +144,6 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
 		return 0;
 
 	INIT_LIST_HEAD(&ring->td_list);
-	INIT_LIST_HEAD(&ring->cancelled_td_list);
 	if (num_segs == 0)
 		return ring;
 
@@ -265,8 +264,8 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
 		return;
 
 	for (i = 0; i < 31; ++i)
-		if (dev->ep_rings[i])
-			xhci_ring_free(xhci, dev->ep_rings[i]);
+		if (dev->eps[i].ring)
+			xhci_ring_free(xhci, dev->eps[i].ring);
 
 	if (dev->in_ctx)
 		xhci_free_container_ctx(xhci, dev->in_ctx);
@@ -281,6 +280,7 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
 		struct usb_device *udev, gfp_t flags)
 {
 	struct xhci_virt_device *dev;
+	int i;
 
 	/* Slot ID 0 is reserved */
 	if (slot_id == 0 || xhci->devs[slot_id]) {
@@ -309,9 +309,13 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
 	xhci_dbg(xhci, "Slot %d input ctx = 0x%llx (dma)\n", slot_id,
 			(unsigned long long)dev->in_ctx->dma);
 
+	/* Initialize the cancellation list for each endpoint */
+	for (i = 0; i < 31; i++)
+		INIT_LIST_HEAD(&dev->eps[i].cancelled_td_list);
+
 	/* Allocate endpoint 0 ring */
-	dev->ep_rings[0] = xhci_ring_alloc(xhci, 1, true, flags);
-	if (!dev->ep_rings[0])
+	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, flags);
+	if (!dev->eps[0].ring)
 		goto fail;
 
 	init_completion(&dev->cmd_completion);
@@ -428,8 +432,8 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 	ep0_ctx->ep_info2 |= ERROR_COUNT(3);
 
 	ep0_ctx->deq =
-		dev->ep_rings[0]->first_seg->dma;
-	ep0_ctx->deq |= dev->ep_rings[0]->cycle_state;
+		dev->eps[0].ring->first_seg->dma;
+	ep0_ctx->deq |= dev->eps[0].ring->cycle_state;
 
 	/* Steps 7 and 8 were done in xhci_alloc_virt_device() */
 
@@ -539,10 +543,11 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
 	ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->in_ctx, ep_index);
 
 	/* Set up the endpoint ring */
-	virt_dev->new_ep_rings[ep_index] = xhci_ring_alloc(xhci, 1, true, mem_flags);
-	if (!virt_dev->new_ep_rings[ep_index])
+	virt_dev->eps[ep_index].new_ring =
+		xhci_ring_alloc(xhci, 1, true, mem_flags);
+	if (!virt_dev->eps[ep_index].new_ring)
 		return -ENOMEM;
-	ep_ring = virt_dev->new_ep_rings[ep_index];
+	ep_ring = virt_dev->eps[ep_index].new_ring;
 	ep_ctx->deq = ep_ring->first_seg->dma | ep_ring->cycle_state;
 
 	ep_ctx->ep_info = xhci_get_endpoint_interval(udev, ep);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index ff5e6bc..6a72d20 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -296,16 +296,18 @@ static void ring_ep_doorbell(struct xhci_hcd *xhci,
 		unsigned int slot_id,
 		unsigned int ep_index)
 {
-	struct xhci_ring *ep_ring;
+	struct xhci_virt_ep *ep;
+	unsigned int ep_state;
 	u32 field;
 	__u32 __iomem *db_addr = &xhci->dba->doorbell[slot_id];
 
-	ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+	ep = &xhci->devs[slot_id]->eps[ep_index];
+	ep_state = ep->ep_state;
 	/* Don't ring the doorbell for this endpoint if there are pending
 	 * cancellations because the we don't want to interrupt processing.
 	 */
-	if (!ep_ring->cancels_pending && !(ep_ring->state & SET_DEQ_PENDING)
-			&& !(ep_ring->state & EP_HALTED)) {
+	if (!ep->cancels_pending && !(ep_state & SET_DEQ_PENDING)
+			&& !(ep_state & EP_HALTED)) {
 		field = xhci_readl(xhci, db_addr) & DB_MASK;
 		xhci_writel(xhci, field | EPI_TO_DB(ep_index), db_addr);
 		/* Flush PCI posted writes - FIXME Matthew Wilcox says this
@@ -361,7 +363,7 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 		struct xhci_td *cur_td, struct xhci_dequeue_state *state)
 {
 	struct xhci_virt_device *dev = xhci->devs[slot_id];
-	struct xhci_ring *ep_ring = dev->ep_rings[ep_index];
+	struct xhci_ring *ep_ring = dev->eps[ep_index].ring;
 	struct xhci_generic_trb *trb;
 	struct xhci_ep_ctx *ep_ctx;
 	dma_addr_t addr;
@@ -369,7 +371,7 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 	state->new_cycle_state = 0;
 	xhci_dbg(xhci, "Finding segment containing stopped TRB.\n");
 	state->new_deq_seg = find_trb_seg(cur_td->start_seg,
-			ep_ring->stopped_trb,
+			dev->eps[ep_index].stopped_trb,
 			&state->new_cycle_state);
 	if (!state->new_deq_seg)
 		BUG();
@@ -449,9 +451,11 @@ static int queue_set_tr_deq(struct xhci_hcd *xhci, int slot_id,
 		union xhci_trb *deq_ptr, u32 cycle_state);
 
 void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
-		struct xhci_ring *ep_ring, unsigned int slot_id,
-		unsigned int ep_index, struct xhci_dequeue_state *deq_state)
+		unsigned int slot_id, unsigned int ep_index,
+		struct xhci_dequeue_state *deq_state)
 {
+	struct xhci_virt_ep *ep = &xhci->devs[slot_id]->eps[ep_index];
+
 	xhci_dbg(xhci, "Set TR Deq Ptr cmd, new deq seg = %p (0x%llx dma), "
 			"new deq ptr = %p (0x%llx dma), new cycle = %u\n",
 			deq_state->new_deq_seg,
@@ -468,7 +472,7 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 	 * if the ring is running, and ringing the doorbell starts the
 	 * ring running.
 	 */
-	ep_ring->state |= SET_DEQ_PENDING;
+	ep->ep_state |= SET_DEQ_PENDING;
 }
 
 /*
@@ -487,6 +491,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 	unsigned int slot_id;
 	unsigned int ep_index;
 	struct xhci_ring *ep_ring;
+	struct xhci_virt_ep *ep;
 	struct list_head *entry;
 	struct xhci_td *cur_td = 0;
 	struct xhci_td *last_unlinked_td;
@@ -499,9 +504,10 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 	memset(&deq_state, 0, sizeof(deq_state));
 	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
 	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
-	ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+	ep = &xhci->devs[slot_id]->eps[ep_index];
+	ep_ring = ep->ring;
 
-	if (list_empty(&ep_ring->cancelled_td_list))
+	if (list_empty(&ep->cancelled_td_list))
 		return;
 
 	/* Fix up the ep ring first, so HW stops executing cancelled TDs.
@@ -509,7 +515,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 	 * it.  We're also in the event handler, so we can't get re-interrupted
 	 * if another Stop Endpoint command completes
 	 */
-	list_for_each(entry, &ep_ring->cancelled_td_list) {
+	list_for_each(entry, &ep->cancelled_td_list) {
 		cur_td = list_entry(entry, struct xhci_td, cancelled_td_list);
 		xhci_dbg(xhci, "Cancelling TD starting at %p, 0x%llx (dma).\n",
 				cur_td->first_trb,
@@ -518,7 +524,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 		 * If we stopped on the TD we need to cancel, then we have to
 		 * move the xHC endpoint ring dequeue pointer past this TD.
 		 */
-		if (cur_td == ep_ring->stopped_td)
+		if (cur_td == ep->stopped_td)
 			xhci_find_new_dequeue_state(xhci, slot_id, ep_index, cur_td,
 					&deq_state);
 		else
@@ -529,13 +535,13 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 		 * the cancelled TD list for URB completion later.
 		 */
 		list_del(&cur_td->td_list);
-		ep_ring->cancels_pending--;
+		ep->cancels_pending--;
 	}
 	last_unlinked_td = cur_td;
 
 	/* If necessary, queue a Set Transfer Ring Dequeue Pointer command */
 	if (deq_state.new_deq_ptr && deq_state.new_deq_seg) {
-		xhci_queue_new_dequeue_state(xhci, ep_ring,
+		xhci_queue_new_dequeue_state(xhci,
 				slot_id, ep_index, &deq_state);
 		xhci_ring_cmd_db(xhci);
 	} else {
@@ -550,7 +556,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 	 * So stop when we've completed the URB for the last TD we unlinked.
 	 */
 	do {
-		cur_td = list_entry(ep_ring->cancelled_td_list.next,
+		cur_td = list_entry(ep->cancelled_td_list.next,
 				struct xhci_td, cancelled_td_list);
 		list_del(&cur_td->cancelled_td_list);
 
@@ -597,7 +603,7 @@ static void handle_set_deq_completion(struct xhci_hcd *xhci,
 	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
 	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
 	dev = xhci->devs[slot_id];
-	ep_ring = dev->ep_rings[ep_index];
+	ep_ring = dev->eps[ep_index].ring;
 	ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
 	slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx);
 
@@ -641,7 +647,7 @@ static void handle_set_deq_completion(struct xhci_hcd *xhci,
 				ep_ctx->deq);
 	}
 
-	ep_ring->state &= ~SET_DEQ_PENDING;
+	dev->eps[ep_index].ep_state &= ~SET_DEQ_PENDING;
 	ring_ep_doorbell(xhci, slot_id, ep_index);
 }
 
@@ -655,7 +661,7 @@ static void handle_reset_ep_completion(struct xhci_hcd *xhci,
 
 	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
 	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
-	ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
 	/* This command will only fail if the endpoint wasn't halted,
 	 * but we don't care.
 	 */
@@ -673,7 +679,7 @@ static void handle_reset_ep_completion(struct xhci_hcd *xhci,
 		xhci_ring_cmd_db(xhci);
 	} else {
 		/* Clear our internal halted state and restart the ring */
-		ep_ring->state &= ~EP_HALTED;
+		xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_HALTED;
 		ring_ep_doorbell(xhci, slot_id, ep_index);
 	}
 }
@@ -726,7 +732,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 				xhci->devs[slot_id]->in_ctx);
 		/* Input ctx add_flags are the endpoint index plus one */
 		ep_index = xhci_last_valid_endpoint(ctrl_ctx->add_flags) - 1;
-		ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+		ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
 		if (!ep_ring) {
 			/* This must have been an initial configure endpoint */
 			xhci->devs[slot_id]->cmd_status =
@@ -734,13 +740,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 			complete(&xhci->devs[slot_id]->cmd_completion);
 			break;
 		}
-		ep_state = ep_ring->state;
+		ep_state = xhci->devs[slot_id]->eps[ep_index].ep_state;
 		xhci_dbg(xhci, "Completed config ep cmd - last ep index = %d, "
 				"state = %d\n", ep_index, ep_state);
 		if (xhci->quirks & XHCI_RESET_EP_QUIRK &&
 				ep_state & EP_HALTED) {
 			/* Clear our internal halted state and restart ring */
-			xhci->devs[slot_id]->ep_rings[ep_index]->state &=
+			xhci->devs[slot_id]->eps[ep_index].ep_state &=
 				~EP_HALTED;
 			ring_ep_doorbell(xhci, slot_id, ep_index);
 		} else {
@@ -864,6 +870,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		struct xhci_transfer_event *event)
 {
 	struct xhci_virt_device *xdev;
+	struct xhci_virt_ep *ep;
 	struct xhci_ring *ep_ring;
 	unsigned int slot_id;
 	int ep_index;
@@ -887,7 +894,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	/* Endpoint ID is 1 based, our index is zero based */
 	ep_index = TRB_TO_EP_ID(event->flags) - 1;
 	xhci_dbg(xhci, "%s - ep index = %d\n", __func__, ep_index);
-	ep_ring = xdev->ep_rings[ep_index];
+	ep = &xdev->eps[ep_index];
+	ep_ring = ep->ring;
 	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
 	if (!ep_ring || (ep_ctx->ep_info & EP_STATE_MASK) == EP_STATE_DISABLED) {
 		xhci_err(xhci, "ERROR Transfer event pointed to disabled endpoint\n");
@@ -948,7 +956,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		break;
 	case COMP_STALL:
 		xhci_warn(xhci, "WARN: Stalled endpoint\n");
-		ep_ring->state |= EP_HALTED;
+		ep->ep_state |= EP_HALTED;
 		status = -EPIPE;
 		break;
 	case COMP_TRB_ERR:
@@ -1016,12 +1024,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			else
 				td->urb->actual_length = 0;
 
-			ep_ring->stopped_td = td;
-			ep_ring->stopped_trb = event_trb;
+			ep->stopped_td = td;
+			ep->stopped_trb = event_trb;
 			xhci_queue_reset_ep(xhci, slot_id, ep_index);
-			xhci_cleanup_stalled_ring(xhci,
-					td->urb->dev,
-					ep_index, ep_ring);
+			xhci_cleanup_stalled_ring(xhci, td->urb->dev, ep_index);
 			xhci_ring_cmd_db(xhci);
 			goto td_cleanup;
 		default:
@@ -1161,8 +1167,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		 * stopped TDs.  A stopped TD may be restarted, so don't update
 		 * the ring dequeue pointer or take this TD off any lists yet.
 		 */
-		ep_ring->stopped_td = td;
-		ep_ring->stopped_trb = event_trb;
+		ep->stopped_td = td;
+		ep->stopped_trb = event_trb;
 	} else {
 		if (trb_comp_code == COMP_STALL ||
 				trb_comp_code == COMP_BABBLE) {
@@ -1172,8 +1178,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			 * pointer past the TD.  We can't do that here because
 			 * the halt condition must be cleared first.
 			 */
-			ep_ring->stopped_td = td;
-			ep_ring->stopped_trb = event_trb;
+			ep->stopped_td = td;
+			ep->stopped_trb = event_trb;
 		} else {
 			/* Update ring dequeue pointer */
 			while (ep_ring->dequeue != td->last_trb)
@@ -1206,7 +1212,7 @@ td_cleanup:
 		/* Was this TD slated to be cancelled but completed anyway? */
 		if (!list_empty(&td->cancelled_td_list)) {
 			list_del(&td->cancelled_td_list);
-			ep_ring->cancels_pending--;
+			ep->cancels_pending--;
 		}
 		/* Leave the TD around for the reset endpoint function to use
 		 * (but only if it's not a control endpoint, since we already
@@ -1369,7 +1375,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
 {
 	int ret;
 	struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
-	ret = prepare_ring(xhci, xdev->ep_rings[ep_index],
+	ret = prepare_ring(xhci, xdev->eps[ep_index].ring,
 			ep_ctx->ep_info & EP_STATE_MASK,
 			num_trbs, mem_flags);
 	if (ret)
@@ -1389,9 +1395,9 @@ static int prepare_transfer(struct xhci_hcd *xhci,
 	(*td)->urb = urb;
 	urb->hcpriv = (void *) (*td);
 	/* Add this TD to the tail of the endpoint ring's TD list */
-	list_add_tail(&(*td)->td_list, &xdev->ep_rings[ep_index]->td_list);
-	(*td)->start_seg = xdev->ep_rings[ep_index]->enq_seg;
-	(*td)->first_trb = xdev->ep_rings[ep_index]->enqueue;
+	list_add_tail(&(*td)->td_list, &xdev->eps[ep_index].ring->td_list);
+	(*td)->start_seg = xdev->eps[ep_index].ring->enq_seg;
+	(*td)->first_trb = xdev->eps[ep_index].ring->enqueue;
 
 	return 0;
 }
@@ -1525,7 +1531,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	struct xhci_generic_trb *start_trb;
 	int start_cycle;
 
-	ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
 	num_trbs = count_sg_trbs_needed(xhci, urb);
 	num_sgs = urb->num_sgs;
 
@@ -1658,7 +1664,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	if (urb->sg)
 		return queue_bulk_sg_tx(xhci, mem_flags, urb, slot_id, ep_index);
 
-	ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
 
 	num_trbs = 0;
 	/* How much data is (potentially) left before the 64KB boundary? */
@@ -1769,7 +1775,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	u32 field, length_field;
 	struct xhci_td *td;
 
-	ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
 
 	/*
 	 * Need to copy setup packet into setup TRB, so we can't use the setup
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index a7728aa..6270922 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -625,6 +625,23 @@ struct xhci_input_control_ctx {
 /* add context bitmasks */
 #define	ADD_EP(x)	(0x1 << x)
 
+struct xhci_virt_ep {
+	struct xhci_ring		*ring;
+	/* Temporary storage in case the configure endpoint command fails and we
+	 * have to restore the device state to the previous state
+	 */
+	struct xhci_ring		*new_ring;
+	unsigned int			ep_state;
+#define SET_DEQ_PENDING		(1 << 0)
+#define EP_HALTED		(1 << 1)
+	/* ----  Related to URB cancellation ---- */
+	struct list_head	cancelled_td_list;
+	unsigned int		cancels_pending;
+	/* The TRB that was last reported in a stopped endpoint ring */
+	union xhci_trb		*stopped_trb;
+	struct xhci_td		*stopped_td;
+};
+
 struct xhci_virt_device {
 	/*
 	 * Commands to the hardware are passed an "input context" that
@@ -637,13 +654,7 @@ struct xhci_virt_device {
 	struct xhci_container_ctx       *out_ctx;
 	/* Used for addressing devices and configuration changes */
 	struct xhci_container_ctx       *in_ctx;
-
-	/* FIXME when stream support is added */
-	struct xhci_ring		*ep_rings[31];
-	/* Temporary storage in case the configure endpoint command fails and we
-	 * have to restore the device state to the previous state
-	 */
-	struct xhci_ring		*new_ep_rings[31];
+	struct xhci_virt_ep		eps[31];
 	struct completion		cmd_completion;
 	/* Status of the last command issued for this device */
 	u32				cmd_status;
@@ -945,15 +956,6 @@ struct xhci_ring {
 	struct xhci_segment	*deq_seg;
 	unsigned int		deq_updates;
 	struct list_head	td_list;
-	/* ----  Related to URB cancellation ---- */
-	struct list_head	cancelled_td_list;
-	unsigned int		cancels_pending;
-	unsigned int		state;
-#define SET_DEQ_PENDING		(1 << 0)
-#define EP_HALTED		(1 << 1)
-	/* The TRB that was last reported in a stopped endpoint ring */
-	union xhci_trb		*stopped_trb;
-	struct xhci_td		*stopped_td;
 	/*
 	 * Write the cycle state into the TRB cycle field to give ownership of
 	 * the TRB to the host controller (if we are the producer), or to check
@@ -1236,11 +1238,10 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
 		struct xhci_td *cur_td, struct xhci_dequeue_state *state);
 void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
-		struct xhci_ring *ep_ring, unsigned int slot_id,
-		unsigned int ep_index, struct xhci_dequeue_state *deq_state);
+		unsigned int slot_id, unsigned int ep_index,
+		struct xhci_dequeue_state *deq_state);
 void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
-		struct usb_device *udev,
-		unsigned int ep_index, struct xhci_ring *ep_ring);
+		struct usb_device *udev, unsigned int ep_index);
 void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
 		struct xhci_dequeue_state *deq_state);
-- 
cgit v0.10.2


From 5270b951b9cd5e50aea55cb52684a171fb10381c Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:11 -0700
Subject: USB: xhci: Refactor input device context setup.

Refactor common code to set up the add and drop flags for the input device
context setup.  This setup is used before a configure endpoint command for
the reset endpoint quirk, and will be used for the command to alloc or
free streams rings.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 3ab9090..9c985d1 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -1247,12 +1247,27 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	xhci_zero_in_ctx(xhci, virt_dev);
 }
 
+static void xhci_setup_input_ctx_for_config_ep(struct xhci_hcd *xhci,
+		unsigned int slot_id, u32 add_flags, u32 drop_flags)
+{
+	struct xhci_input_control_ctx *ctrl_ctx;
+	ctrl_ctx = xhci_get_input_control_ctx(xhci,
+			xhci->devs[slot_id]->in_ctx);
+	ctrl_ctx->add_flags = add_flags;
+	ctrl_ctx->drop_flags = drop_flags;
+	xhci_slot_copy(xhci, xhci->devs[slot_id]);
+	ctrl_ctx->add_flags |= SLOT_FLAG;
+
+	xhci_dbg(xhci, "Slot ID %d Input Context:\n", slot_id);
+	xhci_dbg_ctx(xhci, xhci->devs[slot_id]->in_ctx,
+			xhci_last_valid_endpoint(add_flags));
+}
+
 void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
 		struct xhci_dequeue_state *deq_state)
 {
 	struct xhci_container_ctx *in_ctx;
-	struct xhci_input_control_ctx *ctrl_ctx;
 	struct xhci_ep_ctx *ep_ctx;
 	u32 added_ctxs;
 	dma_addr_t addr;
@@ -1272,15 +1287,9 @@ void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
 	}
 	ep_ctx->deq = addr | deq_state->new_cycle_state;
 
-	xhci_slot_copy(xhci, xhci->devs[slot_id]);
-
-	ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx);
 	added_ctxs = xhci_get_endpoint_flag_from_index(ep_index);
-	ctrl_ctx->add_flags = added_ctxs | SLOT_FLAG;
-	ctrl_ctx->drop_flags = added_ctxs;
-
-	xhci_dbg(xhci, "Slot ID %d Input Context:\n", slot_id);
-	xhci_dbg_ctx(xhci, in_ctx, ep_index);
+	xhci_setup_input_ctx_for_config_ep(xhci, slot_id,
+			added_ctxs, added_ctxs);
 }
 
 void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
-- 
cgit v0.10.2


From 913a8a344ffcaf0b4a586d6662a2c66a7106557d Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:13 -0700
Subject: USB: xhci: Change how xHCI commands are handled.

Some commands to the xHCI hardware cannot be allowed to fail due to out of
memory issues or the command ring being full.

Add a way to reserve a TRB on the command ring, and make all command
queueing functions indicate whether they are using a reserved TRB.

Add a way to pre-allocate all the memory a command might need.  A command
needs an input context, a variable to store the status, and (optionally) a
completion for the caller to wait on.  Change all code that assumes the
input device context, status, and completion for a command is stored in
the xhci virtual USB device structure (xhci_virt_device).

Store pending completions in a FIFO in xhci_virt_device.  Make the event
handler for a configure endpoint command check to see whether a pending
command in the list has completed.  We need to use separate input device
contexts for some configure endpoint commands, since multiple drivers can
submit requests at the same time that require a configure endpoint
command.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 9c985d1..2fcc360 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -612,8 +612,8 @@ int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
 }
 
 static int xhci_configure_endpoint(struct xhci_hcd *xhci,
-		struct usb_device *udev, struct xhci_virt_device *virt_dev,
-		bool ctx_change);
+		struct usb_device *udev, struct xhci_command *command,
+		bool ctx_change, bool must_succeed);
 
 /*
  * Full speed devices may have a max packet size greater than 8 bytes, but the
@@ -645,7 +645,8 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
 		xhci_dbg(xhci, "Issuing evaluate context command.\n");
 
 		/* Set up the modified control endpoint 0 */
-		xhci_endpoint_copy(xhci, xhci->devs[slot_id], ep_index);
+		xhci_endpoint_copy(xhci, xhci->devs[slot_id]->in_ctx,
+				xhci->devs[slot_id]->out_ctx, ep_index);
 		in_ctx = xhci->devs[slot_id]->in_ctx;
 		ep_ctx = xhci_get_ep_ctx(xhci, in_ctx, ep_index);
 		ep_ctx->ep_info2 &= ~MAX_PACKET_MASK;
@@ -664,8 +665,8 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
 		xhci_dbg(xhci, "Slot %d output context\n", slot_id);
 		xhci_dbg_ctx(xhci, out_ctx, ep_index);
 
-		ret = xhci_configure_endpoint(xhci, urb->dev,
-				xhci->devs[slot_id], true);
+		ret = xhci_configure_endpoint(xhci, urb->dev, NULL,
+				true, false);
 
 		/* Clean up the input context for later use by bandwidth
 		 * functions.
@@ -1038,11 +1039,11 @@ static void xhci_zero_in_ctx(struct xhci_hcd *xhci, struct xhci_virt_device *vir
 }
 
 static int xhci_configure_endpoint_result(struct xhci_hcd *xhci,
-		struct usb_device *udev, struct xhci_virt_device *virt_dev)
+		struct usb_device *udev, int *cmd_status)
 {
 	int ret;
 
-	switch (virt_dev->cmd_status) {
+	switch (*cmd_status) {
 	case COMP_ENOMEM:
 		dev_warn(&udev->dev, "Not enough host controller resources "
 				"for new device state.\n");
@@ -1068,7 +1069,7 @@ static int xhci_configure_endpoint_result(struct xhci_hcd *xhci,
 		break;
 	default:
 		xhci_err(xhci, "ERROR: unexpected command completion "
-				"code 0x%x.\n", virt_dev->cmd_status);
+				"code 0x%x.\n", *cmd_status);
 		ret = -EINVAL;
 		break;
 	}
@@ -1076,11 +1077,12 @@ static int xhci_configure_endpoint_result(struct xhci_hcd *xhci,
 }
 
 static int xhci_evaluate_context_result(struct xhci_hcd *xhci,
-		struct usb_device *udev, struct xhci_virt_device *virt_dev)
+		struct usb_device *udev, int *cmd_status)
 {
 	int ret;
+	struct xhci_virt_device *virt_dev = xhci->devs[udev->slot_id];
 
-	switch (virt_dev->cmd_status) {
+	switch (*cmd_status) {
 	case COMP_EINVAL:
 		dev_warn(&udev->dev, "WARN: xHCI driver setup invalid evaluate "
 				"context command.\n");
@@ -1101,7 +1103,7 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci,
 		break;
 	default:
 		xhci_err(xhci, "ERROR: unexpected command completion "
-				"code 0x%x.\n", virt_dev->cmd_status);
+				"code 0x%x.\n", *cmd_status);
 		ret = -EINVAL;
 		break;
 	}
@@ -1112,19 +1114,37 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci,
  * and wait for it to finish.
  */
 static int xhci_configure_endpoint(struct xhci_hcd *xhci,
-		struct usb_device *udev, struct xhci_virt_device *virt_dev,
-		bool ctx_change)
+		struct usb_device *udev,
+		struct xhci_command *command,
+		bool ctx_change, bool must_succeed)
 {
 	int ret;
 	int timeleft;
 	unsigned long flags;
+	struct xhci_container_ctx *in_ctx;
+	struct completion *cmd_completion;
+	int *cmd_status;
+	struct xhci_virt_device *virt_dev;
 
 	spin_lock_irqsave(&xhci->lock, flags);
+	virt_dev = xhci->devs[udev->slot_id];
+	if (command) {
+		in_ctx = command->in_ctx;
+		cmd_completion = command->completion;
+		cmd_status = &command->status;
+		command->command_trb = xhci->cmd_ring->enqueue;
+		list_add_tail(&command->cmd_list, &virt_dev->cmd_list);
+	} else {
+		in_ctx = virt_dev->in_ctx;
+		cmd_completion = &virt_dev->cmd_completion;
+		cmd_status = &virt_dev->cmd_status;
+	}
+
 	if (!ctx_change)
-		ret = xhci_queue_configure_endpoint(xhci, virt_dev->in_ctx->dma,
-				udev->slot_id);
+		ret = xhci_queue_configure_endpoint(xhci, in_ctx->dma,
+				udev->slot_id, must_succeed);
 	else
-		ret = xhci_queue_evaluate_context(xhci, virt_dev->in_ctx->dma,
+		ret = xhci_queue_evaluate_context(xhci, in_ctx->dma,
 				udev->slot_id);
 	if (ret < 0) {
 		spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1136,7 +1156,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
 
 	/* Wait for the configure endpoint command to complete */
 	timeleft = wait_for_completion_interruptible_timeout(
-			&virt_dev->cmd_completion,
+			cmd_completion,
 			USB_CTRL_SET_TIMEOUT);
 	if (timeleft <= 0) {
 		xhci_warn(xhci, "%s while waiting for %s command\n",
@@ -1149,8 +1169,8 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
 	}
 
 	if (!ctx_change)
-		return xhci_configure_endpoint_result(xhci, udev, virt_dev);
-	return xhci_evaluate_context_result(xhci, udev, virt_dev);
+		return xhci_configure_endpoint_result(xhci, udev, cmd_status);
+	return xhci_evaluate_context_result(xhci, udev, cmd_status);
 }
 
 /* Called after one or more calls to xhci_add_endpoint() or
@@ -1196,7 +1216,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	xhci_dbg_ctx(xhci, virt_dev->in_ctx,
 			LAST_CTX_TO_EP_NUM(slot_ctx->dev_info));
 
-	ret = xhci_configure_endpoint(xhci, udev, virt_dev, false);
+	ret = xhci_configure_endpoint(xhci, udev, NULL,
+			false, false);
 	if (ret) {
 		/* Callee should call reset_bandwidth() */
 		return ret;
@@ -1248,19 +1269,19 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 }
 
 static void xhci_setup_input_ctx_for_config_ep(struct xhci_hcd *xhci,
-		unsigned int slot_id, u32 add_flags, u32 drop_flags)
+		struct xhci_container_ctx *in_ctx,
+		struct xhci_container_ctx *out_ctx,
+		u32 add_flags, u32 drop_flags)
 {
 	struct xhci_input_control_ctx *ctrl_ctx;
-	ctrl_ctx = xhci_get_input_control_ctx(xhci,
-			xhci->devs[slot_id]->in_ctx);
+	ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx);
 	ctrl_ctx->add_flags = add_flags;
 	ctrl_ctx->drop_flags = drop_flags;
-	xhci_slot_copy(xhci, xhci->devs[slot_id]);
+	xhci_slot_copy(xhci, in_ctx, out_ctx);
 	ctrl_ctx->add_flags |= SLOT_FLAG;
 
-	xhci_dbg(xhci, "Slot ID %d Input Context:\n", slot_id);
-	xhci_dbg_ctx(xhci, xhci->devs[slot_id]->in_ctx,
-			xhci_last_valid_endpoint(add_flags));
+	xhci_dbg(xhci, "Input Context:\n");
+	xhci_dbg_ctx(xhci, in_ctx, xhci_last_valid_endpoint(add_flags));
 }
 
 void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
@@ -1272,7 +1293,8 @@ void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
 	u32 added_ctxs;
 	dma_addr_t addr;
 
-	xhci_endpoint_copy(xhci, xhci->devs[slot_id], ep_index);
+	xhci_endpoint_copy(xhci, xhci->devs[slot_id]->in_ctx,
+			xhci->devs[slot_id]->out_ctx, ep_index);
 	in_ctx = xhci->devs[slot_id]->in_ctx;
 	ep_ctx = xhci_get_ep_ctx(xhci, in_ctx, ep_index);
 	addr = xhci_trb_virt_to_dma(deq_state->new_deq_seg,
@@ -1288,8 +1310,8 @@ void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
 	ep_ctx->deq = addr | deq_state->new_cycle_state;
 
 	added_ctxs = xhci_get_endpoint_flag_from_index(ep_index);
-	xhci_setup_input_ctx_for_config_ep(xhci, slot_id,
-			added_ctxs, added_ctxs);
+	xhci_setup_input_ctx_for_config_ep(xhci, xhci->devs[slot_id]->in_ctx,
+			xhci->devs[slot_id]->out_ctx, added_ctxs, added_ctxs);
 }
 
 void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 75458ec..6e6797a 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -319,6 +319,7 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
 		goto fail;
 
 	init_completion(&dev->cmd_completion);
+	INIT_LIST_HEAD(&dev->cmd_list);
 
 	/* Point to output device context in dcbaa. */
 	xhci->dcbaa->dev_context_ptrs[slot_id] = dev->out_ctx->dma;
@@ -624,13 +625,15 @@ void xhci_endpoint_zero(struct xhci_hcd *xhci,
  * issue a configure endpoint command.
  */
 void xhci_endpoint_copy(struct xhci_hcd *xhci,
-		struct xhci_virt_device *vdev, unsigned int ep_index)
+		struct xhci_container_ctx *in_ctx,
+		struct xhci_container_ctx *out_ctx,
+		unsigned int ep_index)
 {
 	struct xhci_ep_ctx *out_ep_ctx;
 	struct xhci_ep_ctx *in_ep_ctx;
 
-	out_ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index);
-	in_ep_ctx = xhci_get_ep_ctx(xhci, vdev->in_ctx, ep_index);
+	out_ep_ctx = xhci_get_ep_ctx(xhci, out_ctx, ep_index);
+	in_ep_ctx = xhci_get_ep_ctx(xhci, in_ctx, ep_index);
 
 	in_ep_ctx->ep_info = out_ep_ctx->ep_info;
 	in_ep_ctx->ep_info2 = out_ep_ctx->ep_info2;
@@ -643,13 +646,15 @@ void xhci_endpoint_copy(struct xhci_hcd *xhci,
  * issue a configure endpoint command.  Only the context entries field matters,
  * but we'll copy the whole thing anyway.
  */
-void xhci_slot_copy(struct xhci_hcd *xhci, struct xhci_virt_device *vdev)
+void xhci_slot_copy(struct xhci_hcd *xhci,
+		struct xhci_container_ctx *in_ctx,
+		struct xhci_container_ctx *out_ctx)
 {
 	struct xhci_slot_ctx *in_slot_ctx;
 	struct xhci_slot_ctx *out_slot_ctx;
 
-	in_slot_ctx = xhci_get_slot_ctx(xhci, vdev->in_ctx);
-	out_slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx);
+	in_slot_ctx = xhci_get_slot_ctx(xhci, in_ctx);
+	out_slot_ctx = xhci_get_slot_ctx(xhci, out_ctx);
 
 	in_slot_ctx->dev_info = out_slot_ctx->dev_info;
 	in_slot_ctx->dev_info2 = out_slot_ctx->dev_info2;
@@ -754,6 +759,44 @@ static void scratchpad_free(struct xhci_hcd *xhci)
 	xhci->scratchpad = NULL;
 }
 
+struct xhci_command *xhci_alloc_command(struct xhci_hcd *xhci,
+		bool allocate_completion, gfp_t mem_flags)
+{
+	struct xhci_command *command;
+
+	command = kzalloc(sizeof(*command), mem_flags);
+	if (!command)
+		return NULL;
+
+	command->in_ctx =
+		xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_INPUT, mem_flags);
+	if (!command->in_ctx)
+		return NULL;
+
+	if (allocate_completion) {
+		command->completion =
+			kzalloc(sizeof(struct completion), mem_flags);
+		if (!command->completion) {
+			xhci_free_container_ctx(xhci, command->in_ctx);
+			return NULL;
+		}
+		init_completion(command->completion);
+	}
+
+	command->status = 0;
+	INIT_LIST_HEAD(&command->cmd_list);
+	return command;
+}
+
+void xhci_free_command(struct xhci_hcd *xhci,
+		struct xhci_command *command)
+{
+	xhci_free_container_ctx(xhci,
+			command->in_ctx);
+	kfree(command->completion);
+	kfree(command);
+}
+
 void xhci_mem_cleanup(struct xhci_hcd *xhci)
 {
 	struct pci_dev	*pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 6a72d20..a9379b3 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -675,7 +675,8 @@ static void handle_reset_ep_completion(struct xhci_hcd *xhci,
 	if (xhci->quirks & XHCI_RESET_EP_QUIRK) {
 		xhci_dbg(xhci, "Queueing configure endpoint command\n");
 		xhci_queue_configure_endpoint(xhci,
-				xhci->devs[slot_id]->in_ctx->dma, slot_id);
+				xhci->devs[slot_id]->in_ctx->dma, slot_id,
+				false);
 		xhci_ring_cmd_db(xhci);
 	} else {
 		/* Clear our internal halted state and restart the ring */
@@ -691,6 +692,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 	u64 cmd_dma;
 	dma_addr_t cmd_dequeue_dma;
 	struct xhci_input_control_ctx *ctrl_ctx;
+	struct xhci_virt_device *virt_dev;
 	unsigned int ep_index;
 	struct xhci_ring *ep_ring;
 	unsigned int ep_state;
@@ -721,6 +723,25 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 			xhci_free_virt_device(xhci, slot_id);
 		break;
 	case TRB_TYPE(TRB_CONFIG_EP):
+		virt_dev = xhci->devs[slot_id];
+		/* Check to see if a command in the device's command queue
+		 * matches this one.  Signal the completion or free the command.
+		 */
+		if (!list_empty(&virt_dev->cmd_list)) {
+			struct xhci_command *command;
+			command = list_entry(virt_dev->cmd_list.next,
+					struct xhci_command, cmd_list);
+			if (xhci->cmd_ring->dequeue == command->command_trb) {
+				command->status =
+					GET_COMP_CODE(event->status);
+				list_del(&command->cmd_list);
+				if (command->completion)
+					complete(command->completion);
+				else
+					xhci_free_command(xhci, command);
+			}
+			break;
+		}
 		/*
 		 * Configure endpoint commands can come from the USB core
 		 * configuration or alt setting changes, or because the HW
@@ -729,7 +750,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 		 * not waiting on the configure endpoint command.
 		 */
 		ctrl_ctx = xhci_get_input_control_ctx(xhci,
-				xhci->devs[slot_id]->in_ctx);
+				virt_dev->in_ctx);
 		/* Input ctx add_flags are the endpoint index plus one */
 		ep_index = xhci_last_valid_endpoint(ctrl_ctx->add_flags) - 1;
 		ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
@@ -1858,12 +1879,27 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
 /****		Command Ring Operations		****/
 
-/* Generic function for queueing a command TRB on the command ring */
-static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2, u32 field3, u32 field4)
+/* Generic function for queueing a command TRB on the command ring.
+ * Check to make sure there's room on the command ring for one command TRB.
+ * Also check that there's room reserved for commands that must not fail.
+ * If this is a command that must not fail, meaning command_must_succeed = TRUE,
+ * then only check for the number of reserved spots.
+ * Don't decrement xhci->cmd_ring_reserved_trbs after we've queued the TRB
+ * because the command event handler may want to resubmit a failed command.
+ */
+static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
+		u32 field3, u32 field4, bool command_must_succeed)
 {
-	if (!room_on_ring(xhci, xhci->cmd_ring, 1)) {
+	int reserved_trbs = xhci->cmd_ring_reserved_trbs;
+	if (!command_must_succeed)
+		reserved_trbs++;
+
+	if (!room_on_ring(xhci, xhci->cmd_ring, reserved_trbs)) {
 		if (!in_interrupt())
 			xhci_err(xhci, "ERR: No room for command on command ring\n");
+		if (command_must_succeed)
+			xhci_err(xhci, "ERR: Reserved TRB counting for "
+					"unfailable commands failed.\n");
 		return -ENOMEM;
 	}
 	queue_trb(xhci, xhci->cmd_ring, false, field1, field2, field3,
@@ -1874,7 +1910,7 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2, u32 fiel
 /* Queue a no-op command on the command ring */
 static int queue_cmd_noop(struct xhci_hcd *xhci)
 {
-	return queue_command(xhci, 0, 0, 0, TRB_TYPE(TRB_CMD_NOOP));
+	return queue_command(xhci, 0, 0, 0, TRB_TYPE(TRB_CMD_NOOP), false);
 }
 
 /*
@@ -1893,7 +1929,7 @@ void *xhci_setup_one_noop(struct xhci_hcd *xhci)
 int xhci_queue_slot_control(struct xhci_hcd *xhci, u32 trb_type, u32 slot_id)
 {
 	return queue_command(xhci, 0, 0, 0,
-			TRB_TYPE(trb_type) | SLOT_ID_FOR_TRB(slot_id));
+			TRB_TYPE(trb_type) | SLOT_ID_FOR_TRB(slot_id), false);
 }
 
 /* Queue an address device command TRB */
@@ -1902,16 +1938,18 @@ int xhci_queue_address_device(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
 {
 	return queue_command(xhci, lower_32_bits(in_ctx_ptr),
 			upper_32_bits(in_ctx_ptr), 0,
-			TRB_TYPE(TRB_ADDR_DEV) | SLOT_ID_FOR_TRB(slot_id));
+			TRB_TYPE(TRB_ADDR_DEV) | SLOT_ID_FOR_TRB(slot_id),
+			false);
 }
 
 /* Queue a configure endpoint command TRB */
 int xhci_queue_configure_endpoint(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
-		u32 slot_id)
+		u32 slot_id, bool command_must_succeed)
 {
 	return queue_command(xhci, lower_32_bits(in_ctx_ptr),
 			upper_32_bits(in_ctx_ptr), 0,
-			TRB_TYPE(TRB_CONFIG_EP) | SLOT_ID_FOR_TRB(slot_id));
+			TRB_TYPE(TRB_CONFIG_EP) | SLOT_ID_FOR_TRB(slot_id),
+			command_must_succeed);
 }
 
 /* Queue an evaluate context command TRB */
@@ -1920,7 +1958,8 @@ int xhci_queue_evaluate_context(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
 {
 	return queue_command(xhci, lower_32_bits(in_ctx_ptr),
 			upper_32_bits(in_ctx_ptr), 0,
-			TRB_TYPE(TRB_EVAL_CONTEXT) | SLOT_ID_FOR_TRB(slot_id));
+			TRB_TYPE(TRB_EVAL_CONTEXT) | SLOT_ID_FOR_TRB(slot_id),
+			false);
 }
 
 int xhci_queue_stop_endpoint(struct xhci_hcd *xhci, int slot_id,
@@ -1931,7 +1970,7 @@ int xhci_queue_stop_endpoint(struct xhci_hcd *xhci, int slot_id,
 	u32 type = TRB_TYPE(TRB_STOP_RING);
 
 	return queue_command(xhci, 0, 0, 0,
-			trb_slot_id | trb_ep_index | type);
+			trb_slot_id | trb_ep_index | type, false);
 }
 
 /* Set Transfer Ring Dequeue Pointer command.
@@ -1955,7 +1994,7 @@ static int queue_set_tr_deq(struct xhci_hcd *xhci, int slot_id,
 	}
 	return queue_command(xhci, lower_32_bits(addr) | cycle_state,
 			upper_32_bits(addr), 0,
-			trb_slot_id | trb_ep_index | type);
+			trb_slot_id | trb_ep_index | type, false);
 }
 
 int xhci_queue_reset_ep(struct xhci_hcd *xhci, int slot_id,
@@ -1965,5 +2004,6 @@ int xhci_queue_reset_ep(struct xhci_hcd *xhci, int slot_id,
 	u32 trb_ep_index = EP_ID_FOR_TRB(ep_index);
 	u32 type = TRB_TYPE(TRB_RESET_EP);
 
-	return queue_command(xhci, 0, 0, 0, trb_slot_id | trb_ep_index | type);
+	return queue_command(xhci, 0, 0, 0, trb_slot_id | trb_ep_index | type,
+			false);
 }
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 6270922..36f7d4f 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -620,6 +620,22 @@ struct xhci_input_control_ctx {
 	u32	rsvd2[6];
 };
 
+/* Represents everything that is needed to issue a command on the command ring.
+ * It's useful to pre-allocate these for commands that cannot fail due to
+ * out-of-memory errors, like freeing streams.
+ */
+struct xhci_command {
+	/* Input context for changing device state */
+	struct xhci_container_ctx	*in_ctx;
+	u32				status;
+	/* If completion is null, no one is waiting on this command
+	 * and the structure can be freed after the command completes.
+	 */
+	struct completion		*completion;
+	union xhci_trb			*command_trb;
+	struct list_head		cmd_list;
+};
+
 /* drop context bitmasks */
 #define	DROP_EP(x)	(0x1 << x)
 /* add context bitmasks */
@@ -658,6 +674,7 @@ struct xhci_virt_device {
 	struct completion		cmd_completion;
 	/* Status of the last command issued for this device */
 	u32				cmd_status;
+	struct list_head		cmd_list;
 };
 
 
@@ -920,6 +937,8 @@ union xhci_trb {
  * It must also be greater than 16.
  */
 #define TRBS_PER_SEGMENT	64
+/* Allow two commands + a link TRB, along with any reserved command TRBs */
+#define MAX_RSVD_CMD_TRBS	(TRBS_PER_SEGMENT - 3)
 #define SEGMENT_SIZE		(TRBS_PER_SEGMENT*16)
 /* TRB buffer pointers can't cross 64KB boundaries */
 #define TRB_MAX_BUFF_SHIFT		16
@@ -1040,6 +1059,7 @@ struct xhci_hcd {
 	/* data structures */
 	struct xhci_device_context_array *dcbaa;
 	struct xhci_ring	*cmd_ring;
+	unsigned int		cmd_ring_reserved_trbs;
 	struct xhci_ring	*event_ring;
 	struct xhci_erst	erst;
 	/* Scratchpad */
@@ -1178,12 +1198,20 @@ unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index);
 unsigned int xhci_last_valid_endpoint(u32 added_ctxs);
 void xhci_endpoint_zero(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_host_endpoint *ep);
 void xhci_endpoint_copy(struct xhci_hcd *xhci,
-		struct xhci_virt_device *vdev, unsigned int ep_index);
-void xhci_slot_copy(struct xhci_hcd *xhci, struct xhci_virt_device *vdev);
+		struct xhci_container_ctx *in_ctx,
+		struct xhci_container_ctx *out_ctx,
+		unsigned int ep_index);
+void xhci_slot_copy(struct xhci_hcd *xhci,
+		struct xhci_container_ctx *in_ctx,
+		struct xhci_container_ctx *out_ctx);
 int xhci_endpoint_init(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev,
 		struct usb_device *udev, struct usb_host_endpoint *ep,
 		gfp_t mem_flags);
 void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring);
+struct xhci_command *xhci_alloc_command(struct xhci_hcd *xhci,
+		bool allocate_completion, gfp_t mem_flags);
+void xhci_free_command(struct xhci_hcd *xhci,
+		struct xhci_command *command);
 
 #ifdef CONFIG_PCI
 /* xHCI PCI glue */
@@ -1229,7 +1257,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb,
 int xhci_queue_intr_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb,
 		int slot_id, unsigned int ep_index);
 int xhci_queue_configure_endpoint(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
-		u32 slot_id);
+		u32 slot_id, bool command_must_succeed);
 int xhci_queue_evaluate_context(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr,
 		u32 slot_id);
 int xhci_queue_reset_ep(struct xhci_hcd *xhci, int slot_id,
-- 
cgit v0.10.2


From a50c8aa953c65fd690eca03a2618ac445a3da05d Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:15 -0700
Subject: USB: xhci: Fix command wait list handling.

In the xHCI driver, configure endpoint commands that are submitted to the
hardware may involve one of two data structures.  If the configure
endpoint command is setting up a new configuration or modifying max packet
sizes, the data structures and completions are statically allocated in the
xhci_virt_device structure.  If the command is being used to set up
streams or add hub information, then the data structures are dynamically
allocated, and placed on a device command waiting list.

Break out the code to check whether a completed command is in the device
command waiting list.  Fix a subtle bug in the old code: continue
processing the command if the command isn't in the wait list.  In the old
code, if there was a command in the wait list, but it didn't match the
completed command, the completed command event would be dropped.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a9379b3..2274604 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -685,6 +685,34 @@ static void handle_reset_ep_completion(struct xhci_hcd *xhci,
 	}
 }
 
+/* Check to see if a command in the device's command queue matches this one.
+ * Signal the completion or free the command, and return 1.  Return 0 if the
+ * completed command isn't at the head of the command list.
+ */
+static int handle_cmd_in_cmd_wait_list(struct xhci_hcd *xhci,
+		struct xhci_virt_device *virt_dev,
+		struct xhci_event_cmd *event)
+{
+	struct xhci_command *command;
+
+	if (list_empty(&virt_dev->cmd_list))
+		return 0;
+
+	command = list_entry(virt_dev->cmd_list.next,
+			struct xhci_command, cmd_list);
+	if (xhci->cmd_ring->dequeue != command->command_trb)
+		return 0;
+
+	command->status =
+		GET_COMP_CODE(event->status);
+	list_del(&command->cmd_list);
+	if (command->completion)
+		complete(command->completion);
+	else
+		xhci_free_command(xhci, command);
+	return 1;
+}
+
 static void handle_cmd_completion(struct xhci_hcd *xhci,
 		struct xhci_event_cmd *event)
 {
@@ -724,24 +752,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 		break;
 	case TRB_TYPE(TRB_CONFIG_EP):
 		virt_dev = xhci->devs[slot_id];
-		/* Check to see if a command in the device's command queue
-		 * matches this one.  Signal the completion or free the command.
-		 */
-		if (!list_empty(&virt_dev->cmd_list)) {
-			struct xhci_command *command;
-			command = list_entry(virt_dev->cmd_list.next,
-					struct xhci_command, cmd_list);
-			if (xhci->cmd_ring->dequeue == command->command_trb) {
-				command->status =
-					GET_COMP_CODE(event->status);
-				list_del(&command->cmd_list);
-				if (command->completion)
-					complete(command->completion);
-				else
-					xhci_free_command(xhci, command);
-			}
+		if (handle_cmd_in_cmd_wait_list(xhci, virt_dev, event))
 			break;
-		}
 		/*
 		 * Configure endpoint commands can come from the USB core
 		 * configuration or alt setting changes, or because the HW
-- 
cgit v0.10.2


From 4a0cd9670f22c308bc5936ee9734d8ee3f1baa52 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:17 -0700
Subject: USB: xhci: Set route string for all devices.

The xHCI driver needs to set the route string in the slot context of all
devices, not just SuperSpeed devices.  The route string concept was added
in the USB 3.0 specification, section 10.1.3.2.  Each hub in the topology
is expected to have no more than 15 ports in order for the route string of
a device to be unique.  SuperSpeed hubs are restricted to only having 15
ports, but FS/LS/HS hubs are not.  The xHCI specification says that if the
port number the device is under is greater than 15, that portion of the
route string shall be set to 15.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 30dd263..b1b85ab 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -413,8 +413,13 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent,
 		} else {
 			snprintf(dev->devpath, sizeof dev->devpath,
 				"%s.%d", parent->devpath, port1);
-			dev->route = parent->route +
-				(port1 << ((parent->level - 1)*4));
+			/* Route string assumes hubs have less than 16 ports */
+			if (port1 < 15)
+				dev->route = parent->route +
+					(port1 << ((parent->level - 1)*4));
+			else
+				dev->route = parent->route +
+					(15 << ((parent->level - 1)*4));
 		}
 
 		dev->dev.parent = &parent->dev;
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 6e6797a..e046f0f 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -360,9 +360,9 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 	/* 3) Only the control endpoint is valid - one endpoint context */
 	slot_ctx->dev_info |= LAST_CTX(1);
 
+	slot_ctx->dev_info |= (u32) udev->route;
 	switch (udev->speed) {
 	case USB_SPEED_SUPER:
-		slot_ctx->dev_info |= (u32) udev->route;
 		slot_ctx->dev_info |= (u32) SLOT_SPEED_SS;
 		break;
 	case USB_SPEED_HIGH:
-- 
cgit v0.10.2


From 07b6de102843b717ecd962cf35ec4ad9b1fbed9d Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:19 -0700
Subject: USB: xhci: Set multi-TT field for LS/FS devices under hubs.

When setting up a slot context for an address device command, set the
multi-TT field if this is a low or full speed device under a HS hub with
multiple transaction translators.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index e046f0f..1db4fea 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -390,14 +390,12 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 	xhci_dbg(xhci, "Set root hub portnum to %d\n", top_dev->portnum);
 
 	/* Is this a LS/FS device under a HS hub? */
-	/*
-	 * FIXME: I don't think this is right, where does the TT info for the
-	 * roothub or parent hub come from?
-	 */
 	if ((udev->speed == USB_SPEED_LOW || udev->speed == USB_SPEED_FULL) &&
 			udev->tt) {
 		slot_ctx->tt_info = udev->tt->hub->slot_id;
 		slot_ctx->tt_info |= udev->ttport << 8;
+		if (udev->tt->multi)
+			slot_ctx->dev_info |= DEV_MTT;
 	}
 	xhci_dbg(xhci, "udev->tt = %p\n", udev->tt);
 	xhci_dbg(xhci, "udev->ttport = 0x%x\n", udev->ttport);
-- 
cgit v0.10.2


From ac1c1b7f16ed287fcec5bcfae06d0165c3941ec3 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:20 -0700
Subject: USB: xhci: Support USB hubs.

For a USB hub to work under an xHCI host controller, the xHC's internal
scheduler must be made aware of the hub's characteristics.  Add an xHCI
hook that the USB core will call after it fetches the hub descriptor.
This hook will add hub information to the slot context for that device,
including whether it has multiple TTs or a single TT, the number of ports
on the hub, and TT think time.

Setting up the slot context for the device is different for 0.95 and 0.96
xHCI host controllers.

Some of the slot context reserved fields in the 0.95 specification were
changed into hub fields in the 0.96 specification.  Don't set the TT think
time or number of ports for a hub if we're dealing with a 0.95-compliant
xHCI host controller.

The 0.95 xHCI specification says that to modify the hub flag, we need to
issue an evaluate context command.  The 0.96 specification says that flag
can be set with a configure endpoint command.  Issue the correct command
based on the version reported by the hardware.

This patch does not add support for multi-TT hubs.  Multi-TT hubs expose
a single TT on alt setting 0, and multi-TT on alt setting 1.  The xHCI
driver can't handle setting alternate interfaces yet.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 2fcc360..99911e7 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -1594,6 +1594,88 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev)
 	return 0;
 }
 
+/* Once a hub descriptor is fetched for a device, we need to update the xHC's
+ * internal data structures for the device.
+ */
+int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+			struct usb_tt *tt, gfp_t mem_flags)
+{
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+	struct xhci_virt_device *vdev;
+	struct xhci_command *config_cmd;
+	struct xhci_input_control_ctx *ctrl_ctx;
+	struct xhci_slot_ctx *slot_ctx;
+	unsigned long flags;
+	unsigned think_time;
+	int ret;
+
+	/* Ignore root hubs */
+	if (!hdev->parent)
+		return 0;
+
+	vdev = xhci->devs[hdev->slot_id];
+	if (!vdev) {
+		xhci_warn(xhci, "Cannot update hub desc for unknown device.\n");
+		return -EINVAL;
+	}
+	config_cmd = xhci_alloc_command(xhci, true, mem_flags);
+	if (!config_cmd) {
+		xhci_dbg(xhci, "Could not allocate xHCI command structure.\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&xhci->lock, flags);
+	xhci_slot_copy(xhci, config_cmd->in_ctx, vdev->out_ctx);
+	ctrl_ctx = xhci_get_input_control_ctx(xhci, config_cmd->in_ctx);
+	ctrl_ctx->add_flags |= SLOT_FLAG;
+	slot_ctx = xhci_get_slot_ctx(xhci, config_cmd->in_ctx);
+	slot_ctx->dev_info |= DEV_HUB;
+	if (tt->multi)
+		slot_ctx->dev_info |= DEV_MTT;
+	if (xhci->hci_version > 0x95) {
+		xhci_dbg(xhci, "xHCI version %x needs hub "
+				"TT think time and number of ports\n",
+				(unsigned int) xhci->hci_version);
+		slot_ctx->dev_info2 |= XHCI_MAX_PORTS(hdev->maxchild);
+		/* Set TT think time - convert from ns to FS bit times.
+		 * 0 = 8 FS bit times, 1 = 16 FS bit times,
+		 * 2 = 24 FS bit times, 3 = 32 FS bit times.
+		 */
+		think_time = tt->think_time;
+		if (think_time != 0)
+			think_time = (think_time / 666) - 1;
+		slot_ctx->tt_info |= TT_THINK_TIME(think_time);
+	} else {
+		xhci_dbg(xhci, "xHCI version %x doesn't need hub "
+				"TT think time or number of ports\n",
+				(unsigned int) xhci->hci_version);
+	}
+	slot_ctx->dev_state = 0;
+	spin_unlock_irqrestore(&xhci->lock, flags);
+
+	xhci_dbg(xhci, "Set up %s for hub device.\n",
+			(xhci->hci_version > 0x95) ?
+			"configure endpoint" : "evaluate context");
+	xhci_dbg(xhci, "Slot %u Input Context:\n", hdev->slot_id);
+	xhci_dbg_ctx(xhci, config_cmd->in_ctx, 0);
+
+	/* Issue and wait for the configure endpoint or
+	 * evaluate context command.
+	 */
+	if (xhci->hci_version > 0x95)
+		ret = xhci_configure_endpoint(xhci, hdev, config_cmd,
+				false, false);
+	else
+		ret = xhci_configure_endpoint(xhci, hdev, config_cmd,
+				true, false);
+
+	xhci_dbg(xhci, "Slot %u Output Context:\n", hdev->slot_id);
+	xhci_dbg_ctx(xhci, vdev->out_ctx, 0);
+
+	xhci_free_command(xhci, config_cmd);
+	return ret;
+}
+
 int xhci_get_frame(struct usb_hcd *hcd)
 {
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 8fb308d..b7712f2 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -63,6 +63,8 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
 	xhci->hcs_params1 = xhci_readl(xhci, &xhci->cap_regs->hcs_params1);
 	xhci->hcs_params2 = xhci_readl(xhci, &xhci->cap_regs->hcs_params2);
 	xhci->hcs_params3 = xhci_readl(xhci, &xhci->cap_regs->hcs_params3);
+	xhci->hcc_params = xhci_readl(xhci, &xhci->cap_regs->hc_capbase);
+	xhci->hci_version = HC_VERSION(xhci->hcc_params);
 	xhci->hcc_params = xhci_readl(xhci, &xhci->cap_regs->hcc_params);
 	xhci_print_registers(xhci);
 
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 2274604..173c39c 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -789,6 +789,9 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 		}
 		break;
 	case TRB_TYPE(TRB_EVAL_CONTEXT):
+		virt_dev = xhci->devs[slot_id];
+		if (handle_cmd_in_cmd_wait_list(xhci, virt_dev, event))
+			break;
 		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
 		complete(&xhci->devs[slot_id]->cmd_completion);
 		break;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 36f7d4f..4b254b6 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -509,6 +509,8 @@ struct xhci_slot_ctx {
 #define MAX_EXIT	(0xffff)
 /* Root hub port number that is needed to access the USB device */
 #define ROOT_HUB_PORT(p)	(((p) & 0xff) << 16)
+/* Maximum number of ports under a hub device */
+#define XHCI_MAX_PORTS(p)	(((p) & 0xff) << 24)
 
 /* tt_info bitmasks */
 /*
@@ -522,6 +524,7 @@ struct xhci_slot_ctx {
  * '0' if the device is not low or full speed.
  */
 #define TT_PORT		(0xff << 8)
+#define TT_THINK_TIME(p)	(((p) & 0x3) << 16)
 
 /* dev_state bitmasks */
 /* USB device address - assigned by the HC */
@@ -1231,6 +1234,8 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd);
 int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev);
 void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev);
 int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev);
+int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+			struct usb_tt *tt, gfp_t mem_flags);
 int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags);
 int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status);
 int xhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep);
-- 
cgit v0.10.2


From b356b7c7696b289dda99022d71e3979c6134af52 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 4 Sep 2009 10:53:24 -0700
Subject: USB: Add hub descriptor update hook for xHCI

Add a hook for updating xHCI internal structures after khubd fetches the
hub descriptor and sets up the hub's TT information.  The xHCI driver must
update the internal structures before devices under the hub can be
enumerated.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index ec5c67e..79782a1 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -267,6 +267,11 @@ struct hc_driver {
 	void	(*reset_bandwidth)(struct usb_hcd *, struct usb_device *);
 		/* Returns the hardware-chosen device address */
 	int	(*address_device)(struct usb_hcd *, struct usb_device *udev);
+		/* Notifies the HCD after a hub descriptor is fetched.
+		 * Will block.
+		 */
+	int	(*update_hub_device)(struct usb_hcd *, struct usb_device *hdev,
+			struct usb_tt *tt, gfp_t mem_flags);
 };
 
 extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index a880516..5ce8391 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -860,6 +860,7 @@ static int hub_post_reset(struct usb_interface *intf)
 static int hub_configure(struct usb_hub *hub,
 	struct usb_endpoint_descriptor *endpoint)
 {
+	struct usb_hcd *hcd;
 	struct usb_device *hdev = hub->hdev;
 	struct device *hub_dev = hub->intfdev;
 	u16 hubstatus, hubchange;
@@ -1061,6 +1062,19 @@ static int hub_configure(struct usb_hub *hub,
 		dev_dbg(hub_dev, "%umA bus power budget for each child\n",
 				hub->mA_per_port);
 
+	/* Update the HCD's internal representation of this hub before khubd
+	 * starts getting port status changes for devices under the hub.
+	 */
+	hcd = bus_to_hcd(hdev->bus);
+	if (hcd->driver->update_hub_device) {
+		ret = hcd->driver->update_hub_device(hcd, hdev,
+				&hub->tt, GFP_KERNEL);
+		if (ret < 0) {
+			message = "can't update HCD hub info";
+			goto fail;
+		}
+	}
+
 	ret = hub_hub_status(hub, &hubstatus, &hubchange);
 	if (ret < 0) {
 		message = "can't get hub status";
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index b7712f2..06595ec 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -136,6 +136,7 @@ static const struct hc_driver xhci_pci_hc_driver = {
 	.check_bandwidth =	xhci_check_bandwidth,
 	.reset_bandwidth =	xhci_reset_bandwidth,
 	.address_device =	xhci_address_device,
+	.update_hub_device =	xhci_update_hub_device,
 
 	/*
 	 * scheduling support
-- 
cgit v0.10.2


From e7389cc9a7ff7c6e760e741c81a751c834f7d145 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Wed, 9 Sep 2009 17:06:53 +0200
Subject: USB: skel_read really sucks royally

The read code path of the skeleton driver really sucks

 - skel_read works only for devices which always send data
 - the timeout comes out of thin air
 - it blocks signals for the duration of the timeout
 - it disallows nonblocking IO by design

This patch fixes it by using a real urb, a completion and interruptible waits.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index 60ba631..5ffa3e2 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -52,15 +52,21 @@ struct usb_skel {
 	struct usb_interface	*interface;		/* the interface for this device */
 	struct semaphore	limit_sem;		/* limiting the number of writes in progress */
 	struct usb_anchor	submitted;		/* in case we need to retract our submissions */
+	struct urb		*bulk_in_urb;		/* the urb to read data with */
 	unsigned char           *bulk_in_buffer;	/* the buffer to receive data */
 	size_t			bulk_in_size;		/* the size of the receive buffer */
+	size_t			bulk_in_filled;		/* number of bytes in the buffer */
+	size_t			bulk_in_copied;		/* already copied to user space */
 	__u8			bulk_in_endpointAddr;	/* the address of the bulk in endpoint */
 	__u8			bulk_out_endpointAddr;	/* the address of the bulk out endpoint */
 	int			errors;			/* the last request tanked */
 	int			open_count;		/* count the number of openers */
+	bool			ongoing_read;		/* a read is going on */
+	bool			processed_urb;		/* indicates we haven't processed the urb */
 	spinlock_t		err_lock;		/* lock for errors */
 	struct kref		kref;
 	struct mutex		io_mutex;		/* synchronize I/O with disconnect */
+	struct completion	bulk_in_completion;	/* to wait for an ongoing read */
 };
 #define to_skel_dev(d) container_of(d, struct usb_skel, kref)
 
@@ -71,6 +77,7 @@ static void skel_delete(struct kref *kref)
 {
 	struct usb_skel *dev = to_skel_dev(kref);
 
+	usb_free_urb(dev->bulk_in_urb);
 	usb_put_dev(dev->udev);
 	kfree(dev->bulk_in_buffer);
 	kfree(dev);
@@ -174,38 +181,182 @@ static int skel_flush(struct file *file, fl_owner_t id)
 	return res;
 }
 
+static void skel_read_bulk_callback(struct urb *urb)
+{
+	struct usb_skel *dev;
+
+	dev = urb->context;
+
+	spin_lock(&dev->err_lock);
+	/* sync/async unlink faults aren't errors */
+	if (urb->status) {
+		if(!(urb->status == -ENOENT ||
+		    urb->status == -ECONNRESET ||
+		    urb->status == -ESHUTDOWN))
+			err("%s - nonzero write bulk status received: %d",
+			    __func__, urb->status);
+
+		dev->errors = urb->status;
+	} else {
+		dev->bulk_in_filled = urb->actual_length;
+	}
+	dev->ongoing_read = 0;
+	spin_unlock(&dev->err_lock);
+
+	complete(&dev->bulk_in_completion);
+}
+
+static int skel_do_read_io(struct usb_skel *dev, size_t count)
+{
+	int rv;
+
+	/* prepare a read */
+	usb_fill_bulk_urb(dev->bulk_in_urb,
+			dev->udev,
+			usb_rcvbulkpipe(dev->udev,
+				dev->bulk_in_endpointAddr),
+			dev->bulk_in_buffer,
+			min(dev->bulk_in_size, count),
+			skel_read_bulk_callback,
+			dev);
+	/* tell everybody to leave the URB alone */
+	spin_lock_irq(&dev->err_lock);
+	dev->ongoing_read = 1;
+	spin_unlock_irq(&dev->err_lock);
+
+	/* do it */
+	rv = usb_submit_urb(dev->bulk_in_urb, GFP_KERNEL);
+	if (rv < 0) {
+		err("%s - failed submitting read urb, error %d",
+			__func__, rv);
+		dev->bulk_in_filled = 0;
+		rv = (rv == -ENOMEM) ? rv : -EIO;
+		spin_lock_irq(&dev->err_lock);
+		dev->ongoing_read = 0;
+		spin_unlock_irq(&dev->err_lock);
+	}
+
+	return rv;
+}
+
 static ssize_t skel_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
 {
 	struct usb_skel *dev;
-	int retval;
-	int bytes_read;
+	int rv;
+	bool ongoing_io;
 
 	dev = (struct usb_skel *)file->private_data;
 
-	mutex_lock(&dev->io_mutex);
+	/* if we cannot read at all, return EOF */
+	if (!dev->bulk_in_urb || !count)
+		return 0;
+
+	/* no concurrent readers */
+	rv = mutex_lock_interruptible(&dev->io_mutex);
+	if (rv < 0)
+		return rv;
+
 	if (!dev->interface) {		/* disconnect() was called */
-		retval = -ENODEV;
+		rv = -ENODEV;
 		goto exit;
 	}
 
-	/* do a blocking bulk read to get data from the device */
-	retval = usb_bulk_msg(dev->udev,
-			      usb_rcvbulkpipe(dev->udev, dev->bulk_in_endpointAddr),
-			      dev->bulk_in_buffer,
-			      min(dev->bulk_in_size, count),
-			      &bytes_read, 10000);
-
-	/* if the read was successful, copy the data to userspace */
-	if (!retval) {
-		if (copy_to_user(buffer, dev->bulk_in_buffer, bytes_read))
-			retval = -EFAULT;
-		else
-			retval = bytes_read;
+	/* if IO is under way, we must not touch things */
+retry:
+	spin_lock_irq(&dev->err_lock);
+	ongoing_io = dev->ongoing_read;
+	spin_unlock_irq(&dev->err_lock);
+
+	if (ongoing_io) {
+		/*
+		 * IO may take forever
+		 * hence wait in an interruptible state
+		 */
+		rv = wait_for_completion_interruptible(&dev->bulk_in_completion);
+		if (rv < 0)
+			goto exit;
+		/*
+		 * by waiting we also semiprocessed the urb
+		 * we must finish now
+		 */
+		dev->bulk_in_copied = 0;
+		dev->processed_urb = 1;
+	}
+
+	if (!dev->processed_urb) {
+		/*
+		 * the URB hasn't been processed
+		 * do it now
+		 */
+		wait_for_completion(&dev->bulk_in_completion);
+		dev->bulk_in_copied = 0;
+		dev->processed_urb = 1;
 	}
 
+	/* errors must be reported */
+	if ((rv = dev->errors) < 0) {
+		/* any error is reported once */
+		dev->errors = 0;
+		/* to preserve notifications about reset */
+		rv = (rv == -EPIPE) ? rv : -EIO;
+		/* no data to deliver */
+		dev->bulk_in_filled = 0;
+		/* report it */
+		goto exit;
+	}
+
+	/*
+	 * if the buffer is filled we may satisfy the read
+	 * else we need to start IO
+	 */
+
+	if (dev->bulk_in_filled) {
+		/* we had read data */
+		size_t available = dev->bulk_in_filled - dev->bulk_in_copied;
+		size_t chunk = min(available, count);
+
+		if (!available) {
+			/*
+			 * all data has been used
+			 * actual IO needs to be done
+			 */
+			rv = skel_do_read_io(dev, count);
+			if (rv < 0)
+				goto exit;
+			else
+				goto retry;
+		}
+		/*
+		 * data is available
+		 * chunk tells us how much shall be copied
+		 */
+
+		if (copy_to_user(buffer,
+				 dev->bulk_in_buffer + dev->bulk_in_copied,
+				 chunk))
+			rv = -EFAULT;
+		else
+			rv = chunk;
+
+		dev->bulk_in_copied += chunk;
+
+		/*
+		 * if we are asked for more than we have,
+		 * we start IO but don't wait
+		 */
+		if (available < count)
+			skel_do_read_io(dev, count - chunk);
+	} else {
+		/* no data in the buffer */
+		rv = skel_do_read_io(dev, count);
+		if (rv < 0)
+			goto exit;
+		else
+			goto retry;
+	}
 exit:
 	mutex_unlock(&dev->io_mutex);
-	return retval;
+	return rv;
 }
 
 static void skel_write_bulk_callback(struct urb *urb)
@@ -363,6 +514,7 @@ static int skel_probe(struct usb_interface *interface, const struct usb_device_i
 	mutex_init(&dev->io_mutex);
 	spin_lock_init(&dev->err_lock);
 	init_usb_anchor(&dev->submitted);
+	init_completion(&dev->bulk_in_completion);
 
 	dev->udev = usb_get_dev(interface_to_usbdev(interface));
 	dev->interface = interface;
@@ -384,6 +536,11 @@ static int skel_probe(struct usb_interface *interface, const struct usb_device_i
 				err("Could not allocate bulk_in_buffer");
 				goto error;
 			}
+			dev->bulk_in_urb = usb_alloc_urb(0, GFP_KERNEL);
+			if (!dev->bulk_in_urb) {
+				err("Could not allocate bulk_in_urb");
+				goto error;
+			}
 		}
 
 		if (!dev->bulk_out_endpointAddr &&
@@ -453,6 +610,7 @@ static void skel_draw_down(struct usb_skel *dev)
 	time = usb_wait_anchor_empty_timeout(&dev->submitted, 1000);
 	if (!time)
 		usb_kill_anchored_urbs(&dev->submitted);
+	usb_kill_urb(dev->bulk_in_urb);
 }
 
 static int skel_suspend(struct usb_interface *intf, pm_message_t message)
-- 
cgit v0.10.2


From 798199867385417ba6494472e39c016e3340758c Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Wed, 9 Sep 2009 10:23:35 +0200
Subject: USB: make usb-skeleton honor O_NONBLOCK in write path

usb:usb-skeleton: honor O_NONBLOCK in write path

nonblocking writes are allowed by using down_trylock if necessary
to reserve an URB

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index 5ffa3e2..768fda9 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -399,9 +399,16 @@ static ssize_t skel_write(struct file *file, const char *user_buffer, size_t cou
 		goto exit;
 
 	/* limit the number of URBs in flight to stop a user from using up all RAM */
-	if (down_interruptible(&dev->limit_sem)) {
-		retval = -ERESTARTSYS;
-		goto exit;
+	if (!file->f_flags & O_NONBLOCK) {
+		if (down_interruptible(&dev->limit_sem)) {
+			retval = -ERESTARTSYS;
+			goto exit;
+		}
+	} else {
+		if (down_trylock(&dev->limit_sem)) {
+			retval = -EAGAIN;
+			goto exit;
+		}
 	}
 
 	spin_lock_irq(&dev->err_lock);
-- 
cgit v0.10.2


From 8cd01664344e983d73a85ce604f7c23f475cf303 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Wed, 9 Sep 2009 17:08:50 +0200
Subject: USB: O_NONBLOCK in read path of skeleton

Non blocking IO is supported in the read path of usb-skeleton.
This is done by just not blocking. As support for handling signals
without stopping IO is already there, it can be used for O_NONBLOCK, too.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index 768fda9..ef8c877 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -268,6 +268,11 @@ retry:
 	spin_unlock_irq(&dev->err_lock);
 
 	if (ongoing_io) {
+		/* nonblocking IO shall not wait */
+		if (file->f_flags & O_NONBLOCK) {
+			rv = -EAGAIN;
+			goto exit;
+		}
 		/*
 		 * IO may take forever
 		 * hence wait in an interruptible state
@@ -351,8 +356,9 @@ retry:
 		rv = skel_do_read_io(dev, count);
 		if (rv < 0)
 			goto exit;
-		else
+		else if (!file->f_flags & O_NONBLOCK)
 			goto retry;
+		rv = -EAGAIN;
 	}
 exit:
 	mutex_unlock(&dev->io_mutex);
-- 
cgit v0.10.2


From 3ae9da1c99eda248b469fc10d8d9fcebebc949cf Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 11 Sep 2009 16:07:30 -0700
Subject: USB: skeleton: fix coding style issues.

This fixes up the majority of the coding style issues in the
usb-skeleton driver.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index ef8c877..b62f2bc 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -18,7 +18,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kref.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/mutex.h>
 
@@ -28,7 +28,7 @@
 #define USB_SKEL_PRODUCT_ID	0xfff0
 
 /* table of devices that work with this driver */
-static struct usb_device_id skel_table [] = {
+static struct usb_device_id skel_table[] = {
 	{ USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) },
 	{ }					/* Terminating entry */
 };
@@ -94,7 +94,7 @@ static int skel_open(struct inode *inode, struct file *file)
 
 	interface = usb_find_interface(&skel_driver, subminor);
 	if (!interface) {
-		err ("%s - error, can't find device for minor %d",
+		err("%s - error, can't find device for minor %d",
 		     __func__, subminor);
 		retval = -ENODEV;
 		goto exit;
@@ -190,7 +190,7 @@ static void skel_read_bulk_callback(struct urb *urb)
 	spin_lock(&dev->err_lock);
 	/* sync/async unlink faults aren't errors */
 	if (urb->status) {
-		if(!(urb->status == -ENOENT ||
+		if (!(urb->status == -ENOENT ||
 		    urb->status == -ECONNRESET ||
 		    urb->status == -ESHUTDOWN))
 			err("%s - nonzero write bulk status received: %d",
@@ -239,7 +239,8 @@ static int skel_do_read_io(struct usb_skel *dev, size_t count)
 	return rv;
 }
 
-static ssize_t skel_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
+static ssize_t skel_read(struct file *file, char *buffer, size_t count,
+			 loff_t *ppos)
 {
 	struct usb_skel *dev;
 	int rv;
@@ -299,7 +300,8 @@ retry:
 	}
 
 	/* errors must be reported */
-	if ((rv = dev->errors) < 0) {
+	rv = dev->errors;
+	if (rv < 0) {
 		/* any error is reported once */
 		dev->errors = 0;
 		/* to preserve notifications about reset */
@@ -373,7 +375,7 @@ static void skel_write_bulk_callback(struct urb *urb)
 
 	/* sync/async unlink faults aren't errors */
 	if (urb->status) {
-		if(!(urb->status == -ENOENT ||
+		if (!(urb->status == -ENOENT ||
 		    urb->status == -ECONNRESET ||
 		    urb->status == -ESHUTDOWN))
 			err("%s - nonzero write bulk status received: %d",
@@ -390,7 +392,8 @@ static void skel_write_bulk_callback(struct urb *urb)
 	up(&dev->limit_sem);
 }
 
-static ssize_t skel_write(struct file *file, const char *user_buffer, size_t count, loff_t *ppos)
+static ssize_t skel_write(struct file *file, const char *user_buffer,
+			  size_t count, loff_t *ppos)
 {
 	struct usb_skel *dev;
 	int retval = 0;
@@ -404,7 +407,10 @@ static ssize_t skel_write(struct file *file, const char *user_buffer, size_t cou
 	if (count == 0)
 		goto exit;
 
-	/* limit the number of URBs in flight to stop a user from using up all RAM */
+	/*
+	 * limit the number of URBs in flight to stop a user from using up all
+	 * RAM
+	 */
 	if (!file->f_flags & O_NONBLOCK) {
 		if (down_interruptible(&dev->limit_sem)) {
 			retval = -ERESTARTSYS;
@@ -418,7 +424,8 @@ static ssize_t skel_write(struct file *file, const char *user_buffer, size_t cou
 	}
 
 	spin_lock_irq(&dev->err_lock);
-	if ((retval = dev->errors) < 0) {
+	retval = dev->errors;
+	if (retval < 0) {
 		/* any error is reported once */
 		dev->errors = 0;
 		/* to preserve notifications about reset */
@@ -435,7 +442,8 @@ static ssize_t skel_write(struct file *file, const char *user_buffer, size_t cou
 		goto error;
 	}
 
-	buf = usb_buffer_alloc(dev->udev, writesize, GFP_KERNEL, &urb->transfer_dma);
+	buf = usb_buffer_alloc(dev->udev, writesize, GFP_KERNEL,
+			       &urb->transfer_dma);
 	if (!buf) {
 		retval = -ENOMEM;
 		goto error;
@@ -465,11 +473,15 @@ static ssize_t skel_write(struct file *file, const char *user_buffer, size_t cou
 	retval = usb_submit_urb(urb, GFP_KERNEL);
 	mutex_unlock(&dev->io_mutex);
 	if (retval) {
-		err("%s - failed submitting write urb, error %d", __func__, retval);
+		err("%s - failed submitting write urb, error %d", __func__,
+		    retval);
 		goto error_unanchor;
 	}
 
-	/* release our reference to this urb, the USB core will eventually free it entirely */
+	/*
+	 * release our reference to this urb, the USB core will eventually free
+	 * it entirely
+	 */
 	usb_free_urb(urb);
 
 
@@ -507,7 +519,8 @@ static struct usb_class_driver skel_class = {
 	.minor_base =	USB_SKEL_MINOR_BASE,
 };
 
-static int skel_probe(struct usb_interface *interface, const struct usb_device_id *id)
+static int skel_probe(struct usb_interface *interface,
+		      const struct usb_device_id *id)
 {
 	struct usb_skel *dev;
 	struct usb_host_interface *iface_desc;
@@ -636,7 +649,7 @@ static int skel_suspend(struct usb_interface *intf, pm_message_t message)
 	return 0;
 }
 
-static int skel_resume (struct usb_interface *intf)
+static int skel_resume(struct usb_interface *intf)
 {
 	return 0;
 }
-- 
cgit v0.10.2


From a87371b477774b290c27bc5cb7f4ccc5379574a9 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 12 Sep 2009 17:00:46 +0200
Subject: USB: Fix sysfs paths in documentation

Neither /sys/usb/devices nor /sys/bus/devices exist. The correct path
is /sys/bus/usb/devices.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/Documentation/usb/authorization.txt b/Documentation/usb/authorization.txt
index 381b22e..c069b68 100644
--- a/Documentation/usb/authorization.txt
+++ b/Documentation/usb/authorization.txt
@@ -16,20 +16,20 @@ Usage:
 
 Authorize a device to connect:
 
-$ echo 1 > /sys/usb/devices/DEVICE/authorized
+$ echo 1 > /sys/bus/usb/devices/DEVICE/authorized
 
 Deauthorize a device:
 
-$ echo 0 > /sys/usb/devices/DEVICE/authorized
+$ echo 0 > /sys/bus/usb/devices/DEVICE/authorized
 
 Set new devices connected to hostX to be deauthorized by default (ie:
 lock down):
 
-$ echo 0 > /sys/bus/devices/usbX/authorized_default
+$ echo 0 > /sys/bus/usb/devices/usbX/authorized_default
 
 Remove the lock down:
 
-$ echo 1 > /sys/bus/devices/usbX/authorized_default
+$ echo 1 > /sys/bus/usb/devices/usbX/authorized_default
 
 By default, Wired USB devices are authorized by default to
 connect. Wireless USB hosts deauthorize by default all new connected
@@ -47,7 +47,7 @@ USB port):
 boot up
 rc.local ->
 
- for host in /sys/bus/devices/usb*
+ for host in /sys/bus/usb/devices/usb*
  do
     echo 0 > $host/authorized_default
  done
-- 
cgit v0.10.2


From fa081b00a80ef3f4575c99af6e97d29e1628cf51 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 22 Sep 2009 16:43:27 -0700
Subject: include/linux/kmemcheck.h: fix a trillion warnings

of the form

include/net/inet_sock.h:208: warning: ISO C90 forbids mixed declarations and code

Cc: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index c800660..136cdcd 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -145,10 +145,12 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
 
 #define kmemcheck_annotate_bitfield(ptr, name)				\
 	do {								\
+		int _n;							\
+									\
 		if (!ptr)						\
 			break;						\
 									\
-		int _n = (long) &((ptr)->name##_end)			\
+		_n = (long) &((ptr)->name##_end)			\
 			- (long) &((ptr)->name##_begin);		\
 		BUILD_BUG_ON(_n < 0);					\
 									\
-- 
cgit v0.10.2


From ca976c53de0c33160083d36f70bd18d7970f6969 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 22 Sep 2009 16:43:28 -0700
Subject: smbfs: read buffer overflow

This function uses signed integers for the unix_date and local variables -
if a negative number is supplied and the leap-year condition is not met,
month will be 0, leading to a read of day_n[-1]

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 9468168..71c29b6 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -509,7 +509,7 @@ date_unix2dos(struct smb_sb_info *server,
 		month = 2;
 	} else {
 		nl_day = (year & 3) || day <= 59 ? day : day - 1;
-		for (month = 0; month < 12; month++)
+		for (month = 1; month < 12; month++)
 			if (day_n[month] > nl_day)
 				break;
 	}
-- 
cgit v0.10.2


From f58f2fa9286db0ce9124ca9986d56aa5420b7f59 Mon Sep 17 00:00:00 2001
From: "M. Mohan Kumar" <mohan@in.ibm.com>
Date: Tue, 22 Sep 2009 16:43:29 -0700
Subject: kprobes: use do_IRQ() in lkdtm

Current lkdtm code puts a probe on __do_IRQ for some of the kdump test
cases.  Since __do_IRQ is deprecated, change lkdtm code to use do_IRQ
function.

Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Cc: Ankita Garg <ankita@in.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index 1bfe5d1..3648b23 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -283,7 +283,7 @@ static int __init lkdtm_module_init(void)
 
 	switch (cpoint) {
 	case INT_HARDWARE_ENTRY:
-		lkdtm.kp.symbol_name = "__do_IRQ";
+		lkdtm.kp.symbol_name = "do_IRQ";
 		lkdtm.entry = (kprobe_opcode_t*) jp_do_irq;
 		break;
 	case INT_HW_IRQ_EN:
-- 
cgit v0.10.2


From 5ae87e79ecb5baa65e9cf48be874098fafad0668 Mon Sep 17 00:00:00 2001
From: Guillaume Knispel <gknispel@proformatique.com>
Date: Tue, 22 Sep 2009 16:43:30 -0700
Subject: poll/select: avoid arithmetic overflow in __estimate_accuracy()

__estimate_accuracy() was prone to integer overflow, for example if *tv ==
{2147, 483648000} on a 32 bit computer (or even for delays as small as
{429, 500000000} if the task is niced).

Because the result was already forced between 0 and 100ms, the effect of
the overflow was not too problematic, but the use of the hrtimer range
feature was not optimal in overflow cases.

This patch ensures that there can not be an integer overflow in this
function.

Signed-off-by: Guillaume Knispel <gknispel@proformatique.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/select.c b/fs/select.c
index 8084834..a201fc3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -41,22 +41,28 @@
  * better solutions..
  */
 
+#define MAX_SLACK	(100 * NSEC_PER_MSEC)
+
 static long __estimate_accuracy(struct timespec *tv)
 {
 	long slack;
 	int divfactor = 1000;
 
+	if (tv->tv_sec < 0)
+		return 0;
+
 	if (task_nice(current) > 0)
 		divfactor = divfactor / 5;
 
+	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
+		return MAX_SLACK;
+
 	slack = tv->tv_nsec / divfactor;
 	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
 
-	if (slack > 100 * NSEC_PER_MSEC)
-		slack =  100 * NSEC_PER_MSEC;
+	if (slack > MAX_SLACK)
+		return MAX_SLACK;
 
-	if (slack < 0)
-		slack = 0;
 	return slack;
 }
 
-- 
cgit v0.10.2


From 3a3b6ed2235f2f619889dd6096e24b6d93bf3339 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Tue, 22 Sep 2009 16:43:31 -0700
Subject: printk boot_delay: rename printk_delay_msec to loops_per_msec

Rename `printk_delay_msec' to `loops_per_msec', because the patch "printk:
add printk_delay to make messages readable for some scenarios" wishes to
more appropriately use the `printk_delay_msec' identifier.

[akpm@linux-foundation.org: add a comment]
Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/printk.c b/kernel/printk.c
index 602033a..932ea21 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -206,12 +206,11 @@ __setup("log_buf_len=", log_buf_len_setup);
 #ifdef CONFIG_BOOT_PRINTK_DELAY
 
 static unsigned int boot_delay; /* msecs delay after each printk during bootup */
-static unsigned long long printk_delay_msec; /* per msec, based on boot_delay */
+static unsigned long long loops_per_msec;	/* based on boot_delay */
 
 static int __init boot_delay_setup(char *str)
 {
 	unsigned long lpj;
-	unsigned long long loops_per_msec;
 
 	lpj = preset_lpj ? preset_lpj : 1000000;	/* some guess */
 	loops_per_msec = (unsigned long long)lpj / 1000 * HZ;
@@ -220,10 +219,9 @@ static int __init boot_delay_setup(char *str)
 	if (boot_delay > 10 * 1000)
 		boot_delay = 0;
 
-	printk_delay_msec = loops_per_msec;
-	printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
-		"HZ: %d, printk_delay_msec: %llu\n",
-		boot_delay, preset_lpj, lpj, HZ, printk_delay_msec);
+	pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
+		"HZ: %d, loops_per_msec: %llu\n",
+		boot_delay, preset_lpj, lpj, HZ, loops_per_msec);
 	return 1;
 }
 __setup("boot_delay=", boot_delay_setup);
@@ -236,7 +234,7 @@ static void boot_delay_msec(void)
 	if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
 		return;
 
-	k = (unsigned long long)printk_delay_msec * boot_delay;
+	k = (unsigned long long)loops_per_msec * boot_delay;
 
 	timeout = jiffies + msecs_to_jiffies(boot_delay);
 	while (k) {
-- 
cgit v0.10.2


From af91322ef3f29ae4114e736e2a72e28b4d619cf9 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Tue, 22 Sep 2009 16:43:33 -0700
Subject: printk: add printk_delay to make messages readable for some scenarios

When syslog is not possible, at the same time there's no serial/net
console available, it will be hard to read the printk messages.  For
example oops/panic/warning messages in shutdown phase.

Add a printk delay feature, we can make each printk message delay some
milliseconds.

Setting the delay by proc/sysctl interface: /proc/sys/kernel/printk_delay

The value range from 0 - 10000, default value is 0

[akpm@linux-foundation.org: fix a few things]
Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 3e5b63e..b3d8b49 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -313,6 +313,14 @@ send before ratelimiting kicks in.
 
 ==============================================================
 
+printk_delay:
+
+Delay each printk message in printk_delay milliseconds
+
+Value from 0 - 10000 is allowed.
+
+==============================================================
+
 randomize-va-space:
 
 This option can be used to select the type of process address
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b5b1e0..0a2a190 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -246,6 +246,8 @@ extern int printk_ratelimit(void);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
 
+extern int printk_delay_msec;
+
 /*
  * Print a one-time message (analogous to WARN_ONCE() et al):
  */
diff --git a/kernel/printk.c b/kernel/printk.c
index 932ea21..f38b07f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -653,6 +653,20 @@ static int recursion_bug;
 static int new_text_line = 1;
 static char printk_buf[1024];
 
+int printk_delay_msec __read_mostly;
+
+static inline void printk_delay(void)
+{
+	if (unlikely(printk_delay_msec)) {
+		int m = printk_delay_msec;
+
+		while (m--) {
+			mdelay(1);
+			touch_nmi_watchdog();
+		}
+	}
+}
+
 asmlinkage int vprintk(const char *fmt, va_list args)
 {
 	int printed_len = 0;
@@ -662,6 +676,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	char *p;
 
 	boot_delay_msec();
+	printk_delay();
 
 	preempt_disable();
 	/* This stops the holder of console_sem just where we want him */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6ba49c7..0dfaa47 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -106,6 +106,9 @@ static int __maybe_unused one = 1;
 static int __maybe_unused two = 2;
 static unsigned long one_ul = 1;
 static int one_hundred = 100;
+#ifdef CONFIG_PRINTK
+static int ten_thousand = 10000;
+#endif
 
 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -722,6 +725,17 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "printk_delay",
+		.data		= &printk_delay_msec,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &ten_thousand,
+	},
 #endif
 	{
 		.ctl_name	= KERN_NGROUPS_MAX,
-- 
cgit v0.10.2


From 1fd7317d02ec03c6fdf072317841287933d06d24 Mon Sep 17 00:00:00 2001
From: Nick Black <dank@qemfd.net>
Date: Tue, 22 Sep 2009 16:43:33 -0700
Subject: Move magic numbers into magic.h

Move various magic-number definitions into magic.h.

Signed-off-by: Nick Black <dank@qemfd.net>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 75efb02..d5f8c96 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -18,14 +18,13 @@
 #include <linux/mount.h>
 #include <linux/tty.h>
 #include <linux/mutex.h>
+#include <linux/magic.h>
 #include <linux/idr.h>
 #include <linux/devpts_fs.h>
 #include <linux/parser.h>
 #include <linux/fsnotify.h>
 #include <linux/seq_file.h>
 
-#define DEVPTS_SUPER_MAGIC 0x1cd1
-
 #define DEVPTS_DEFAULT_MODE 0600
 /*
  * ptmx is a new node in /dev/pts and will be unused in legacy (single-
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 06b7c26..eba6d55 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -31,12 +31,10 @@
 #include <linux/statfs.h>
 #include <linux/security.h>
 #include <linux/ima.h>
+#include <linux/magic.h>
 
 #include <asm/uaccess.h>
 
-/* some random number */
-#define HUGETLBFS_MAGIC	0x958458f6
-
 static const struct super_operations hugetlbfs_ops;
 static const struct address_space_operations hugetlbfs_aops;
 const struct file_operations hugetlbfs_file_operations;
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 1923327..bce3778 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -13,6 +13,7 @@
 #define SECURITYFS_MAGIC	0x73636673
 #define SELINUX_MAGIC		0xf97cff8c
 #define TMPFS_MAGIC		0x01021994
+#define HUGETLBFS_MAGIC 	0x958458f6	/* some random number */
 #define SQUASHFS_MAGIC		0x73717368
 #define EFS_SUPER_MAGIC		0x414A53
 #define EXT2_SUPER_MAGIC	0xEF53
@@ -53,4 +54,8 @@
 #define INOTIFYFS_SUPER_MAGIC	0x2BAD1DEA
 
 #define STACK_END_MAGIC		0x57AC6E9D
+
+#define DEVPTS_SUPER_MAGIC	0x1cd1
+#define SOCKFS_MAGIC		0x534F434B
+
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/net/socket.c b/net/socket.c
index 0ad02ae..49917a1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -86,6 +86,7 @@
 #include <linux/audit.h>
 #include <linux/wireless.h>
 #include <linux/nsproxy.h>
+#include <linux/magic.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -235,8 +236,6 @@ int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
 	return __put_user(klen, ulen);
 }
 
-#define SOCKFS_MAGIC 0x534F434B
-
 static struct kmem_cache *sock_inode_cachep __read_mostly;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index acae7ef4..c33b6bb 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -30,17 +30,11 @@
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <linux/audit.h>
+#include <linux/magic.h>
 #include "smack.h"
 
 #define task_security(task)	(task_cred_xxx((task), security))
 
-/*
- * I hope these are the hokeyist lines of code in the module. Casey.
- */
-#define DEVPTS_SUPER_MAGIC	0x1cd1
-#define SOCKFS_MAGIC		0x534F434B
-#define TMPFS_MAGIC		0x01021994
-
 /**
  * smk_fetch - Fetch the smack label from a file.
  * @ip: a pointer to the inode
-- 
cgit v0.10.2


From c02e3f361c75da04ca3025b4d19e947e9cc62ed3 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 22 Sep 2009 16:43:36 -0700
Subject: kmod: fix race in usermodehelper code

The user mode helper code has a race in it.  call_usermodehelper_exec()
takes an allocated subprocess_info structure, which it passes to a
workqueue, and then passes it to a kernel thread which it creates, after
which it calls complete to signal to the caller of
call_usermodehelper_exec() that it can free the subprocess_info struct.

But since we use that structure in the created thread, we can't call
complete from __call_usermodehelper(), which is where we create the kernel
thread.  We need to call complete() from within the kernel thread and then
not use subprocess_info afterward in the case of UMH_WAIT_EXEC.  Tested
successfully by me.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/kmod.c b/kernel/kmod.c
index 9fcb53a..689d20f 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -143,6 +143,7 @@ struct subprocess_info {
 static int ____call_usermodehelper(void *data)
 {
 	struct subprocess_info *sub_info = data;
+	enum umh_wait wait = sub_info->wait;
 	int retval;
 
 	BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
@@ -184,10 +185,14 @@ static int ____call_usermodehelper(void *data)
 	 */
 	set_user_nice(current, 0);
 
+	if (wait == UMH_WAIT_EXEC)
+		complete(sub_info->complete);
+
 	retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
 
 	/* Exec failed? */
-	sub_info->retval = retval;
+	if (wait != UMH_WAIT_EXEC)
+		sub_info->retval = retval;
 	do_exit(0);
 }
 
@@ -266,16 +271,14 @@ static void __call_usermodehelper(struct work_struct *work)
 
 	switch (wait) {
 	case UMH_NO_WAIT:
+	case UMH_WAIT_EXEC:
 		break;
 
 	case UMH_WAIT_PROC:
 		if (pid > 0)
 			break;
 		sub_info->retval = pid;
-		/* FALLTHROUGH */
-
-	case UMH_WAIT_EXEC:
-		complete(sub_info->complete);
+		break;
 	}
 }
 
-- 
cgit v0.10.2


From e898893399335514b10dfbd75598f8308976abe4 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Tue, 22 Sep 2009 16:43:36 -0700
Subject: dac960: fix undefined behavior on empty string

Fix undefined behavior due to a buffer underrun if an empty string is
written to the proc file.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index c77b6f3..6fa7b0f 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6562,7 +6562,7 @@ static int DAC960_ProcWriteUserCommand(struct file *file,
   if (copy_from_user(CommandBuffer, Buffer, Count)) return -EFAULT;
   CommandBuffer[Count] = '\0';
   Length = strlen(CommandBuffer);
-  if (CommandBuffer[Length-1] == '\n')
+  if (Length > 0 && CommandBuffer[Length-1] == '\n')
     CommandBuffer[--Length] = '\0';
   if (Controller->FirmwareType == DAC960_V1_Controller)
     return (DAC960_V1_ExecuteUserCommand(Controller, CommandBuffer)
-- 
cgit v0.10.2


From 5c725138437837291db5c25f4a076ee852e806e3 Mon Sep 17 00:00:00 2001
From: Trevor Keith <tsrk@tsrk.net>
Date: Tue, 22 Sep 2009 16:43:38 -0700
Subject: Fix all -Wmissing-prototypes warnings in x86 defconfig

Signed-off-by: Trevor Keith <tsrk@tsrk.net>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/conmakehash.c b/scripts/conmakehash.c
index e0c6891..263a44d 100644
--- a/scripts/conmakehash.c
+++ b/scripts/conmakehash.c
@@ -24,14 +24,14 @@
 
 typedef unsigned short unicode;
 
-void usage(char *argv0)
+static void usage(char *argv0)
 {
   fprintf(stderr, "Usage: \n"
          "        %s chartable [hashsize] [hashstep] [maxhashlevel]\n", argv0);
   exit(EX_USAGE);
 }
 
-int getunicode(char **p0)
+static int getunicode(char **p0)
 {
   char *p = *p0;
 
@@ -49,7 +49,7 @@ unicode unitable[MAX_FONTLEN][255];
 				/* Massive overkill, but who cares? */
 int unicount[MAX_FONTLEN];
 
-void addpair(int fp, int un)
+static void addpair(int fp, int un)
 {
   int i;
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 4522948..801a16a 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -691,7 +691,7 @@ static int number_prefix(const char *sym)
  *   The $ syntax is for sections where ld append a dot number
  *   to make section name unique.
  */
-int match(const char *sym, const char * const pat[])
+static int match(const char *sym, const char * const pat[])
 {
 	const char *p;
 	while (*pat) {
@@ -1746,7 +1746,7 @@ static void add_header(struct buffer *b, struct module *mod)
 	buf_printf(b, "};\n");
 }
 
-void add_staging_flag(struct buffer *b, const char *name)
+static void add_staging_flag(struct buffer *b, const char *name)
 {
 	static const char *staging_dir = "drivers/staging";
 
diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c
index ca757d4..b4ced85 100644
--- a/scripts/selinux/mdp/mdp.c
+++ b/scripts/selinux/mdp/mdp.c
@@ -31,13 +31,13 @@
 
 #include "flask.h"
 
-void usage(char *name)
+static void usage(char *name)
 {
 	printf("usage: %s [-m] policy_file context_file\n", name);
 	exit(1);
 }
 
-void find_common_name(char *cname, char *dest, int len)
+static void find_common_name(char *cname, char *dest, int len)
 {
 	char *start, *end;
 
diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c
index f1d3fe3..83b3dde 100644
--- a/usr/gen_init_cpio.c
+++ b/usr/gen_init_cpio.c
@@ -446,7 +446,7 @@ static int cpio_mkfile_line(const char *line)
 	return rc;
 }
 
-void usage(const char *prog)
+static void usage(const char *prog)
 {
 	fprintf(stderr, "Usage:\n"
 		"\t%s <cpio_list>\n"
-- 
cgit v0.10.2


From 54fdade1c3332391948ec43530c02c4794a38172 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Tue, 22 Sep 2009 16:43:39 -0700
Subject: generic-ipi: make struct call_function_data lockless

This patch can remove spinlock from struct call_function_data, the
reasons are below:

1: add a new interface for cpumask named cpumask_test_and_clear_cpu(),
   it can atomically test and clear specific cpu, we can use it instead
   of cpumask_test_cpu() and cpumask_clear_cpu() and no need data->lock
   to protect those in generic_smp_call_function_interrupt().

2: in smp_call_function_many(), after csd_lock() return, the current's
   cfd_data is deleted from call_function list, so it not have race
   between other cpus, then cfs_data is only used in
   smp_call_function_many() that must disable preemption and not from
   a hardware interrupthandler or from a bottom half handler to call,
   only the correspond cpu can use it, so it not have race in current
   cpu, no need cfs_data->lock to protect it.

3: after 1 and 2, cfs_data->lock is only use to protect cfs_data->refs in
   generic_smp_call_function_interrupt(), so we can define cfs_data->refs
   to atomic_t, and no need cfs_data->lock any more.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
[akpm@linux-foundation.org: use atomic_dec_return()]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 796df12..9b1d458 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -715,6 +715,18 @@ static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
 }
 
 /**
+ * cpumask_test_and_clear_cpu - atomically test and clear a cpu in a cpumask
+ * @cpu: cpu number (< nr_cpu_ids)
+ * @cpumask: the cpumask pointer
+ *
+ * test_and_clear_bit wrapper for cpumasks.
+ */
+static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
+{
+	return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask));
+}
+
+/**
  * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
  * @dstp: the cpumask pointer
  */
diff --git a/kernel/smp.c b/kernel/smp.c
index 8e21850..fd47a25 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -29,8 +29,7 @@ enum {
 
 struct call_function_data {
 	struct call_single_data	csd;
-	spinlock_t		lock;
-	unsigned int		refs;
+	atomic_t		refs;
 	cpumask_var_t		cpumask;
 };
 
@@ -39,9 +38,7 @@ struct call_single_queue {
 	spinlock_t		lock;
 };
 
-static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
-	.lock			= __SPIN_LOCK_UNLOCKED(cfd_data.lock),
-};
+static DEFINE_PER_CPU(struct call_function_data, cfd_data);
 
 static int
 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
@@ -196,25 +193,18 @@ void generic_smp_call_function_interrupt(void)
 	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
 
-		spin_lock(&data->lock);
-		if (!cpumask_test_cpu(cpu, data->cpumask)) {
-			spin_unlock(&data->lock);
+		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask))
 			continue;
-		}
-		cpumask_clear_cpu(cpu, data->cpumask);
-		spin_unlock(&data->lock);
 
 		data->csd.func(data->csd.info);
 
-		spin_lock(&data->lock);
-		WARN_ON(data->refs == 0);
-		refs = --data->refs;
+		refs = atomic_dec_return(&data->refs);
+		WARN_ON(refs < 0);
 		if (!refs) {
 			spin_lock(&call_function.lock);
 			list_del_rcu(&data->csd.list);
 			spin_unlock(&call_function.lock);
 		}
-		spin_unlock(&data->lock);
 
 		if (refs)
 			continue;
@@ -419,23 +409,20 @@ void smp_call_function_many(const struct cpumask *mask,
 	data = &__get_cpu_var(cfd_data);
 	csd_lock(&data->csd);
 
-	spin_lock_irqsave(&data->lock, flags);
 	data->csd.func = func;
 	data->csd.info = info;
 	cpumask_and(data->cpumask, mask, cpu_online_mask);
 	cpumask_clear_cpu(this_cpu, data->cpumask);
-	data->refs = cpumask_weight(data->cpumask);
+	atomic_set(&data->refs, cpumask_weight(data->cpumask));
 
-	spin_lock(&call_function.lock);
+	spin_lock_irqsave(&call_function.lock, flags);
 	/*
 	 * Place entry at the _HEAD_ of the list, so that any cpu still
 	 * observing the entry in generic_smp_call_function_interrupt()
 	 * will not miss any other list entries:
 	 */
 	list_add_rcu(&data->csd.list, &call_function.queue);
-	spin_unlock(&call_function.lock);
-
-	spin_unlock_irqrestore(&data->lock, flags);
+	spin_unlock_irqrestore(&call_function.lock, flags);
 
 	/*
 	 * Make the list addition visible before sending the ipi.
-- 
cgit v0.10.2


From 912e837aef72a3dd263dafc3717d92bbc1211a53 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 22 Sep 2009 16:43:41 -0700
Subject: dme1737: Keep index within pwm_config[]

The static code scanner "Parfait" reported this because pwm_config is
only 3 bytes - pwm_config[3] is out of range.

Since this code path is never called with ix == 3 (the device has no PWM4
output) this doesn't change anything in practice.  But to encourage
testing with Parfait, lets make the warning go away...

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 9814d51..2c2cb1e 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -1134,7 +1134,7 @@ static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
 		res = PWM_FREQ_FROM_REG(data->pwm_freq[ix]);
 		break;
 	case SYS_PWM_ENABLE:
-		if (ix > 3) {
+		if (ix >= 3) {
 			res = 1; /* pwm[5-6] hard-wired to manual mode */
 		} else {
 			res = PWM_EN_FROM_REG(data->pwm_config[ix]);
-- 
cgit v0.10.2


From b7ed698cc9d556306a4088c238e2ea9311ea2cb3 Mon Sep 17 00:00:00 2001
From: Ladinu Chandrasinghe <ladinu.pub@gmail.com>
Date: Tue, 22 Sep 2009 16:43:42 -0700
Subject: Documentation/: fix warnings from -Wmissing-prototypes in HOSTCFLAGS

Fix up -Wmissing-prototypes in compileable userspace code, mainly under
Documentation/.

Signed-off-by: Ladinu Chandrasinghe <ladinu.pub@gmail.com>
Signed-off-by: Trevor Keith <tsrk@tsrk.net>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index aa73e72..6e25c26 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -116,7 +116,7 @@ error:
 }
 
 
-int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
+static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
 	     __u8 genl_cmd, __u16 nla_type,
 	     void *nla_data, int nla_len)
 {
@@ -160,7 +160,7 @@ int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
  * Probe the controller in genetlink to find the family id
  * for the TASKSTATS family
  */
-int get_family_id(int sd)
+static int get_family_id(int sd)
 {
 	struct {
 		struct nlmsghdr n;
@@ -190,7 +190,7 @@ int get_family_id(int sd)
 	return id;
 }
 
-void print_delayacct(struct taskstats *t)
+static void print_delayacct(struct taskstats *t)
 {
 	printf("\n\nCPU   %15s%15s%15s%15s\n"
 	       "      %15llu%15llu%15llu%15llu\n"
@@ -216,7 +216,7 @@ void print_delayacct(struct taskstats *t)
 	       (unsigned long long)t->freepages_delay_total);
 }
 
-void task_context_switch_counts(struct taskstats *t)
+static void task_context_switch_counts(struct taskstats *t)
 {
 	printf("\n\nTask   %15s%15s\n"
 	       "       %15llu%15llu\n",
@@ -224,7 +224,7 @@ void task_context_switch_counts(struct taskstats *t)
 	       (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
 }
 
-void print_cgroupstats(struct cgroupstats *c)
+static void print_cgroupstats(struct cgroupstats *c)
 {
 	printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
 		"uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
@@ -235,7 +235,7 @@ void print_cgroupstats(struct cgroupstats *c)
 }
 
 
-void print_ioacct(struct taskstats *t)
+static void print_ioacct(struct taskstats *t)
 {
 	printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
 		t->ac_comm,
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c
index 2caeea5..1d2c010 100644
--- a/Documentation/auxdisplay/cfag12864b-example.c
+++ b/Documentation/auxdisplay/cfag12864b-example.c
@@ -62,7 +62,7 @@ unsigned char cfag12864b_buffer[CFAG12864B_SIZE];
  * Unable to open: return = -1
  * Unable to mmap: return = -2
  */
-int cfag12864b_init(char *path)
+static int cfag12864b_init(char *path)
 {
 	cfag12864b_fd = open(path, O_RDWR);
 	if (cfag12864b_fd == -1)
@@ -81,7 +81,7 @@ int cfag12864b_init(char *path)
 /*
  * exit a cfag12864b framebuffer device
  */
-void cfag12864b_exit(void)
+static void cfag12864b_exit(void)
 {
 	munmap(cfag12864b_mem, CFAG12864B_SIZE);
 	close(cfag12864b_fd);
@@ -90,7 +90,7 @@ void cfag12864b_exit(void)
 /*
  * set (x, y) pixel
  */
-void cfag12864b_set(unsigned char x, unsigned char y)
+static void cfag12864b_set(unsigned char x, unsigned char y)
 {
 	if (CFAG12864B_CHECK(x, y))
 		cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] |=
@@ -100,7 +100,7 @@ void cfag12864b_set(unsigned char x, unsigned char y)
 /*
  * unset (x, y) pixel
  */
-void cfag12864b_unset(unsigned char x, unsigned char y)
+static void cfag12864b_unset(unsigned char x, unsigned char y)
 {
 	if (CFAG12864B_CHECK(x, y))
 		cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &=
@@ -113,7 +113,7 @@ void cfag12864b_unset(unsigned char x, unsigned char y)
  * Pixel off: return = 0
  * Pixel on:  return = 1
  */
-unsigned char cfag12864b_isset(unsigned char x, unsigned char y)
+static unsigned char cfag12864b_isset(unsigned char x, unsigned char y)
 {
 	if (CFAG12864B_CHECK(x, y))
 		if (cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &
@@ -126,7 +126,7 @@ unsigned char cfag12864b_isset(unsigned char x, unsigned char y)
 /*
  * not (x, y) pixel
  */
-void cfag12864b_not(unsigned char x, unsigned char y)
+static void cfag12864b_not(unsigned char x, unsigned char y)
 {
 	if (cfag12864b_isset(x, y))
 		cfag12864b_unset(x, y);
@@ -137,7 +137,7 @@ void cfag12864b_not(unsigned char x, unsigned char y)
 /*
  * fill (set all pixels)
  */
-void cfag12864b_fill(void)
+static void cfag12864b_fill(void)
 {
 	unsigned short i;
 
@@ -148,7 +148,7 @@ void cfag12864b_fill(void)
 /*
  * clear (unset all pixels)
  */
-void cfag12864b_clear(void)
+static void cfag12864b_clear(void)
 {
 	unsigned short i;
 
@@ -162,7 +162,7 @@ void cfag12864b_clear(void)
  * Pixel off: src[i] = 0
  * Pixel on:  src[i] > 0
  */
-void cfag12864b_format(unsigned char * matrix)
+static void cfag12864b_format(unsigned char * matrix)
 {
 	unsigned char i, j, n;
 
@@ -182,7 +182,7 @@ void cfag12864b_format(unsigned char * matrix)
 /*
  * blit buffer to lcd
  */
-void cfag12864b_blit(void)
+static void cfag12864b_blit(void)
 {
 	memcpy(cfag12864b_mem, cfag12864b_buffer, CFAG12864B_SIZE);
 }
@@ -198,7 +198,7 @@ void cfag12864b_blit(void)
 
 #define EXAMPLES	6
 
-void example(unsigned char n)
+static void example(unsigned char n)
 {
 	unsigned short i, j;
 	unsigned char matrix[CFAG12864B_WIDTH * CFAG12864B_HEIGHT];
diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c
index d23610f..3dfb76c 100644
--- a/Documentation/ia64/aliasing-test.c
+++ b/Documentation/ia64/aliasing-test.c
@@ -24,7 +24,7 @@
 
 int sum;
 
-int map_mem(char *path, off_t offset, size_t length, int touch)
+static int map_mem(char *path, off_t offset, size_t length, int touch)
 {
 	int fd, rc;
 	void *addr;
@@ -62,7 +62,7 @@ int map_mem(char *path, off_t offset, size_t length, int touch)
 	return 0;
 }
 
-int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
+static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
 {
 	struct dirent **namelist;
 	char *name, *path2;
@@ -119,7 +119,7 @@ skip:
 
 char buf[1024];
 
-int read_rom(char *path)
+static int read_rom(char *path)
 {
 	int fd, rc;
 	size_t size = 0;
@@ -146,7 +146,7 @@ int read_rom(char *path)
 	return size;
 }
 
-int scan_rom(char *path, char *file)
+static int scan_rom(char *path, char *file)
 {
 	struct dirent **namelist;
 	char *name, *path2;
diff --git a/Documentation/pcmcia/crc32hash.c b/Documentation/pcmcia/crc32hash.c
index 4210e5a..44f8bee 100644
--- a/Documentation/pcmcia/crc32hash.c
+++ b/Documentation/pcmcia/crc32hash.c
@@ -8,7 +8,7 @@ $ ./crc32hash "Dual Speed"
 #include <ctype.h>
 #include <stdlib.h>
 
-unsigned int crc32(unsigned char const *p, unsigned int len)
+static unsigned int crc32(unsigned char const *p, unsigned int len)
 {
 	int i;
 	unsigned int crc = 0;
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c
index c1a5aad..10abd37 100644
--- a/Documentation/spi/spidev_test.c
+++ b/Documentation/spi/spidev_test.c
@@ -69,7 +69,7 @@ static void transfer(int fd)
 	puts("");
 }
 
-void print_usage(const char *prog)
+static void print_usage(const char *prog)
 {
 	printf("Usage: %s [-DsbdlHOLC3]\n", prog);
 	puts("  -D --device   device to use (default /dev/spidev1.1)\n"
@@ -85,7 +85,7 @@ void print_usage(const char *prog)
 	exit(1);
 }
 
-void parse_opts(int argc, char *argv[])
+static void parse_opts(int argc, char *argv[])
 {
 	while (1) {
 		static const struct option lopts[] = {
diff --git a/Documentation/video4linux/v4lgrab.c b/Documentation/video4linux/v4lgrab.c
index 05769cf..c8ded17 100644
--- a/Documentation/video4linux/v4lgrab.c
+++ b/Documentation/video4linux/v4lgrab.c
@@ -89,7 +89,7 @@
 	}                                                               \
 }
 
-int get_brightness_adj(unsigned char *image, long size, int *brightness) {
+static int get_brightness_adj(unsigned char *image, long size, int *brightness) {
   long i, tot = 0;
   for (i=0;i<size*3;i++)
     tot += image[i];
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 0833f44..3eda8ea 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -158,12 +158,12 @@ static uint64_t 	page_flags[HASH_SIZE];
 	type __min2 = (y);			\
 	__min1 < __min2 ? __min1 : __min2; })
 
-unsigned long pages2mb(unsigned long pages)
+static unsigned long pages2mb(unsigned long pages)
 {
 	return (pages * page_size) >> 20;
 }
 
-void fatal(const char *x, ...)
+static void fatal(const char *x, ...)
 {
 	va_list ap;
 
@@ -178,7 +178,7 @@ void fatal(const char *x, ...)
  * page flag names
  */
 
-char *page_flag_name(uint64_t flags)
+static char *page_flag_name(uint64_t flags)
 {
 	static char buf[65];
 	int present;
@@ -197,7 +197,7 @@ char *page_flag_name(uint64_t flags)
 	return buf;
 }
 
-char *page_flag_longname(uint64_t flags)
+static char *page_flag_longname(uint64_t flags)
 {
 	static char buf[1024];
 	int i, n;
@@ -221,7 +221,7 @@ char *page_flag_longname(uint64_t flags)
  * page list and summary
  */
 
-void show_page_range(unsigned long offset, uint64_t flags)
+static void show_page_range(unsigned long offset, uint64_t flags)
 {
 	static uint64_t      flags0;
 	static unsigned long index;
@@ -241,12 +241,12 @@ void show_page_range(unsigned long offset, uint64_t flags)
 	count  = 1;
 }
 
-void show_page(unsigned long offset, uint64_t flags)
+static void show_page(unsigned long offset, uint64_t flags)
 {
 	printf("%lu\t%s\n", offset, page_flag_name(flags));
 }
 
-void show_summary(void)
+static void show_summary(void)
 {
 	int i;
 
@@ -272,7 +272,7 @@ void show_summary(void)
  * page flag filters
  */
 
-int bit_mask_ok(uint64_t flags)
+static int bit_mask_ok(uint64_t flags)
 {
 	int i;
 
@@ -289,7 +289,7 @@ int bit_mask_ok(uint64_t flags)
 	return 1;
 }
 
-uint64_t expand_overloaded_flags(uint64_t flags)
+static uint64_t expand_overloaded_flags(uint64_t flags)
 {
 	/* SLOB/SLUB overload several page flags */
 	if (flags & BIT(SLAB)) {
@@ -308,7 +308,7 @@ uint64_t expand_overloaded_flags(uint64_t flags)
 	return flags;
 }
 
-uint64_t well_known_flags(uint64_t flags)
+static uint64_t well_known_flags(uint64_t flags)
 {
 	/* hide flags intended only for kernel hacker */
 	flags &= ~KPF_HACKERS_BITS;
@@ -325,7 +325,7 @@ uint64_t well_known_flags(uint64_t flags)
  * page frame walker
  */
 
-int hash_slot(uint64_t flags)
+static int hash_slot(uint64_t flags)
 {
 	int k = HASH_KEY(flags);
 	int i;
@@ -352,7 +352,7 @@ int hash_slot(uint64_t flags)
 	exit(EXIT_FAILURE);
 }
 
-void add_page(unsigned long offset, uint64_t flags)
+static void add_page(unsigned long offset, uint64_t flags)
 {
 	flags = expand_overloaded_flags(flags);
 
@@ -371,7 +371,7 @@ void add_page(unsigned long offset, uint64_t flags)
 	total_pages++;
 }
 
-void walk_pfn(unsigned long index, unsigned long count)
+static void walk_pfn(unsigned long index, unsigned long count)
 {
 	unsigned long batch;
 	unsigned long n;
@@ -404,7 +404,7 @@ void walk_pfn(unsigned long index, unsigned long count)
 	}
 }
 
-void walk_addr_ranges(void)
+static void walk_addr_ranges(void)
 {
 	int i;
 
@@ -428,7 +428,7 @@ void walk_addr_ranges(void)
  * user interface
  */
 
-const char *page_flag_type(uint64_t flag)
+static const char *page_flag_type(uint64_t flag)
 {
 	if (flag & KPF_HACKERS_BITS)
 		return "(r)";
@@ -437,7 +437,7 @@ const char *page_flag_type(uint64_t flag)
 	return "   ";
 }
 
-void usage(void)
+static void usage(void)
 {
 	int i, j;
 
@@ -482,7 +482,7 @@ void usage(void)
 		"(r) raw mode bits  (o) overloaded bits\n");
 }
 
-unsigned long long parse_number(const char *str)
+static unsigned long long parse_number(const char *str)
 {
 	unsigned long long n;
 
@@ -494,16 +494,16 @@ unsigned long long parse_number(const char *str)
 	return n;
 }
 
-void parse_pid(const char *str)
+static void parse_pid(const char *str)
 {
 	opt_pid = parse_number(str);
 }
 
-void parse_file(const char *name)
+static void parse_file(const char *name)
 {
 }
 
-void add_addr_range(unsigned long offset, unsigned long size)
+static void add_addr_range(unsigned long offset, unsigned long size)
 {
 	if (nr_addr_ranges >= MAX_ADDR_RANGES)
 		fatal("too much addr ranges\n");
@@ -513,7 +513,7 @@ void add_addr_range(unsigned long offset, unsigned long size)
 	nr_addr_ranges++;
 }
 
-void parse_addr_range(const char *optarg)
+static void parse_addr_range(const char *optarg)
 {
 	unsigned long offset;
 	unsigned long size;
@@ -547,7 +547,7 @@ void parse_addr_range(const char *optarg)
 	add_addr_range(offset, size);
 }
 
-void add_bits_filter(uint64_t mask, uint64_t bits)
+static void add_bits_filter(uint64_t mask, uint64_t bits)
 {
 	if (nr_bit_filters >= MAX_BIT_FILTERS)
 		fatal("too much bit filters\n");
@@ -557,7 +557,7 @@ void add_bits_filter(uint64_t mask, uint64_t bits)
 	nr_bit_filters++;
 }
 
-uint64_t parse_flag_name(const char *str, int len)
+static uint64_t parse_flag_name(const char *str, int len)
 {
 	int i;
 
@@ -577,7 +577,7 @@ uint64_t parse_flag_name(const char *str, int len)
 	return parse_number(str);
 }
 
-uint64_t parse_flag_names(const char *str, int all)
+static uint64_t parse_flag_names(const char *str, int all)
 {
 	const char *p    = str;
 	uint64_t   flags = 0;
@@ -596,7 +596,7 @@ uint64_t parse_flag_names(const char *str, int all)
 	return flags;
 }
 
-void parse_bits_mask(const char *optarg)
+static void parse_bits_mask(const char *optarg)
 {
 	uint64_t mask;
 	uint64_t bits;
@@ -621,7 +621,7 @@ void parse_bits_mask(const char *optarg)
 }
 
 
-struct option opts[] = {
+static struct option opts[] = {
 	{ "raw"       , 0, NULL, 'r' },
 	{ "pid"       , 1, NULL, 'p' },
 	{ "file"      , 1, NULL, 'f' },
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index df32276..92e729f 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -87,7 +87,7 @@ int page_size;
 
 regex_t pattern;
 
-void fatal(const char *x, ...)
+static void fatal(const char *x, ...)
 {
 	va_list ap;
 
@@ -97,7 +97,7 @@ void fatal(const char *x, ...)
 	exit(EXIT_FAILURE);
 }
 
-void usage(void)
+static void usage(void)
 {
 	printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n"
 		"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
@@ -131,7 +131,7 @@ void usage(void)
 	);
 }
 
-unsigned long read_obj(const char *name)
+static unsigned long read_obj(const char *name)
 {
 	FILE *f = fopen(name, "r");
 
@@ -151,7 +151,7 @@ unsigned long read_obj(const char *name)
 /*
  * Get the contents of an attribute
  */
-unsigned long get_obj(const char *name)
+static unsigned long get_obj(const char *name)
 {
 	if (!read_obj(name))
 		return 0;
@@ -159,7 +159,7 @@ unsigned long get_obj(const char *name)
 	return atol(buffer);
 }
 
-unsigned long get_obj_and_str(const char *name, char **x)
+static unsigned long get_obj_and_str(const char *name, char **x)
 {
 	unsigned long result = 0;
 	char *p;
@@ -178,7 +178,7 @@ unsigned long get_obj_and_str(const char *name, char **x)
 	return result;
 }
 
-void set_obj(struct slabinfo *s, const char *name, int n)
+static void set_obj(struct slabinfo *s, const char *name, int n)
 {
 	char x[100];
 	FILE *f;
@@ -192,7 +192,7 @@ void set_obj(struct slabinfo *s, const char *name, int n)
 	fclose(f);
 }
 
-unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
 {
 	char x[100];
 	FILE *f;
@@ -215,7 +215,7 @@ unsigned long read_slab_obj(struct slabinfo *s, const char *name)
 /*
  * Put a size string together
  */
-int store_size(char *buffer, unsigned long value)
+static int store_size(char *buffer, unsigned long value)
 {
 	unsigned long divisor = 1;
 	char trailer = 0;
@@ -247,7 +247,7 @@ int store_size(char *buffer, unsigned long value)
 	return n;
 }
 
-void decode_numa_list(int *numa, char *t)
+static void decode_numa_list(int *numa, char *t)
 {
 	int node;
 	int nr;
@@ -272,7 +272,7 @@ void decode_numa_list(int *numa, char *t)
 	}
 }
 
-void slab_validate(struct slabinfo *s)
+static void slab_validate(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
 		return;
@@ -280,7 +280,7 @@ void slab_validate(struct slabinfo *s)
 	set_obj(s, "validate", 1);
 }
 
-void slab_shrink(struct slabinfo *s)
+static void slab_shrink(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
 		return;
@@ -290,7 +290,7 @@ void slab_shrink(struct slabinfo *s)
 
 int line = 0;
 
-void first_line(void)
+static void first_line(void)
 {
 	if (show_activity)
 		printf("Name                   Objects      Alloc       Free   %%Fast Fallb O\n");
@@ -302,7 +302,7 @@ void first_line(void)
 /*
  * Find the shortest alias of a slab
  */
-struct aliasinfo *find_one_alias(struct slabinfo *find)
+static struct aliasinfo *find_one_alias(struct slabinfo *find)
 {
 	struct aliasinfo *a;
 	struct aliasinfo *best = NULL;
@@ -318,18 +318,18 @@ struct aliasinfo *find_one_alias(struct slabinfo *find)
 	return best;
 }
 
-unsigned long slab_size(struct slabinfo *s)
+static unsigned long slab_size(struct slabinfo *s)
 {
 	return 	s->slabs * (page_size << s->order);
 }
 
-unsigned long slab_activity(struct slabinfo *s)
+static unsigned long slab_activity(struct slabinfo *s)
 {
 	return 	s->alloc_fastpath + s->free_fastpath +
 		s->alloc_slowpath + s->free_slowpath;
 }
 
-void slab_numa(struct slabinfo *s, int mode)
+static void slab_numa(struct slabinfo *s, int mode)
 {
 	int node;
 
@@ -374,7 +374,7 @@ void slab_numa(struct slabinfo *s, int mode)
 	line++;
 }
 
-void show_tracking(struct slabinfo *s)
+static void show_tracking(struct slabinfo *s)
 {
 	printf("\n%s: Kernel object allocation\n", s->name);
 	printf("-----------------------------------------------------------------------\n");
@@ -392,7 +392,7 @@ void show_tracking(struct slabinfo *s)
 
 }
 
-void ops(struct slabinfo *s)
+static void ops(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
 		return;
@@ -405,14 +405,14 @@ void ops(struct slabinfo *s)
 		printf("\n%s has no kmem_cache operations\n", s->name);
 }
 
-const char *onoff(int x)
+static const char *onoff(int x)
 {
 	if (x)
 		return "On ";
 	return "Off";
 }
 
-void slab_stats(struct slabinfo *s)
+static void slab_stats(struct slabinfo *s)
 {
 	unsigned long total_alloc;
 	unsigned long total_free;
@@ -477,7 +477,7 @@ void slab_stats(struct slabinfo *s)
 			s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
 }
 
-void report(struct slabinfo *s)
+static void report(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
 		return;
@@ -518,7 +518,7 @@ void report(struct slabinfo *s)
 	slab_stats(s);
 }
 
-void slabcache(struct slabinfo *s)
+static void slabcache(struct slabinfo *s)
 {
 	char size_str[20];
 	char dist_str[40];
@@ -593,7 +593,7 @@ void slabcache(struct slabinfo *s)
 /*
  * Analyze debug options. Return false if something is amiss.
  */
-int debug_opt_scan(char *opt)
+static int debug_opt_scan(char *opt)
 {
 	if (!opt || !opt[0] || strcmp(opt, "-") == 0)
 		return 1;
@@ -642,7 +642,7 @@ int debug_opt_scan(char *opt)
 	return 1;
 }
 
-int slab_empty(struct slabinfo *s)
+static int slab_empty(struct slabinfo *s)
 {
 	if (s->objects > 0)
 		return 0;
@@ -657,7 +657,7 @@ int slab_empty(struct slabinfo *s)
 	return 1;
 }
 
-void slab_debug(struct slabinfo *s)
+static void slab_debug(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
 		return;
@@ -717,7 +717,7 @@ void slab_debug(struct slabinfo *s)
 		set_obj(s, "trace", 1);
 }
 
-void totals(void)
+static void totals(void)
 {
 	struct slabinfo *s;
 
@@ -976,7 +976,7 @@ void totals(void)
 			b1,	b2,	b3);
 }
 
-void sort_slabs(void)
+static void sort_slabs(void)
 {
 	struct slabinfo *s1,*s2;
 
@@ -1005,7 +1005,7 @@ void sort_slabs(void)
 	}
 }
 
-void sort_aliases(void)
+static void sort_aliases(void)
 {
 	struct aliasinfo *a1,*a2;
 
@@ -1030,7 +1030,7 @@ void sort_aliases(void)
 	}
 }
 
-void link_slabs(void)
+static void link_slabs(void)
 {
 	struct aliasinfo *a;
 	struct slabinfo *s;
@@ -1048,7 +1048,7 @@ void link_slabs(void)
 	}
 }
 
-void alias(void)
+static void alias(void)
 {
 	struct aliasinfo *a;
 	char *active = NULL;
@@ -1079,7 +1079,7 @@ void alias(void)
 }
 
 
-void rename_slabs(void)
+static void rename_slabs(void)
 {
 	struct slabinfo *s;
 	struct aliasinfo *a;
@@ -1102,12 +1102,12 @@ void rename_slabs(void)
 	}
 }
 
-int slab_mismatch(char *slab)
+static int slab_mismatch(char *slab)
 {
 	return regexec(&pattern, slab, 0, NULL, 0);
 }
 
-void read_slab_dir(void)
+static void read_slab_dir(void)
 {
 	DIR *dir;
 	struct dirent *de;
@@ -1209,7 +1209,7 @@ void read_slab_dir(void)
 		fatal("Too many aliases\n");
 }
 
-void output_slabs(void)
+static void output_slabs(void)
 {
 	struct slabinfo *slab;
 
diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c
index 65f6c19..a750532 100644
--- a/Documentation/watchdog/src/watchdog-test.c
+++ b/Documentation/watchdog/src/watchdog-test.c
@@ -18,7 +18,7 @@ int fd;
  * the PC Watchdog card to reset its internal timer so it doesn't trigger
  * a computer reset.
  */
-void keep_alive(void)
+static void keep_alive(void)
 {
     int dummy;
 
diff --git a/firmware/ihex2fw.c b/firmware/ihex2fw.c
index 8f7fdaa..5a03ba8 100644
--- a/firmware/ihex2fw.c
+++ b/firmware/ihex2fw.c
@@ -56,7 +56,7 @@ static int output_records(int outfd);
 static int sort_records = 0;
 static int wide_records = 0;
 
-int usage(void)
+static int usage(void)
 {
 	fprintf(stderr, "ihex2fw: Convert ihex files into binary "
 		"representation for use by Linux kernel\n");
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 3a8297b..af6b836 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -176,7 +176,7 @@ static int is_unknown_symbol(struct symbol *sym)
 			strcmp(defn->string, "{") == 0);
 }
 
-struct symbol *__add_symbol(const char *name, enum symbol_type type,
+static struct symbol *__add_symbol(const char *name, enum symbol_type type,
 			    struct string_list *defn, int is_extern,
 			    int is_reference)
 {
@@ -265,7 +265,7 @@ struct symbol *add_symbol(const char *name, enum symbol_type type,
 	return __add_symbol(name, type, defn, is_extern, 0);
 }
 
-struct symbol *add_reference_symbol(const char *name, enum symbol_type type,
+static struct symbol *add_reference_symbol(const char *name, enum symbol_type type,
 				    struct string_list *defn, int is_extern)
 {
 	return __add_symbol(name, type, defn, is_extern, 1);
@@ -313,7 +313,7 @@ static int equal_list(struct string_list *a, struct string_list *b)
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
-struct string_list *read_node(FILE *f)
+static struct string_list *read_node(FILE *f)
 {
 	char buffer[256];
 	struct string_list node = {
-- 
cgit v0.10.2


From 88e9d34c727883d7d6f02cf1475b3ec98b8480c7 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Tue, 22 Sep 2009 16:43:43 -0700
Subject: seq_file: constify seq_operations

Make all seq_operations structs const, to help mitigate against
revectoring user-triggerable function pointers.

This is derived from the grsecurity patch, although generated from scratch
because it's simpler than extracting the changes from there.

Signed-off-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/mn10300/kernel/setup.c b/arch/mn10300/kernel/setup.c
index 79890ed..3f24c29 100644
--- a/arch/mn10300/kernel/setup.c
+++ b/arch/mn10300/kernel/setup.c
@@ -285,7 +285,7 @@ static void c_stop(struct seq_file *m, void *v)
 {
 }
 
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
 	.start	= c_start,
 	.next	= c_next,
 	.stop	= c_stop,
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 02fed27..1d5570a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -328,7 +328,7 @@ static void c_stop(struct seq_file *m, void *v)
 {
 }
 
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
 	.start =c_start,
 	.next =	c_next,
 	.stop =	c_stop,
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index eae51ef..3631a4f 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -71,7 +71,7 @@ static int hc_show(struct seq_file *m, void *p)
 	return 0;
 }
 
-static struct seq_operations hcall_inst_seq_ops = {
+static const struct seq_operations hcall_inst_seq_ops = {
         .start = hc_start,
         .next  = hc_next,
         .stop  = hc_stop,
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 4f19105..24c3e21 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -363,7 +363,7 @@ static void cciss_seq_stop(struct seq_file *seq, void *v)
 	h->busy_configuring = 0;
 }
 
-static struct seq_operations cciss_seq_ops = {
+static const struct seq_operations cciss_seq_ops = {
 	.start = cciss_seq_start,
 	.show  = cciss_seq_show,
 	.next  = cciss_seq_next,
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 1ee27cc..07fa612 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -91,7 +91,7 @@ static int misc_seq_show(struct seq_file *seq, void *v)
 }
 
 
-static struct seq_operations misc_seq_ops = {
+static const struct seq_operations misc_seq_ops = {
 	.start = misc_seq_start,
 	.next  = misc_seq_next,
 	.stop  = misc_seq_stop,
diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c
index 0c2f55a..bf2170f 100644
--- a/drivers/char/tpm/tpm_bios.c
+++ b/drivers/char/tpm/tpm_bios.c
@@ -343,14 +343,14 @@ static int tpm_ascii_bios_measurements_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations tpm_ascii_b_measurments_seqops = {
+static const struct seq_operations tpm_ascii_b_measurments_seqops = {
 	.start = tpm_bios_measurements_start,
 	.next = tpm_bios_measurements_next,
 	.stop = tpm_bios_measurements_stop,
 	.show = tpm_ascii_bios_measurements_show,
 };
 
-static struct seq_operations tpm_binary_b_measurments_seqops = {
+static const struct seq_operations tpm_binary_b_measurments_seqops = {
 	.start = tpm_bios_measurements_start,
 	.next = tpm_bios_measurements_next,
 	.stop = tpm_bios_measurements_stop,
diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c
index 50ed778..09d4db7 100644
--- a/drivers/isdn/capi/kcapi_proc.c
+++ b/drivers/isdn/capi/kcapi_proc.c
@@ -89,14 +89,14 @@ static int contrstats_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations seq_controller_ops = {
+static const struct seq_operations seq_controller_ops = {
 	.start	= controller_start,
 	.next	= controller_next,
 	.stop	= controller_stop,
 	.show	= controller_show,
 };
 
-static struct seq_operations seq_contrstats_ops = {
+static const struct seq_operations seq_contrstats_ops = {
 	.start	= controller_start,
 	.next	= controller_next,
 	.stop	= controller_stop,
@@ -194,14 +194,14 @@ applstats_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations seq_applications_ops = {
+static const struct seq_operations seq_applications_ops = {
 	.start	= applications_start,
 	.next	= applications_next,
 	.stop	= applications_stop,
 	.show	= applications_show,
 };
 
-static struct seq_operations seq_applstats_ops = {
+static const struct seq_operations seq_applstats_ops = {
 	.start	= applications_start,
 	.next	= applications_next,
 	.stop	= applications_stop,
@@ -264,7 +264,7 @@ static int capi_driver_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations seq_capi_driver_ops = {
+static const struct seq_operations seq_capi_driver_ops = {
 	.start	= capi_driver_start,
 	.next	= capi_driver_next,
 	.stop	= capi_driver_stop,
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 4968c4c..848b594 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2233,7 +2233,7 @@ static struct file_operations dev_fops = {
 	.open = sg_proc_open_dev,
 	.release = seq_release,
 };
-static struct seq_operations dev_seq_ops = {
+static const struct seq_operations dev_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
 	.stop  = dev_seq_stop,
@@ -2246,7 +2246,7 @@ static struct file_operations devstrs_fops = {
 	.open = sg_proc_open_devstrs,
 	.release = seq_release,
 };
-static struct seq_operations devstrs_seq_ops = {
+static const struct seq_operations devstrs_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
 	.stop  = dev_seq_stop,
@@ -2259,7 +2259,7 @@ static struct file_operations debug_fops = {
 	.open = sg_proc_open_debug,
 	.release = seq_release,
 };
-static struct seq_operations debug_seq_ops = {
+static const struct seq_operations debug_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
 	.stop  = dev_seq_stop,
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 8630615..852739d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -28,7 +28,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v);
 static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 				    size_t size, loff_t *_pos);
 
-static struct seq_operations afs_proc_cells_ops = {
+static const struct seq_operations afs_proc_cells_ops = {
 	.start	= afs_proc_cells_start,
 	.next	= afs_proc_cells_next,
 	.stop	= afs_proc_cells_stop,
@@ -70,7 +70,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
 static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v);
 static int afs_proc_cell_volumes_show(struct seq_file *m, void *v);
 
-static struct seq_operations afs_proc_cell_volumes_ops = {
+static const struct seq_operations afs_proc_cell_volumes_ops = {
 	.start	= afs_proc_cell_volumes_start,
 	.next	= afs_proc_cell_volumes_next,
 	.stop	= afs_proc_cell_volumes_stop,
@@ -95,7 +95,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
 static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v);
 static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v);
 
-static struct seq_operations afs_proc_cell_vlservers_ops = {
+static const struct seq_operations afs_proc_cell_vlservers_ops = {
 	.start	= afs_proc_cell_vlservers_start,
 	.next	= afs_proc_cell_vlservers_next,
 	.stop	= afs_proc_cell_vlservers_stop,
@@ -119,7 +119,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
 static void afs_proc_cell_servers_stop(struct seq_file *p, void *v);
 static int afs_proc_cell_servers_show(struct seq_file *m, void *v);
 
-static struct seq_operations afs_proc_cell_servers_ops = {
+static const struct seq_operations afs_proc_cell_servers_ops = {
 	.start	= afs_proc_cell_servers_start,
 	.next	= afs_proc_cell_servers_next,
 	.stop	= afs_proc_cell_servers_stop,
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 1d1d274..1c8bb8c 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -386,9 +386,9 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr)
 	return rv;
 }
 
-static struct seq_operations format1_seq_ops;
-static struct seq_operations format2_seq_ops;
-static struct seq_operations format3_seq_ops;
+static const struct seq_operations format1_seq_ops;
+static const struct seq_operations format2_seq_ops;
+static const struct seq_operations format3_seq_ops;
 
 static void *table_seq_start(struct seq_file *seq, loff_t *pos)
 {
@@ -534,21 +534,21 @@ static void table_seq_stop(struct seq_file *seq, void *iter_ptr)
 	}
 }
 
-static struct seq_operations format1_seq_ops = {
+static const struct seq_operations format1_seq_ops = {
 	.start = table_seq_start,
 	.next  = table_seq_next,
 	.stop  = table_seq_stop,
 	.show  = table_seq_show,
 };
 
-static struct seq_operations format2_seq_ops = {
+static const struct seq_operations format2_seq_ops = {
 	.start = table_seq_start,
 	.next  = table_seq_next,
 	.stop  = table_seq_stop,
 	.show  = table_seq_show,
 };
 
-static struct seq_operations format3_seq_ops = {
+static const struct seq_operations format3_seq_ops = {
 	.start = table_seq_start,
 	.next  = table_seq_next,
 	.stop  = table_seq_stop,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a8a358b..53b86e1 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -768,7 +768,7 @@ static void jbd2_seq_history_stop(struct seq_file *seq, void *v)
 {
 }
 
-static struct seq_operations jbd2_seq_history_ops = {
+static const struct seq_operations jbd2_seq_history_ops = {
 	.start  = jbd2_seq_history_start,
 	.next   = jbd2_seq_history_next,
 	.stop   = jbd2_seq_history_stop,
@@ -872,7 +872,7 @@ static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
 {
 }
 
-static struct seq_operations jbd2_seq_info_ops = {
+static const struct seq_operations jbd2_seq_info_ops = {
 	.start  = jbd2_seq_info_start,
 	.next   = jbd2_seq_info_next,
 	.stop   = jbd2_seq_info_stop,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a7ce15d..1520253 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1531,7 +1531,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
 static void nfs_server_list_stop(struct seq_file *p, void *v);
 static int nfs_server_list_show(struct seq_file *m, void *v);
 
-static struct seq_operations nfs_server_list_ops = {
+static const struct seq_operations nfs_server_list_ops = {
 	.start	= nfs_server_list_start,
 	.next	= nfs_server_list_next,
 	.stop	= nfs_server_list_stop,
@@ -1552,7 +1552,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
 static void nfs_volume_list_stop(struct seq_file *p, void *v);
 static int nfs_volume_list_show(struct seq_file *m, void *v);
 
-static struct seq_operations nfs_volume_list_ops = {
+static const struct seq_operations nfs_volume_list_ops = {
 	.start	= nfs_volume_list_start,
 	.next	= nfs_volume_list_next,
 	.stop	= nfs_volume_list_stop,
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 984a5eb..c1c9e03 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1517,7 +1517,7 @@ static int e_show(struct seq_file *m, void *p)
 	return svc_export_show(m, &svc_export_cache, cp);
 }
 
-struct seq_operations nfs_exports_op = {
+const struct seq_operations nfs_exports_op = {
 	.start	= e_start,
 	.next	= e_next,
 	.stop	= e_stop,
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index f842487..cfb2be7 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -163,7 +163,7 @@ static void nst_seq_stop(struct seq_file *seq, void *v)
 {
 }
 
-static struct seq_operations nst_seq_ops = {
+static const struct seq_operations nst_seq_ops = {
 	.start = nst_seq_start,
 	.next = nst_seq_next,
 	.stop = nst_seq_stop,
@@ -344,7 +344,7 @@ static void sc_seq_stop(struct seq_file *seq, void *v)
 {
 }
 
-static struct seq_operations sc_seq_ops = {
+static const struct seq_operations sc_seq_ops = {
 	.start = sc_seq_start,
 	.next = sc_seq_next,
 	.stop = sc_seq_stop,
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index df52f70..c5c8812 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -683,7 +683,7 @@ static int lockres_seq_show(struct seq_file *s, void *v)
 	return 0;
 }
 
-static struct seq_operations debug_lockres_ops = {
+static const struct seq_operations debug_lockres_ops = {
 	.start =	lockres_seq_start,
 	.stop =		lockres_seq_stop,
 	.next =		lockres_seq_next,
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 7e14d1a..9fe7d7e 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -109,7 +109,7 @@ static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
 	return rb_next((struct rb_node *) v);
 }
 
-static struct seq_operations proc_nommu_region_list_seqop = {
+static const struct seq_operations proc_nommu_region_list_seqop = {
 	.start	= nommu_region_list_start,
 	.next	= nommu_region_list_next,
 	.stop	= nommu_region_list_stop,
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 03bbe9039..510ffdd 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -60,7 +60,7 @@ extern spinlock_t		nfsd_drc_lock;
 extern unsigned int		nfsd_drc_max_mem;
 extern unsigned int		nfsd_drc_mem_used;
 
-extern struct seq_operations nfs_exports_op;
+extern const struct seq_operations nfs_exports_op;
 
 /*
  * Function prototypes.
diff --git a/ipc/util.c b/ipc/util.c
index b8e4ba9..79ce84e 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -942,7 +942,7 @@ static int sysvipc_proc_show(struct seq_file *s, void *it)
 	return iface->show(s, it);
 }
 
-static struct seq_operations sysvipc_proc_seqops = {
+static const struct seq_operations sysvipc_proc_seqops = {
 	.start = sysvipc_proc_start,
 	.stop  = sysvipc_proc_stop,
 	.next  = sysvipc_proc_next,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 213b7f9..cd83d99 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2314,7 +2314,7 @@ static int cgroup_tasks_show(struct seq_file *s, void *v)
 	return seq_printf(s, "%d\n", *(int *)v);
 }
 
-static struct seq_operations cgroup_tasks_seq_operations = {
+static const struct seq_operations cgroup_tasks_seq_operations = {
 	.start = cgroup_tasks_start,
 	.stop = cgroup_tasks_stop,
 	.next = cgroup_tasks_next,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ef177d6..cfadc12 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1321,7 +1321,7 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
 	return 0;
 }
 
-static struct seq_operations kprobes_seq_ops = {
+static const struct seq_operations kprobes_seq_ops = {
 	.start = kprobe_seq_start,
 	.next  = kprobe_seq_next,
 	.stop  = kprobe_seq_stop,
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d4b3dbc..d4aba4f 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -594,7 +594,7 @@ static int ls_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations lockstat_ops = {
+static const struct seq_operations lockstat_ops = {
 	.start	= ls_start,
 	.next	= ls_next,
 	.stop	= ls_stop,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c71e91b..23df777 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1520,7 +1520,7 @@ static int t_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations show_ftrace_seq_ops = {
+static const struct seq_operations show_ftrace_seq_ops = {
 	.start = t_start,
 	.next = t_next,
 	.stop = t_stop,
@@ -2459,7 +2459,7 @@ static int g_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations ftrace_graph_seq_ops = {
+static const struct seq_operations ftrace_graph_seq_ops = {
 	.start = g_start,
 	.next = g_next,
 	.stop = g_stop,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a35925d..6c0f6a8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1949,7 +1949,7 @@ static int s_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations tracer_seq_ops = {
+static const struct seq_operations tracer_seq_ops = {
 	.start		= s_start,
 	.next		= s_next,
 	.stop		= s_stop,
@@ -2163,7 +2163,7 @@ static int t_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations show_traces_seq_ops = {
+static const struct seq_operations show_traces_seq_ops = {
 	.start		= t_start,
 	.next		= t_next,
 	.stop		= t_stop,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 3907510..090675e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -324,7 +324,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ipmr_mfc_seq_ops = {
+static const struct seq_operations ipmr_mfc_seq_ops = {
 	.start = ipmr_mfc_seq_start,
 	.next  = ipmr_mfc_seq_next,
 	.stop  = ipmr_mfc_seq_stop,
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 6bfc7ea..8e9777b 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -146,7 +146,7 @@ static int ima_measurements_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations ima_measurments_seqops = {
+static const struct seq_operations ima_measurments_seqops = {
 	.start = ima_measurements_start,
 	.next = ima_measurements_next,
 	.stop = ima_measurements_stop,
@@ -221,7 +221,7 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations ima_ascii_measurements_seqops = {
+static const struct seq_operations ima_ascii_measurements_seqops = {
 	.start = ima_measurements_start,
 	.next = ima_measurements_next,
 	.stop = ima_measurements_stop,
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index f83a809..aeead75 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -187,7 +187,7 @@ static void load_seq_stop(struct seq_file *s, void *v)
 	/* No-op */
 }
 
-static struct seq_operations load_seq_ops = {
+static const struct seq_operations load_seq_ops = {
 	.start = load_seq_start,
 	.next  = load_seq_next,
 	.show  = load_seq_show,
@@ -503,7 +503,7 @@ static void cipso_seq_stop(struct seq_file *s, void *v)
 	/* No-op */
 }
 
-static struct seq_operations cipso_seq_ops = {
+static const struct seq_operations cipso_seq_ops = {
 	.start = cipso_seq_start,
 	.stop  = cipso_seq_stop,
 	.next  = cipso_seq_next,
@@ -697,7 +697,7 @@ static void netlbladdr_seq_stop(struct seq_file *s, void *v)
 	/* No-op */
 }
 
-static struct seq_operations netlbladdr_seq_ops = {
+static const struct seq_operations netlbladdr_seq_ops = {
 	.start = netlbladdr_seq_start,
 	.stop  = netlbladdr_seq_stop,
 	.next  = netlbladdr_seq_next,
-- 
cgit v0.10.2


From 02b51df1b07b4e9ca823c89284e704cadb323cd1 Mon Sep 17 00:00:00 2001
From: Scott James Remnant <scott@ubuntu.com>
Date: Tue, 22 Sep 2009 16:43:44 -0700
Subject: proc connector: add event for process becoming session leader

The act of a process becoming a session leader is a useful signal to a
supervising init daemon such as Upstart.

While a daemon will normally do this as part of the process of becoming a
daemon, it is rare for its children to do so.  When the children do, it is
nearly always a sign that the child should be considered detached from the
parent and not supervised along with it.

The poster-child example is OpenSSH; the per-login children call setsid()
so that they may control the pty connected to them.  If the primary daemon
dies or is restarted, we do not want to consider the per-login children
and want to respawn the primary daemon without killing the children.

This patch adds a new PROC_SID_EVENT and associated structure to the
proc_event event_data union, it arranges for this to be emitted when the
special PIDTYPE_SID pid is set.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Scott James Remnant <scott@ubuntu.com>
Acked-by: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Acked-by: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 85e5dc0..abf4a25 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -139,6 +139,31 @@ void proc_id_connector(struct task_struct *task, int which_id)
 	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
 }
 
+void proc_sid_connector(struct task_struct *task)
+{
+	struct cn_msg *msg;
+	struct proc_event *ev;
+	struct timespec ts;
+	__u8 buffer[CN_PROC_MSG_SIZE];
+
+	if (atomic_read(&proc_event_num_listeners) < 1)
+		return;
+
+	msg = (struct cn_msg *)buffer;
+	ev = (struct proc_event *)msg->data;
+	get_seq(&msg->seq, &ev->cpu);
+	ktime_get_ts(&ts); /* get high res monotonic timestamp */
+	put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
+	ev->what = PROC_EVENT_SID;
+	ev->event_data.sid.process_pid = task->pid;
+	ev->event_data.sid.process_tgid = task->tgid;
+
+	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
+	msg->ack = 0; /* not used */
+	msg->len = sizeof(*ev);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+}
+
 void proc_exit_connector(struct task_struct *task)
 {
 	struct cn_msg *msg;
diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h
index b8125b2..47dac5e 100644
--- a/include/linux/cn_proc.h
+++ b/include/linux/cn_proc.h
@@ -52,6 +52,7 @@ struct proc_event {
 		PROC_EVENT_EXEC = 0x00000002,
 		PROC_EVENT_UID  = 0x00000004,
 		PROC_EVENT_GID  = 0x00000040,
+		PROC_EVENT_SID  = 0x00000080,
 		/* "next" should be 0x00000400 */
 		/* "last" is the last process event: exit */
 		PROC_EVENT_EXIT = 0x80000000
@@ -89,6 +90,11 @@ struct proc_event {
 			} e;
 		} id;
 
+		struct sid_proc_event {
+			__kernel_pid_t process_pid;
+			__kernel_pid_t process_tgid;
+		} sid;
+
 		struct exit_proc_event {
 			__kernel_pid_t process_pid;
 			__kernel_pid_t process_tgid;
@@ -102,6 +108,7 @@ struct proc_event {
 void proc_fork_connector(struct task_struct *task);
 void proc_exec_connector(struct task_struct *task);
 void proc_id_connector(struct task_struct *task, int which_id);
+void proc_sid_connector(struct task_struct *task);
 void proc_exit_connector(struct task_struct *task);
 #else
 static inline void proc_fork_connector(struct task_struct *task)
@@ -114,6 +121,9 @@ static inline void proc_id_connector(struct task_struct *task,
 				     int which_id)
 {}
 
+static inline void proc_sid_connector(struct task_struct *task)
+{}
+
 static inline void proc_exit_connector(struct task_struct *task)
 {}
 #endif	/* CONFIG_PROC_EVENTS */
diff --git a/kernel/exit.c b/kernel/exit.c
index e47ee8a..61bb176 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -359,8 +359,10 @@ void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
 
-	if (task_session(curr) != pid)
+	if (task_session(curr) != pid) {
 		change_pid(curr, PIDTYPE_SID, pid);
+		proc_sid_connector(curr);
+	}
 
 	if (task_pgrp(curr) != pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
-- 
cgit v0.10.2


From 70867453092297be9afb2249e712a1f960ec0a09 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Tue, 22 Sep 2009 16:43:46 -0700
Subject: printk_once(): use bool for boolean flag

Using the type bool (instead of int) for the __print_once flag in the
printk_once() macro matches the intent of the code better, and allows the
compiler to generate smaller code; eg a typical callsite with gcc 4.3.3 on
i386:

add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-6 (-6)
function                                     old     new   delta
static.__print_once                            4       1      -3
get_cpu_vendor                               146     143      -3

Saving 6 bytes of object size per callsite by slightly improving the
readability of the source seems like a win to me.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0a2a190..55723af 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -252,10 +252,10 @@ extern int printk_delay_msec;
  * Print a one-time message (analogous to WARN_ONCE() et al):
  */
 #define printk_once(x...) ({			\
-	static int __print_once = 1;		\
+	static bool __print_once = true;	\
 						\
 	if (__print_once) {			\
-		__print_once = 0;		\
+		__print_once = false;		\
 		printk(x);			\
 	}					\
 })
-- 
cgit v0.10.2


From 88e0fbc452ed94393bf89585c2b90edb94749b45 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 22 Sep 2009 16:43:50 -0700
Subject: fs: turn iprune_mutex into rwsem

We have had a report of bad memory allocation latency during DVD-RAM (UDF)
writing.  This is causing the user's desktop session to become unusable.

Jan tracked the cause of this down to UDF inode reclaim blocking:

gnome-screens D ffff810006d1d598     0 20686      1
 ffff810006d1d508 0000000000000082 ffff810037db6718 0000000000000800
 ffff810006d1d488 ffffffff807e4280 ffffffff807e4280 ffff810006d1a580
 ffff8100bccbc140 ffff810006d1a8c0 0000000006d1d4e8 ffff810006d1a8c0
Call Trace:
 [<ffffffff804477f3>] io_schedule+0x63/0xa5
 [<ffffffff802c2587>] sync_buffer+0x3b/0x3f
 [<ffffffff80447d2a>] __wait_on_bit+0x47/0x79
 [<ffffffff80447dc6>] out_of_line_wait_on_bit+0x6a/0x77
 [<ffffffff802c24f6>] __wait_on_buffer+0x1f/0x21
 [<ffffffff802c442a>] __bread+0x70/0x86
 [<ffffffff88de9ec7>] :udf:udf_tread+0x38/0x3a
 [<ffffffff88de0fcf>] :udf:udf_update_inode+0x4d/0x68c
 [<ffffffff88de26e1>] :udf:udf_write_inode+0x1d/0x2b
 [<ffffffff802bcf85>] __writeback_single_inode+0x1c0/0x394
 [<ffffffff802bd205>] write_inode_now+0x7d/0xc4
 [<ffffffff88de2e76>] :udf:udf_clear_inode+0x3d/0x53
 [<ffffffff802b39ae>] clear_inode+0xc2/0x11b
 [<ffffffff802b3ab1>] dispose_list+0x5b/0x102
 [<ffffffff802b3d35>] shrink_icache_memory+0x1dd/0x213
 [<ffffffff8027ede3>] shrink_slab+0xe3/0x158
 [<ffffffff8027fbab>] try_to_free_pages+0x177/0x232
 [<ffffffff8027a578>] __alloc_pages+0x1fa/0x392
 [<ffffffff802951fa>] alloc_page_vma+0x176/0x189
 [<ffffffff802822d8>] __do_fault+0x10c/0x417
 [<ffffffff80284232>] handle_mm_fault+0x466/0x940
 [<ffffffff8044b922>] do_page_fault+0x676/0xabf

This blocks with iprune_mutex held, which then blocks other reclaimers:

X             D ffff81009d47c400     0 17285  14831
 ffff8100844f3728 0000000000000086 0000000000000000 ffff81000000e288
 ffff81000000da00 ffffffff807e4280 ffffffff807e4280 ffff81009d47c400
 ffffffff805ff890 ffff81009d47c740 00000000844f3808 ffff81009d47c740
Call Trace:
 [<ffffffff80447f8c>] __mutex_lock_slowpath+0x72/0xa9
 [<ffffffff80447e1a>] mutex_lock+0x1e/0x22
 [<ffffffff802b3ba1>] shrink_icache_memory+0x49/0x213
 [<ffffffff8027ede3>] shrink_slab+0xe3/0x158
 [<ffffffff8027fbab>] try_to_free_pages+0x177/0x232
 [<ffffffff8027a578>] __alloc_pages+0x1fa/0x392
 [<ffffffff8029507f>] alloc_pages_current+0xd1/0xd6
 [<ffffffff80279ac0>] __get_free_pages+0xe/0x4d
 [<ffffffff802ae1b7>] __pollwait+0x5e/0xdf
 [<ffffffff8860f2b4>] :nvidia:nv_kern_poll+0x2e/0x73
 [<ffffffff802ad949>] do_select+0x308/0x506
 [<ffffffff802adced>] core_sys_select+0x1a6/0x254
 [<ffffffff802ae0b7>] sys_select+0xb5/0x157

Now I think the main problem is having the filesystem block (and do IO) in
inode reclaim.  The problem is that this doesn't get accounted well and
penalizes a random allocator with a big latency spike caused by work
generated from elsewhere.

I think the best idea would be to avoid this.  By design if possible, or
by deferring the hard work to an asynchronous context.  If the latter,
then the fs would probably want to throttle creation of new work with
queue size of the deferred work, but let's not get into those details.

Anyway, the other obvious thing we looked at is the iprune_mutex which is
causing the cascading blocking.  We could turn this into an rwsem to
improve concurrency.  It is unreasonable to totally ban all potentially
slow or blocking operations in inode reclaim, so I think this is a cheap
way to get a small improvement.

This doesn't solve the whole problem of course.  The process doing inode
reclaim will still take the latency hit, and concurrent processes may end
up contending on filesystem locks.  So fs developers should keep these
problems in mind.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jan Kara <jack@ucw.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/inode.c b/fs/inode.c
index f5ff71c..76582b06 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
+#include <linux/rwsem.h>
 #include <linux/hash.h>
 #include <linux/swap.h>
 #include <linux/security.h>
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly;
 DEFINE_SPINLOCK(inode_lock);
 
 /*
- * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
+ * iprune_sem provides exclusion between the kswapd or try_to_free_pages
  * icache shrinking path, and the umount path.  Without this exclusion,
  * by the time prune_icache calls iput for the inode whose pages it has
  * been invalidating, or by the time it calls clear_inode & destroy_inode
  * from its final dispose_list, the struct super_block they refer to
  * (for inode->i_sb->s_op) may already have been freed and reused.
+ *
+ * We make this an rwsem because the fastpath is icache shrinking. In
+ * some cases a filesystem may be doing a significant amount of work in
+ * its inode reclaim code, so this should improve parallelism.
  */
-static DEFINE_MUTEX(iprune_mutex);
+static DECLARE_RWSEM(iprune_sem);
 
 /*
  * Statistics gathering..
@@ -381,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 		/*
 		 * We can reschedule here without worrying about the list's
 		 * consistency because the per-sb list of inodes must not
-		 * change during umount anymore, and because iprune_mutex keeps
+		 * change during umount anymore, and because iprune_sem keeps
 		 * shrink_icache_memory() away.
 		 */
 		cond_resched_lock(&inode_lock);
@@ -420,7 +425,7 @@ int invalidate_inodes(struct super_block *sb)
 	int busy;
 	LIST_HEAD(throw_away);
 
-	mutex_lock(&iprune_mutex);
+	down_write(&iprune_sem);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
 	fsnotify_unmount_inodes(&sb->s_inodes);
@@ -428,7 +433,7 @@ int invalidate_inodes(struct super_block *sb)
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
-	mutex_unlock(&iprune_mutex);
+	up_write(&iprune_sem);
 
 	return busy;
 }
@@ -467,7 +472,7 @@ static void prune_icache(int nr_to_scan)
 	int nr_scanned;
 	unsigned long reap = 0;
 
-	mutex_lock(&iprune_mutex);
+	down_read(&iprune_sem);
 	spin_lock(&inode_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
@@ -509,7 +514,7 @@ static void prune_icache(int nr_to_scan)
 	spin_unlock(&inode_lock);
 
 	dispose_list(&freeable);
-	mutex_unlock(&iprune_mutex);
+	up_read(&iprune_sem);
 }
 
 /*
-- 
cgit v0.10.2


From 1fe72eaa0f46a0fa4cdcd8f3f7853b6d39469784 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Tue, 22 Sep 2009 16:43:51 -0700
Subject: fs/buffer.c: clean up EXPORT* macros

According to Documentation/CodingStyle the EXPORT* macro should follow
immediately after the closing function brace line.

Also, mark_buffer_async_write_endio() and do_thaw_all() are not used
elsewhere so they should be marked as static.

In addition, file_fsync() is actually in fs/sync.c so move the EXPORT* to
that file.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/buffer.c b/fs/buffer.c
index 90a9886..209f7f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -52,6 +52,7 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 	bh->b_end_io = handler;
 	bh->b_private = private;
 }
+EXPORT_SYMBOL(init_buffer);
 
 static int sync_buffer(void *word)
 {
@@ -80,6 +81,7 @@ void unlock_buffer(struct buffer_head *bh)
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bh->b_state, BH_Lock);
 }
+EXPORT_SYMBOL(unlock_buffer);
 
 /*
  * Block until a buffer comes unlocked.  This doesn't stop it
@@ -90,6 +92,7 @@ void __wait_on_buffer(struct buffer_head * bh)
 {
 	wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
 }
+EXPORT_SYMBOL(__wait_on_buffer);
 
 static void
 __clear_page_buffers(struct page *page)
@@ -144,6 +147,7 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
 	__end_buffer_read_notouch(bh, uptodate);
 	put_bh(bh);
 }
+EXPORT_SYMBOL(end_buffer_read_sync);
 
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 {
@@ -164,6 +168,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	unlock_buffer(bh);
 	put_bh(bh);
 }
+EXPORT_SYMBOL(end_buffer_write_sync);
 
 /*
  * Various filesystems appear to want __find_get_block to be non-blocking.
@@ -272,6 +277,7 @@ void invalidate_bdev(struct block_device *bdev)
 	invalidate_bh_lrus();
 	invalidate_mapping_pages(mapping, 0, -1);
 }
+EXPORT_SYMBOL(invalidate_bdev);
 
 /*
  * Kick pdflush then try to free up some ZONE_NORMAL memory.
@@ -410,6 +416,7 @@ still_busy:
 	local_irq_restore(flags);
 	return;
 }
+EXPORT_SYMBOL(end_buffer_async_write);
 
 /*
  * If a page's buffers are under async readin (end_buffer_async_read
@@ -438,8 +445,8 @@ static void mark_buffer_async_read(struct buffer_head *bh)
 	set_buffer_async_read(bh);
 }
 
-void mark_buffer_async_write_endio(struct buffer_head *bh,
-				   bh_end_io_t *handler)
+static void mark_buffer_async_write_endio(struct buffer_head *bh,
+					  bh_end_io_t *handler)
 {
 	bh->b_end_io = handler;
 	set_buffer_async_write(bh);
@@ -553,7 +560,7 @@ repeat:
 	return err;
 }
 
-void do_thaw_all(struct work_struct *work)
+static void do_thaw_all(struct work_struct *work)
 {
 	struct super_block *sb;
 	char b[BDEVNAME_SIZE];
@@ -1172,6 +1179,7 @@ void mark_buffer_dirty(struct buffer_head *bh)
 		}
 	}
 }
+EXPORT_SYMBOL(mark_buffer_dirty);
 
 /*
  * Decrement a buffer_head's reference count.  If all buffers against a page
@@ -1188,6 +1196,7 @@ void __brelse(struct buffer_head * buf)
 	}
 	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
 }
+EXPORT_SYMBOL(__brelse);
 
 /*
  * bforget() is like brelse(), except it discards any
@@ -1206,6 +1215,7 @@ void __bforget(struct buffer_head *bh)
 	}
 	__brelse(bh);
 }
+EXPORT_SYMBOL(__bforget);
 
 static struct buffer_head *__bread_slow(struct buffer_head *bh)
 {
@@ -2218,6 +2228,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(block_read_full_page);
 
 /* utility function for filesystems that need to do work on expanding
  * truncates.  Uses filesystem pagecache writes to allow the filesystem to
@@ -2252,6 +2263,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
 out:
 	return err;
 }
+EXPORT_SYMBOL(generic_cont_expand_simple);
 
 static int cont_expand_zero(struct file *file, struct address_space *mapping,
 			    loff_t pos, loff_t *bytes)
@@ -2352,6 +2364,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
 out:
 	return err;
 }
+EXPORT_SYMBOL(cont_write_begin);
 
 int block_prepare_write(struct page *page, unsigned from, unsigned to,
 			get_block_t *get_block)
@@ -2362,6 +2375,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
 		ClearPageUptodate(page);
 	return err;
 }
+EXPORT_SYMBOL(block_prepare_write);
 
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
@@ -2369,6 +2383,7 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
 	__block_commit_write(inode,page,from,to);
 	return 0;
 }
+EXPORT_SYMBOL(block_commit_write);
 
 /*
  * block_page_mkwrite() is not allowed to change the file size as it gets
@@ -2426,6 +2441,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 out:
 	return ret;
 }
+EXPORT_SYMBOL(block_page_mkwrite);
 
 /*
  * nobh_write_begin()'s prereads are special: the buffer_heads are freed
@@ -2849,6 +2865,7 @@ unlock:
 out:
 	return err;
 }
+EXPORT_SYMBOL(block_truncate_page);
 
 /*
  * The generic ->writepage function for buffer-backed address_spaces
@@ -2890,6 +2907,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	return __block_write_full_page(inode, page, get_block, wbc, handler);
 }
+EXPORT_SYMBOL(block_write_full_page_endio);
 
 /*
  * The generic ->writepage function for buffer-backed address_spaces
@@ -2900,7 +2918,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
 	return block_write_full_page_endio(page, get_block, wbc,
 					   end_buffer_async_write);
 }
-
+EXPORT_SYMBOL(block_write_full_page);
 
 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 			    get_block_t *get_block)
@@ -2913,6 +2931,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 	get_block(inode, block, &tmp, 0);
 	return tmp.b_blocknr;
 }
+EXPORT_SYMBOL(generic_block_bmap);
 
 static void end_bio_bh_io_sync(struct bio *bio, int err)
 {
@@ -2982,6 +3001,7 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio_put(bio);
 	return ret;
 }
+EXPORT_SYMBOL(submit_bh);
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
@@ -3043,6 +3063,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 		unlock_buffer(bh);
 	}
 }
+EXPORT_SYMBOL(ll_rw_block);
 
 /*
  * For a data-integrity writeout, we need to wait upon any in-progress I/O
@@ -3071,6 +3092,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(sync_dirty_buffer);
 
 /*
  * try_to_free_buffers() checks if all the buffers on this particular page
@@ -3185,6 +3207,7 @@ void block_sync_page(struct page *page)
 	if (mapping)
 		blk_run_backing_dev(mapping->backing_dev_info, page);
 }
+EXPORT_SYMBOL(block_sync_page);
 
 /*
  * There are no bdflush tunables left.  But distributions are
@@ -3361,29 +3384,3 @@ void __init buffer_init(void)
 	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
 	hotcpu_notifier(buffer_cpu_notify, 0);
 }
-
-EXPORT_SYMBOL(__bforget);
-EXPORT_SYMBOL(__brelse);
-EXPORT_SYMBOL(__wait_on_buffer);
-EXPORT_SYMBOL(block_commit_write);
-EXPORT_SYMBOL(block_prepare_write);
-EXPORT_SYMBOL(block_page_mkwrite);
-EXPORT_SYMBOL(block_read_full_page);
-EXPORT_SYMBOL(block_sync_page);
-EXPORT_SYMBOL(block_truncate_page);
-EXPORT_SYMBOL(block_write_full_page);
-EXPORT_SYMBOL(block_write_full_page_endio);
-EXPORT_SYMBOL(cont_write_begin);
-EXPORT_SYMBOL(end_buffer_read_sync);
-EXPORT_SYMBOL(end_buffer_write_sync);
-EXPORT_SYMBOL(end_buffer_async_write);
-EXPORT_SYMBOL(file_fsync);
-EXPORT_SYMBOL(generic_block_bmap);
-EXPORT_SYMBOL(generic_cont_expand_simple);
-EXPORT_SYMBOL(init_buffer);
-EXPORT_SYMBOL(invalidate_bdev);
-EXPORT_SYMBOL(ll_rw_block);
-EXPORT_SYMBOL(mark_buffer_dirty);
-EXPORT_SYMBOL(submit_bh);
-EXPORT_SYMBOL(sync_dirty_buffer);
-EXPORT_SYMBOL(unlock_buffer);
diff --git a/fs/sync.c b/fs/sync.c
index c08467a..d104591 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -183,6 +183,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 		ret = err;
 	return ret;
 }
+EXPORT_SYMBOL(file_fsync);
 
 /**
  * vfs_fsync_range - helper to sync a range of data & metadata to disk
-- 
cgit v0.10.2


From 8c87df457cb58fe75b9b893007917cf8095660a0 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Tue, 22 Sep 2009 16:43:52 -0700
Subject: BUILD_BUG_ON(): fix it and a couple of bogus uses of it

gcc permitting variable length arrays makes the current construct used for
BUILD_BUG_ON() useless, as that doesn't produce any diagnostic if the
controlling expression isn't really constant.  Instead, this patch makes
it so that a bit field gets used here.  Consequently, those uses where the
condition isn't really constant now also need fixing.

Note that in the gfp.h, kmemcheck.h, and virtio_config.h cases
MAYBE_BUILD_BUG_ON() really just serves documentation purposes - even if
the expression is compile time constant (__builtin_constant_p() yields
true), the array is still deemed of variable length by gcc, and hence the
whole expression doesn't have the intended effect.

[akpm@linux-foundation.org: make arch/sparc/include/asm/vio.h compile]
[akpm@linux-foundation.org: more nonsensical assertions in tpm.c..]
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Cc: Mimi Zohar <zohar@us.ibm.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index d4de32f..6cdbf7e 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -258,7 +258,7 @@ static inline void *vio_dring_entry(struct vio_dring_state *dr,
 static inline u32 vio_dring_avail(struct vio_dring_state *dr,
 				  unsigned int ring_size)
 {
-	BUILD_BUG_ON(!is_power_of_2(ring_size));
+	MAYBE_BUILD_BUG_ON(!is_power_of_2(ring_size));
 
 	return (dr->pending -
 		((dr->prod - dr->cons) & (ring_size - 1)));
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index b0603b2..32b957e 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -696,7 +696,7 @@ int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
 
 	cmd.header.in = pcrread_header;
 	cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
-	BUILD_BUG_ON(cmd.header.in.length > READ_PCR_RESULT_SIZE);
+	BUG_ON(cmd.header.in.length > READ_PCR_RESULT_SIZE);
 	rc = transmit_cmd(chip, &cmd, cmd.header.in.length,
 			  "attempting to read a pcr value");
 
@@ -760,7 +760,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
 		return -ENODEV;
 
 	cmd.header.in = pcrextend_header;
-	BUILD_BUG_ON(be32_to_cpu(cmd.header.in.length) > EXTEND_PCR_SIZE);
+	BUG_ON(be32_to_cpu(cmd.header.in.length) > EXTEND_PCR_SIZE);
 	cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
 	memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
 	rc = transmit_cmd(chip, &cmd, cmd.header.in.length,
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 76cc261..f9364d0 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -5615,7 +5615,7 @@ static void niu_init_tx_mac(struct niu *np)
 	/* The XMAC_MIN register only accepts values for TX min which
 	 * have the low 3 bits cleared.
 	 */
-	BUILD_BUG_ON(min & 0x7);
+	BUG_ON(min & 0x7);
 
 	if (np->flags & NIU_FLAGS_XMAC)
 		niu_init_tx_xmac(np, min, max);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f53e9b8..557bdad 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -220,7 +220,7 @@ static inline enum zone_type gfp_zone(gfp_t flags)
 					 ((1 << ZONES_SHIFT) - 1);
 
 	if (__builtin_constant_p(bit))
-		BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+		MAYBE_BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
 	else {
 #ifdef CONFIG_DEBUG_VM
 		BUG_ON((GFP_ZONE_BAD >> bit) & 1);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 55723af..63dcaec 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -678,13 +678,17 @@ struct sysinfo {
 };
 
 /* Force a compilation error if condition is true */
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+
+/* Force a compilation error if condition is constant and true */
+#define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
 
 /* Force a compilation error if condition is true, but also produce a
    result (of value 0 and type size_t), so the expression can be used
    e.g. in a structure initializer (or where-ever else comma expressions
    aren't permitted). */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 136cdcd..e880d4c 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -152,7 +152,7 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
 									\
 		_n = (long) &((ptr)->name##_end)			\
 			- (long) &((ptr)->name##_begin);		\
-		BUILD_BUG_ON(_n < 0);					\
+		MAYBE_BUILD_BUG_ON(_n < 0);				\
 									\
 		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
 	} while (0)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index e547e3c..0093dd7 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -109,8 +109,7 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
 				      unsigned int fbit)
 {
 	/* Did you forget to fix assumptions on max features? */
-	if (__builtin_constant_p(fbit))
-		BUILD_BUG_ON(fbit >= 32);
+	MAYBE_BUILD_BUG_ON(fbit >= 32);
 
 	if (fbit < VIRTIO_TRANSPORT_F_START)
 		virtio_check_driver_offered_feature(vdev, fbit);
-- 
cgit v0.10.2


From 385773e04806e8903e9ec683f5c4bd14926a86dc Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Tue, 22 Sep 2009 16:43:53 -0700
Subject: aio.c: move EXPORT* macros to line after function

As mentioned in Documentation/CodingStyle, move EXPORT* macro's
to the line immediately after the closing function brace line.

Also, move the __initcall() similarly.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/aio.c b/fs/aio.c
index fc21c23..02a2c93 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -78,6 +78,7 @@ static int __init aio_setup(void)
 
 	return 0;
 }
+__initcall(aio_setup);
 
 static void aio_free_ring(struct kioctx *ctx)
 {
@@ -380,6 +381,7 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
 	__set_current_state(TASK_RUNNING);
 	return iocb->ki_user_data;
 }
+EXPORT_SYMBOL(wait_on_sync_kiocb);
 
 /* exit_aio: called when the last user of mm goes away.  At this point, 
  * there is no way for any new requests to be submited or any of the 
@@ -573,6 +575,7 @@ int aio_put_req(struct kiocb *req)
 	spin_unlock_irq(&ctx->ctx_lock);
 	return ret;
 }
+EXPORT_SYMBOL(aio_put_req);
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
@@ -992,6 +995,7 @@ put_rq:
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
 }
+EXPORT_SYMBOL(aio_complete);
 
 /* aio_read_evt
  *	Pull an event off of the ioctx's event ring.  Returns the number of 
@@ -1780,9 +1784,3 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
 	asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout);
 	return ret;
 }
-
-__initcall(aio_setup);
-
-EXPORT_SYMBOL(aio_complete);
-EXPORT_SYMBOL(aio_put_req);
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-- 
cgit v0.10.2


From 515350b6fd041396f425180589e08812dd13615f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 22 Sep 2009 16:43:54 -0700
Subject: MAINTAINERS: remove dead ncpfs list

On Saturday 01 August 2009 00:30:39 Mail Delivery Subsystem wrote:
> Delivery to the following recipient failed permanently:
>
>      linware@sh.cvut.cz
>
> Technical details of permanent failure:
> Google tried to deliver your message, but it was rejected by the recipient
> domain. We recommend contacting the other email provider for further
> information about the cause of this error. The error that the other server
> returned was: 450 450 <linware@sh.cvut.cz>: Recipient address rejected:
> undeliverable address: unknown user: "linware" (state 14).

Cc: Petr Vandrovec <vandrove@vc.cvut.cz>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index d24c882..017cf69 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3527,7 +3527,6 @@ F:	drivers/net/natsemi.c
 
 NCP FILESYSTEM
 M:	Petr Vandrovec <vandrove@vc.cvut.cz>
-L:	linware@sh.cvut.cz
 S:	Maintained
 F:	fs/ncpfs/
 
-- 
cgit v0.10.2


From 562787a5c32ccdf182de27793a83a9f2ee86cd77 Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Tue, 22 Sep 2009 16:43:57 -0700
Subject: anonfd: split interface into file creation and install

Split the anonfd interface into a bare file pointer creation one, and a
file pointer creation plus install one.

There are cases, like the usage of eventfds inside other kernel
interfaces, where the file pointer created by anonfd needs to be used
inside the initialization of other structures.

As it is right now, as soon as anon_inode_getfd() returns, the kenrle can
race with userspace closing the newly installed file descriptor.

This patch, while keeping the old anon_inode_getfd(), introduces a new
anon_inode_getfile() (whose services are reused in anon_inode_getfd())
that allows to split the file creation phase and the fd install one.

Once all the kernel structures are initialized, the code can call the
proper fd_install().

Gregory manifested the need for something like this inside KVM.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: James Morris <jmorris@namei.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Gregory Haskins <ghaskins@novell.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 47d4a01..d11c51f 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -77,28 +77,24 @@ static const struct address_space_operations anon_aops = {
  *
  * Creates a new file by hooking it on a single inode. This is useful for files
  * that do not need to have a full-fledged inode in order to operate correctly.
- * All the files created with anon_inode_getfd() will share a single inode,
+ * All the files created with anon_inode_getfile() will share a single inode,
  * hence saving memory and avoiding code duplication for the file/inode/dentry
- * setup.  Returns new descriptor or -error.
+ * setup.  Returns the newly created file* or an error pointer.
  */
-int anon_inode_getfd(const char *name, const struct file_operations *fops,
-		     void *priv, int flags)
+struct file *anon_inode_getfile(const char *name,
+				const struct file_operations *fops,
+				void *priv, int flags)
 {
 	struct qstr this;
 	struct dentry *dentry;
 	struct file *file;
-	int error, fd;
+	int error;
 
 	if (IS_ERR(anon_inode_inode))
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 
 	if (fops->owner && !try_module_get(fops->owner))
-		return -ENOENT;
-
-	error = get_unused_fd_flags(flags);
-	if (error < 0)
-		goto err_module;
-	fd = error;
+		return ERR_PTR(-ENOENT);
 
 	/*
 	 * Link the inode to a directory entry by creating a unique name
@@ -110,7 +106,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
 	this.hash = 0;
 	dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
 	if (!dentry)
-		goto err_put_unused_fd;
+		goto err_module;
 
 	/*
 	 * We know the anon_inode inode count is always greater than zero,
@@ -136,16 +132,54 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
 	file->f_version = 0;
 	file->private_data = priv;
 
+	return file;
+
+err_dput:
+	dput(dentry);
+err_module:
+	module_put(fops->owner);
+	return ERR_PTR(error);
+}
+EXPORT_SYMBOL_GPL(anon_inode_getfile);
+
+/**
+ * anon_inode_getfd - creates a new file instance by hooking it up to an
+ *                    anonymous inode, and a dentry that describe the "class"
+ *                    of the file
+ *
+ * @name:    [in]    name of the "class" of the new file
+ * @fops:    [in]    file operations for the new file
+ * @priv:    [in]    private data for the new file (will be file's private_data)
+ * @flags:   [in]    flags
+ *
+ * Creates a new file by hooking it on a single inode. This is useful for files
+ * that do not need to have a full-fledged inode in order to operate correctly.
+ * All the files created with anon_inode_getfd() will share a single inode,
+ * hence saving memory and avoiding code duplication for the file/inode/dentry
+ * setup.  Returns new descriptor or an error code.
+ */
+int anon_inode_getfd(const char *name, const struct file_operations *fops,
+		     void *priv, int flags)
+{
+	int error, fd;
+	struct file *file;
+
+	error = get_unused_fd_flags(flags);
+	if (error < 0)
+		return error;
+	fd = error;
+
+	file = anon_inode_getfile(name, fops, priv, flags);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto err_put_unused_fd;
+	}
 	fd_install(fd, file);
 
 	return fd;
 
-err_dput:
-	dput(dentry);
 err_put_unused_fd:
 	put_unused_fd(fd);
-err_module:
-	module_put(fops->owner);
 	return error;
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 31d12de..8b47e42 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -68,11 +68,16 @@ int eventfd_signal(struct eventfd_ctx *ctx, int n)
 }
 EXPORT_SYMBOL_GPL(eventfd_signal);
 
+static void eventfd_free_ctx(struct eventfd_ctx *ctx)
+{
+	kfree(ctx);
+}
+
 static void eventfd_free(struct kref *kref)
 {
 	struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
 
-	kfree(ctx);
+	eventfd_free_ctx(ctx);
 }
 
 /**
@@ -298,9 +303,23 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
 }
 EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
 
-SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
+/**
+ * eventfd_file_create - Creates an eventfd file pointer.
+ * @count: Initial eventfd counter value.
+ * @flags: Flags for the eventfd file.
+ *
+ * This function creates an eventfd file pointer, w/out installing it into
+ * the fd table. This is useful when the eventfd file is used during the
+ * initialization of data structures that require extra setup after the eventfd
+ * creation. So the eventfd creation is split into the file pointer creation
+ * phase, and the file descriptor installation phase.
+ * In this way races with userspace closing the newly installed file descriptor
+ * can be avoided.
+ * Returns an eventfd file pointer, or a proper error pointer.
+ */
+struct file *eventfd_file_create(unsigned int count, int flags)
 {
-	int fd;
+	struct file *file;
 	struct eventfd_ctx *ctx;
 
 	/* Check the EFD_* constants for consistency.  */
@@ -308,26 +327,48 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
 
 	if (flags & ~EFD_FLAGS_SET)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ctx->kref);
 	init_waitqueue_head(&ctx->wqh);
 	ctx->count = count;
 	ctx->flags = flags;
 
-	/*
-	 * When we call this, the initialization must be complete, since
-	 * anon_inode_getfd() will install the fd.
-	 */
-	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
-			      flags & EFD_SHARED_FCNTL_FLAGS);
-	if (fd < 0)
-		kfree(ctx);
+	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
+				  flags & EFD_SHARED_FCNTL_FLAGS);
+	if (IS_ERR(file))
+		eventfd_free_ctx(ctx);
+
+	return file;
+}
+
+SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
+{
+	int fd, error;
+	struct file *file;
+
+	error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
+	if (error < 0)
+		return error;
+	fd = error;
+
+	file = eventfd_file_create(count, flags);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto err_put_unused_fd;
+	}
+	fd_install(fd, file);
+
 	return fd;
+
+err_put_unused_fd:
+	put_unused_fd(fd);
+
+	return error;
 }
 
 SYSCALL_DEFINE1(eventfd, unsigned int, count)
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index e0a0cdc..69a21e0 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -8,6 +8,9 @@
 #ifndef _LINUX_ANON_INODES_H
 #define _LINUX_ANON_INODES_H
 
+struct file *anon_inode_getfile(const char *name,
+				const struct file_operations *fops,
+				void *priv, int flags);
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
 		     void *priv, int flags);
 
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 3b85ba6..94dd103 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -27,6 +27,7 @@
 
 #ifdef CONFIG_EVENTFD
 
+struct file *eventfd_file_create(unsigned int count, int flags);
 struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx);
 void eventfd_ctx_put(struct eventfd_ctx *ctx);
 struct file *eventfd_fget(int fd);
@@ -40,6 +41,11 @@ int eventfd_signal(struct eventfd_ctx *ctx, int n);
  * Ugly ugly ugly error layer to support modules that uses eventfd but
  * pretend to work in !CONFIG_EVENTFD configurations. Namely, AIO.
  */
+static inline struct file *eventfd_file_create(unsigned int count, int flags)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
 static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd)
 {
 	return ERR_PTR(-ENOSYS);
-- 
cgit v0.10.2


From 8a9f47ddb1d5cc3cda2d1f26f8da74e059fa7b87 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 22 Sep 2009 16:43:58 -0700
Subject: ntfs: remove ntfs_file_write

do_sync_write() does the right thing for turning the aio_writev method
into a normal non-vectored synchronous write, no need to duplicate it in
ntfs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 4350d49..663c0e3 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2146,46 +2146,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 }
 
 /**
- * ntfs_file_writev -
- *
- * Basically the same as generic_file_writev() except that it ends up calling
- * ntfs_file_aio_write_nolock() instead of __generic_file_aio_write_nolock().
- */
-static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
-		unsigned long nr_segs, loff_t *ppos)
-{
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	struct kiocb kiocb;
-	ssize_t ret;
-
-	mutex_lock(&inode->i_mutex);
-	init_sync_kiocb(&kiocb, file);
-	ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-	mutex_unlock(&inode->i_mutex);
-	if (ret > 0) {
-		int err = generic_write_sync(file, *ppos - ret, ret);
-		if (err < 0)
-			ret = err;
-	}
-	return ret;
-}
-
-/**
- * ntfs_file_write - simple wrapper for ntfs_file_writev()
- */
-static ssize_t ntfs_file_write(struct file *file, const char __user *buf,
-		size_t count, loff_t *ppos)
-{
-	struct iovec local_iov = { .iov_base = (void __user *)buf,
-				   .iov_len = count };
-
-	return ntfs_file_writev(file, &local_iov, 1, ppos);
-}
-
-/**
  * ntfs_file_fsync - sync a file to disk
  * @filp:	file to be synced
  * @dentry:	dentry describing the file to sync
@@ -2247,7 +2207,7 @@ const struct file_operations ntfs_file_ops = {
 	.read		= do_sync_read,		 /* Read from file. */
 	.aio_read	= generic_file_aio_read, /* Async read from file. */
 #ifdef NTFS_RW
-	.write		= ntfs_file_write,	 /* Write to file. */
+	.write		= do_sync_write,	 /* Write to file. */
 	.aio_write	= ntfs_file_aio_write,	 /* Async write to file. */
 	/*.release	= ,*/			 /* Last file is closed.  See
 						    fs/ext2/file.c::
-- 
cgit v0.10.2


From 945ffe54bbd56ceed62de3b908800fd7c6ffb284 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 22 Sep 2009 16:43:59 -0700
Subject: qnx4: remove write support

qnx4 wrte support has never been fully implement, is broken since the dawn
of time and hasn't been actively developed since before git history
started.

Instead of letting it further bitrot and complicate API transition (like
the new truncate code) remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Anders Larsen <al@alarsen.net>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/qnx4/Kconfig b/fs/qnx4/Kconfig
index be8e0e1..5f60899 100644
--- a/fs/qnx4/Kconfig
+++ b/fs/qnx4/Kconfig
@@ -6,20 +6,9 @@ config QNX4FS_FS
 	  QNX 4 and QNX 6 (the latter is also called QNX RTP).
 	  Further information is available at <http://www.qnx.com/>.
 	  Say Y if you intend to mount QNX hard disks or floppies.
-	  Unless you say Y to "QNX4FS read-write support" below, you will
-	  only be able to read these file systems.
 
 	  To compile this file system support as a module, choose M here: the
 	  module will be called qnx4.
 
 	  If you don't know whether you need it, then you don't need it:
 	  answer N.
-
-config QNX4FS_RW
-	bool "QNX4FS write support (DANGEROUS)"
-	depends on QNX4FS_FS && EXPERIMENTAL && BROKEN
-	help
-	  Say Y if you want to test write support for QNX4 file systems.
-
-	  It's currently broken, so for now:
-	  answer N.
diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile
index e4d408c..4a283b3 100644
--- a/fs/qnx4/Makefile
+++ b/fs/qnx4/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_QNX4FS_FS) += qnx4.o
 
-qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o
+qnx4-objs := inode.o dir.o namei.o bitmap.o
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index e1cd061..0afba06 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -78,84 +78,3 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb)
 
 	return total_free;
 }
-
-#ifdef CONFIG_QNX4FS_RW
-
-int qnx4_is_free(struct super_block *sb, long block)
-{
-	int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1;
-	int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size);
-	struct buffer_head *bh;
-	const char *g;
-	int ret = -EIO;
-
-	start += block / (QNX4_BLOCK_SIZE * 8);
-	QNX4DEBUG(("qnx4: is_free requesting block [%lu], bitmap in block [%lu]\n",
-		   (unsigned long) block, (unsigned long) start));
-	(void) size;		/* CHECKME */
-	bh = sb_bread(sb, start);
-	if (bh == NULL) {
-		return -EIO;
-	}
-	g = bh->b_data + (block % QNX4_BLOCK_SIZE);
-	if (((*g) & (1 << (block % 8))) == 0) {
-		QNX4DEBUG(("qnx4: is_free -> block is free\n"));
-		ret = 1;
-	} else {
-		QNX4DEBUG(("qnx4: is_free -> block is busy\n"));
-		ret = 0;
-	}
-	brelse(bh);
-
-	return ret;
-}
-
-int qnx4_set_bitmap(struct super_block *sb, long block, int busy)
-{
-	int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1;
-	int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size);
-	struct buffer_head *bh;
-	char *g;
-
-	start += block / (QNX4_BLOCK_SIZE * 8);
-	QNX4DEBUG(("qnx4: set_bitmap requesting block [%lu], bitmap in block [%lu]\n",
-		   (unsigned long) block, (unsigned long) start));
-	(void) size;		/* CHECKME */
-	bh = sb_bread(sb, start);
-	if (bh == NULL) {
-		return -EIO;
-	}
-	g = bh->b_data + (block % QNX4_BLOCK_SIZE);
-	if (busy == 0) {
-		(*g) &= ~(1 << (block % 8));
-	} else {
-		(*g) |= (1 << (block % 8));
-	}
-	mark_buffer_dirty(bh);
-	brelse(bh);
-
-	return 0;
-}
-
-static void qnx4_clear_inode(struct inode *inode)
-{
-	struct qnx4_inode_entry *qnx4_ino = qnx4_raw_inode(inode);
-	/* What for? */
-	memset(qnx4_ino->di_fname, 0, sizeof qnx4_ino->di_fname);
-	qnx4_ino->di_size = 0;
-	qnx4_ino->di_num_xtnts = 0;
-	qnx4_ino->di_mode = 0;
-	qnx4_ino->di_status = 0;
-}
-
-void qnx4_free_inode(struct inode *inode)
-{
-	if (inode->i_ino < 1) {
-		printk("free_inode: inode 0 or nonexistent inode\n");
-		return;
-	}
-	qnx4_clear_inode(inode);
-	clear_inode(inode);
-}
-
-#endif
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 003c68f..86cc39c 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -85,9 +85,4 @@ const struct file_operations qnx4_dir_operations =
 const struct inode_operations qnx4_dir_inode_operations =
 {
 	.lookup		= qnx4_lookup,
-#ifdef CONFIG_QNX4FS_RW
-	.create		= qnx4_create,
-	.unlink		= qnx4_unlink,
-	.rmdir		= qnx4_rmdir,
-#endif
 };
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
deleted file mode 100644
index 09b170a..0000000
--- a/fs/qnx4/file.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * QNX4 file system, Linux implementation.
- *
- * Version : 0.2.1
- *
- * Using parts of the xiafs filesystem.
- *
- * History :
- *
- * 25-05-1998 by Richard Frowijn : first release.
- * 21-06-1998 by Frank Denis : wrote qnx4_readpage to use generic_file_read.
- * 27-06-1998 by Frank Denis : file overwriting.
- */
-
-#include "qnx4.h"
-
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the qnx4 filesystem.
- */
-const struct file_operations qnx4_file_operations =
-{
-	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.mmap		= generic_file_mmap,
-	.splice_read	= generic_file_splice_read,
-#ifdef CONFIG_QNX4FS_RW
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
-	.fsync		= simple_fsync,
-#endif
-};
-
-const struct inode_operations qnx4_file_inode_operations =
-{
-#ifdef CONFIG_QNX4FS_RW
-	.truncate	= qnx4_truncate,
-#endif
-};
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 681df5f..d2cd179 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -28,73 +28,6 @@
 
 static const struct super_operations qnx4_sops;
 
-#ifdef CONFIG_QNX4FS_RW
-
-static void qnx4_delete_inode(struct inode *inode)
-{
-	QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
-	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	qnx4_truncate(inode);
-	lock_kernel();
-	qnx4_free_inode(inode);
-	unlock_kernel();
-}
-
-static int qnx4_write_inode(struct inode *inode, int do_sync)
-{
-	struct qnx4_inode_entry *raw_inode;
-	int block, ino;
-	struct buffer_head *bh;
-	ino = inode->i_ino;
-
-	QNX4DEBUG(("qnx4: write inode 1.\n"));
-	if (inode->i_nlink == 0) {
-		return 0;
-	}
-	if (!ino) {
-		printk("qnx4: bad inode number on dev %s: %d is out of range\n",
-		       inode->i_sb->s_id, ino);
-		return -EIO;
-	}
-	QNX4DEBUG(("qnx4: write inode 2.\n"));
-	block = ino / QNX4_INODES_PER_BLOCK;
-	lock_kernel();
-	if (!(bh = sb_bread(inode->i_sb, block))) {
-		printk("qnx4: major problem: unable to read inode from dev "
-		       "%s\n", inode->i_sb->s_id);
-		unlock_kernel();
-		return -EIO;
-	}
-	raw_inode = ((struct qnx4_inode_entry *) bh->b_data) +
-	    (ino % QNX4_INODES_PER_BLOCK);
-	raw_inode->di_mode  = cpu_to_le16(inode->i_mode);
-	raw_inode->di_uid   = cpu_to_le16(fs_high2lowuid(inode->i_uid));
-	raw_inode->di_gid   = cpu_to_le16(fs_high2lowgid(inode->i_gid));
-	raw_inode->di_nlink = cpu_to_le16(inode->i_nlink);
-	raw_inode->di_size  = cpu_to_le32(inode->i_size);
-	raw_inode->di_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
-	raw_inode->di_atime = cpu_to_le32(inode->i_atime.tv_sec);
-	raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
-	raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks);
-	mark_buffer_dirty(bh);
-	if (do_sync) {
-		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh)) {
-			printk("qnx4: IO error syncing inode [%s:%08x]\n",
-					inode->i_sb->s_id, ino);
-			brelse(bh);
-			unlock_kernel();
-			return -EIO;
-		}
-	}
-	brelse(bh);
-	unlock_kernel();
-	return 0;
-}
-
-#endif
-
 static void qnx4_put_super(struct super_block *sb);
 static struct inode *qnx4_alloc_inode(struct super_block *sb);
 static void qnx4_destroy_inode(struct inode *inode);
@@ -108,10 +41,6 @@ static const struct super_operations qnx4_sops =
 	.put_super	= qnx4_put_super,
 	.statfs		= qnx4_statfs,
 	.remount_fs	= qnx4_remount,
-#ifdef CONFIG_QNX4FS_RW
-	.write_inode	= qnx4_write_inode,
-	.delete_inode	= qnx4_delete_inode,
-#endif
 };
 
 static int qnx4_remount(struct super_block *sb, int *flags, char *data)
@@ -120,15 +49,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
 
 	qs = qnx4_sb(sb);
 	qs->Version = QNX4_VERSION;
-#ifndef CONFIG_QNX4FS_RW
 	*flags |= MS_RDONLY;
-#endif
-	if (*flags & MS_RDONLY) {
-		return 0;
-	}
-
-	mark_buffer_dirty(qs->sb_buf);
-
 	return 0;
 }
 
@@ -354,9 +275,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
 	}
 	s->s_op = &qnx4_sops;
 	s->s_magic = QNX4_SUPER_MAGIC;
-#ifndef CONFIG_QNX4FS_RW
 	s->s_flags |= MS_RDONLY;	/* Yup, read-only yet */
-#endif
 	qnx4_sb(s)->sb_buf = bh;
 	qnx4_sb(s)->sb = (struct qnx4_super_block *) bh->b_data;
 
@@ -489,8 +408,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
 
 	memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);
 	if (S_ISREG(inode->i_mode)) {
-		inode->i_op = &qnx4_file_inode_operations;
-		inode->i_fop = &qnx4_file_operations;
+		inode->i_fop = &generic_ro_fops;
 		inode->i_mapping->a_ops = &qnx4_aops;
 		qnx4_i(inode)->mmu_private = inode->i_size;
 	} else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 5972ed2..ae1e7ed 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -134,108 +134,3 @@ out:
 
 	return NULL;
 }
-
-#ifdef CONFIG_QNX4FS_RW
-int qnx4_create(struct inode *dir, struct dentry *dentry, int mode,
-		struct nameidata *nd)
-{
-	QNX4DEBUG(("qnx4: qnx4_create\n"));
-	if (dir == NULL) {
-		return -ENOENT;
-	}
-	return -ENOSPC;
-}
-
-int qnx4_rmdir(struct inode *dir, struct dentry *dentry)
-{
-	struct buffer_head *bh;
-	struct qnx4_inode_entry *de;
-	struct inode *inode;
-	int retval;
-	int ino;
-
-	QNX4DEBUG(("qnx4: qnx4_rmdir [%s]\n", dentry->d_name.name));
-	lock_kernel();
-	bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name,
-			     &de, &ino);
-	if (bh == NULL) {
-		unlock_kernel();
-		return -ENOENT;
-	}
-	inode = dentry->d_inode;
-	if (inode->i_ino != ino) {
-		retval = -EIO;
-		goto end_rmdir;
-	}
-#if 0
-	if (!empty_dir(inode)) {
-		retval = -ENOTEMPTY;
-		goto end_rmdir;
-	}
-#endif
-	if (inode->i_nlink != 2) {
-		QNX4DEBUG(("empty directory has nlink!=2 (%d)\n", inode->i_nlink));
-	}
-	QNX4DEBUG(("qnx4: deleting directory\n"));
-	de->di_status = 0;
-	memset(de->di_fname, 0, sizeof de->di_fname);
-	de->di_mode = 0;
-	mark_buffer_dirty_inode(bh, dir);
-	clear_nlink(inode);
-	mark_inode_dirty(inode);
-	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
-	inode_dec_link_count(dir);
-	retval = 0;
-
-      end_rmdir:
-	brelse(bh);
-
-	unlock_kernel();
-	return retval;
-}
-
-int qnx4_unlink(struct inode *dir, struct dentry *dentry)
-{
-	struct buffer_head *bh;
-	struct qnx4_inode_entry *de;
-	struct inode *inode;
-	int retval;
-	int ino;
-
-	QNX4DEBUG(("qnx4: qnx4_unlink [%s]\n", dentry->d_name.name));
-	lock_kernel();
-	bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name,
-			     &de, &ino);
-	if (bh == NULL) {
-		unlock_kernel();
-		return -ENOENT;
-	}
-	inode = dentry->d_inode;
-	if (inode->i_ino != ino) {
-		retval = -EIO;
-		goto end_unlink;
-	}
-	retval = -EPERM;
-	if (!inode->i_nlink) {
-		QNX4DEBUG(("Deleting nonexistent file (%s:%lu), %d\n",
-			   inode->i_sb->s_id,
-			   inode->i_ino, inode->i_nlink));
-		inode->i_nlink = 1;
-	}
-	de->di_status = 0;
-	memset(de->di_fname, 0, sizeof de->di_fname);
-	de->di_mode = 0;
-	mark_buffer_dirty_inode(bh, dir);
-	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
-	mark_inode_dirty(dir);
-	inode->i_ctime = dir->i_ctime;
-	inode_dec_link_count(inode);
-	retval = 0;
-
-end_unlink:
-	unlock_kernel();
-	brelse(bh);
-
-	return retval;
-}
-#endif
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 9efc089..33a6085 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -29,17 +29,9 @@ extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
 
 extern struct buffer_head *qnx4_bread(struct inode *, int, int);
 
-extern const struct inode_operations qnx4_file_inode_operations;
 extern const struct inode_operations qnx4_dir_inode_operations;
-extern const struct file_operations qnx4_file_operations;
 extern const struct file_operations qnx4_dir_operations;
 extern int qnx4_is_free(struct super_block *sb, long block);
-extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
-extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
-extern void qnx4_truncate(struct inode *inode);
-extern void qnx4_free_inode(struct inode *inode);
-extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
-extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
 
 static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
 {
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c
deleted file mode 100644
index d94d9ee..0000000
--- a/fs/qnx4/truncate.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* 
- * QNX4 file system, Linux implementation.
- * 
- * Version : 0.1
- * 
- * Using parts of the xiafs filesystem.
- * 
- * History :
- * 
- * 30-06-1998 by Frank DENIS : ugly filler.
- */
-
-#include <linux/smp_lock.h>
-#include "qnx4.h"
-
-#ifdef CONFIG_QNX4FS_RW
-
-void qnx4_truncate(struct inode *inode)
-{
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	      S_ISLNK(inode->i_mode))) {
-		return;
-	}
-	lock_kernel();
-	if (!(S_ISDIR(inode->i_mode))) {
-		/* TODO */
-	}
-	QNX4DEBUG(("qnx4: qnx4_truncate called\n"));
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-	mark_inode_dirty(inode);
-	unlock_kernel();
-}
-
-#endif
-- 
cgit v0.10.2


From 54447c3e8f63524fcdd40395bb2d405cab5555a7 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Tue, 22 Sep 2009 16:44:01 -0700
Subject: vlynq: includecheck fix: drivers/vlynq/vlynq.c

Fix the following 'make includecheck' warning:

drivers/vlynq/vlynq.c: linux/device.h is included more than once.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c
index f05d2a3..ba3d71f 100644
--- a/drivers/vlynq/vlynq.c
+++ b/drivers/vlynq/vlynq.c
@@ -28,7 +28,6 @@
 #include <linux/errno.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
-#include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 
-- 
cgit v0.10.2


From d7d7561c908afa001ab0fc8212eee94731a213a6 Mon Sep 17 00:00:00 2001
From: Suzuki Poulose <suzuki@in.ibm.com>
Date: Tue, 22 Sep 2009 16:44:02 -0700
Subject: fix compat_sys_utimensat()

Compat utimensat() returns EINVAL when the tv_nsec is one of UTIME_OMIT or
UTIME_NOW and the tv_sec is set to non-zero.  As per man pages, the tv_sec
field should be ignored.

sys_utimensat() works fine in this case.

Test case:

#define _GNU_SOURCE
#define _ATFILE_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <stdlib.h>

main(int argc, char *argv[])
{
	struct timespec ts[2];
	struct timespec *tsp;

	if (argc < 2) {
		fprintf(stderr, "Usage : %s filename\n", argv[0]);
		exit (-1);
	}

	ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
	ts[0].tv_sec = ts[1].tv_sec = 1;

	tsp = ts;

	if (utimensat(AT_FDCWD, argv[1],tsp,0) == -1)
		perror("utimensat");
	else
		fprintf(stdout, "utimensat success\n");
	return 0;
}
mjs22lp5:~ # cc -m64 utimensat-test.c -o utimensat_test64
mjs22lp5:~ # cc -m32 utimensat-test.c -o utimensat_test32
mjs22lp5:~ # ./utimensat_test32 /tmp/utimensat_test
utimensat: Invalid argument
mjs22lp5:~ # ./utimensat_test64 /tmp/utimensat_test
utimensat success
mjs22lp5:~ # uname -r
2.6.31-rc8

With the patch :

mjs22lp5:~ # ./utimensat_test64 /tmp/utimensat_test
utimensat success
mjs22lp5:~ # ./utimensat_test32 /tmp/utimensat_test
utimensat success
mjs22lp5:~ # uname -r
2.6.31-rc8utimensat

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/compat.c b/fs/compat.c
index 6d6f98f..3aa4883 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -100,13 +100,6 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, st
 		    get_compat_timespec(&tv[1], &t[1]))
 			return -EFAULT;
 
-		if ((tv[0].tv_nsec == UTIME_OMIT || tv[0].tv_nsec == UTIME_NOW)
-		    && tv[0].tv_sec != 0)
-			return -EINVAL;
-		if ((tv[1].tv_nsec == UTIME_OMIT || tv[1].tv_nsec == UTIME_NOW)
-		    && tv[1].tv_sec != 0)
-			return -EINVAL;
-
 		if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
 			return 0;
 	}
-- 
cgit v0.10.2


From a49c59c042c63b432307c1bbf7dac5a104c786e6 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Tue, 22 Sep 2009 16:44:03 -0700
Subject: Make sure the value in abs() does not get truncated if it is greater
 than 2^32

abs() will truncate the input if is it outside the 2^32 range.  Fix that
by assuming `long' input.

This might generate worse code in the common case.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 63dcaec..d3cd23f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -146,7 +146,7 @@ extern int _cond_resched(void);
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
 
 #define abs(x) ({				\
-		int __x = (x);			\
+		long __x = (x);			\
 		(__x < 0) ? -__x : __x;		\
 	})
 
-- 
cgit v0.10.2


From b28cfd2c0616e1b42acc6ee3c77ef6cc3873c510 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 22 Sep 2009 16:44:05 -0700
Subject: kmap_types.h: rename D macro

I tend to use a 'D' debugging macro a lot during debugging.  When I define
it before includes I often get conflicts with kmap_types.h's use of 'D'
too.  It's not very nice when a global include pollutes the name space
like this.

Rename the kmap_types.h D to KMAP_D.  It is only used temporarily in the
header so has no effect on anything else.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h
index eddbce0..e5f234a 100644
--- a/include/asm-generic/kmap_types.h
+++ b/include/asm-generic/kmap_types.h
@@ -2,34 +2,35 @@
 #define _ASM_GENERIC_KMAP_TYPES_H
 
 #ifdef __WITH_KM_FENCE
-# define D(n) __KM_FENCE_##n ,
+# define KMAP_D(n) __KM_FENCE_##n ,
 #else
-# define D(n)
+# define KMAP_D(n)
 #endif
 
 enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_SYNC_ICACHE,
-D(14)	KM_SYNC_DCACHE,
-D(15)	KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */
-D(16)	KM_IRQ_PTE,
-D(17)	KM_NMI,
-D(18)	KM_NMI_PTE,
-D(19)	KM_TYPE_NR
+KMAP_D(0)	KM_BOUNCE_READ,
+KMAP_D(1)	KM_SKB_SUNRPC_DATA,
+KMAP_D(2)	KM_SKB_DATA_SOFTIRQ,
+KMAP_D(3)	KM_USER0,
+KMAP_D(4)	KM_USER1,
+KMAP_D(5)	KM_BIO_SRC_IRQ,
+KMAP_D(6)	KM_BIO_DST_IRQ,
+KMAP_D(7)	KM_PTE0,
+KMAP_D(8)	KM_PTE1,
+KMAP_D(9)	KM_IRQ0,
+KMAP_D(10)	KM_IRQ1,
+KMAP_D(11)	KM_SOFTIRQ0,
+KMAP_D(12)	KM_SOFTIRQ1,
+KMAP_D(13)	KM_SYNC_ICACHE,
+KMAP_D(14)	KM_SYNC_DCACHE,
+/* UML specific, for copy_*_user - used in do_op_one_page */
+KMAP_D(15)	KM_UML_USERCOPY,
+KMAP_D(16)	KM_IRQ_PTE,
+KMAP_D(17)	KM_NMI,
+KMAP_D(18)	KM_NMI_PTE,
+KMAP_D(19)	KM_TYPE_NR
 };
 
-#undef D
+#undef KMAP_D
 
 #endif
-- 
cgit v0.10.2


From 1f10206cf8e945220f7220a809d8bfc15c21f9a5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 22 Sep 2009 16:44:10 -0700
Subject: getrusage: fill ru_maxrss value

Make ->ru_maxrss value in struct rusage filled accordingly to rss hiwater
mark.  This struct is filled as a parameter to getrusage syscall.
->ru_maxrss value is set to KBs which is the way it is done in BSD
systems.  /usr/bin/time (gnu time) application converts ->ru_maxrss to KBs
which seems to be incorrect behavior.  Maintainer of this util was
notified by me with the patch which corrects it and cc'ed.

To make this happen we extend struct signal_struct by two fields.  The
first one is ->maxrss which we use to store rss hiwater of the task.  The
second one is ->cmaxrss which we use to store highest rss hiwater of all
task childs.  These values are used in k_getrusage() to actually fill
->ru_maxrss.  k_getrusage() uses current rss hiwater value directly if mm
struct exists.

Note:
exec() clear mm->hiwater_rss, but doesn't clear sig->maxrss.
it is intetionally behavior. *BSD getrusage have exec() inheriting.

test programs
========================================================

getrusage.c
===========
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
 #include <signal.h>
 #include <sys/mman.h>

 #include "common.h"

 #define err(str) perror(str), exit(1)

int main(int argc, char** argv)
{
	int status;

	printf("allocate 100MB\n");
	consume(100);

	printf("testcase1: fork inherit? \n");
	printf("  expect: initial.self ~= child.self\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		show_rusage("fork child");
		_exit(0);
	}
	printf("\n");

	printf("testcase2: fork inherit? (cont.) \n");
	printf("  expect: initial.children ~= 100MB, but child.children = 0\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		show_rusage("child");
		_exit(0);
	}
	printf("\n");

	printf("testcase3: fork + malloc \n");
	printf("  expect: child.self ~= initial.self + 50MB\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		printf("allocate +50MB\n");
		consume(50);
		show_rusage("fork child");
		_exit(0);
	}
	printf("\n");

	printf("testcase4: grandchild maxrss\n");
	printf("  expect: post_wait.children ~= 300MB\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
		show_rusage("post_wait");
	} else {
		system("./child -n 0 -g 300");
		_exit(0);
	}
	printf("\n");

	printf("testcase5: zombie\n");
	printf("  expect: pre_wait ~= initial, IOW the zombie process is not accounted.\n");
	printf("          post_wait ~= 400MB, IOW wait() collect child's max_rss. \n");
	show_rusage("initial");
	if (__fork()) {
		sleep(1); /* children become zombie */
		show_rusage("pre_wait");
		wait(&status);
		show_rusage("post_wait");
	} else {
		system("./child -n 400");
		_exit(0);
	}
	printf("\n");

	printf("testcase6: SIG_IGN\n");
	printf("  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).\n");
	show_rusage("initial");
	signal(SIGCHLD, SIG_IGN);
	if (__fork()) {
		sleep(1); /* children become zombie */
		show_rusage("after_zombie");
	} else {
		system("./child -n 500");
		_exit(0);
	}
	printf("\n");
	signal(SIGCHLD, SIG_DFL);

	printf("testcase7: exec (without fork) \n");
	printf("  expect: initial ~= exec \n");
	show_rusage("initial");
	execl("./child", "child", "-v", NULL);

	return 0;
}

child.c
=======
 #include <sys/types.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>

 #include "common.h"

int main(int argc, char** argv)
{
	int status;
	int c;
	long consume_size = 0;
	long grandchild_consume_size = 0;
	int show = 0;

	while ((c = getopt(argc, argv, "n:g:v")) != -1) {
		switch (c) {
		case 'n':
			consume_size = atol(optarg);
			break;
		case 'v':
			show = 1;
			break;
		case 'g':

			grandchild_consume_size = atol(optarg);
			break;
		default:
			break;
		}
	}

	if (show)
		show_rusage("exec");

	if (consume_size) {
		printf("child alloc %ldMB\n", consume_size);
		consume(consume_size);
	}

	if (grandchild_consume_size) {
		if (fork()) {
			wait(&status);
		} else {
			printf("grandchild alloc %ldMB\n", grandchild_consume_size);
			consume(grandchild_consume_size);

			exit(0);
		}
	}

	return 0;
}

common.c
========
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
 #include <signal.h>
 #include <sys/mman.h>

 #include "common.h"
 #define err(str) perror(str), exit(1)

void show_rusage(char *prefix)
{
    	int err, err2;
    	struct rusage rusage_self;
    	struct rusage rusage_children;

    	printf("%s: ", prefix);
    	err = getrusage(RUSAGE_SELF, &rusage_self);
    	if (!err)
    		printf("self %ld ", rusage_self.ru_maxrss);
    	err2 = getrusage(RUSAGE_CHILDREN, &rusage_children);
    	if (!err2)
    		printf("children %ld ", rusage_children.ru_maxrss);

    	printf("\n");
}

/* Some buggy OS need this worthless CPU waste. */
void make_pagefault(void)
{
	void *addr;
	int size = getpagesize();
	int i;

	for (i=0; i<1000; i++) {
		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
		if (addr == MAP_FAILED)
			err("make_pagefault");
		memset(addr, 0, size);
		munmap(addr, size);
	}
}

void consume(int mega)
{
    	size_t sz = mega * 1024 * 1024;
    	void *ptr;

    	ptr = malloc(sz);
    	memset(ptr, 0, sz);
	make_pagefault();
}

pid_t __fork(void)
{
	pid_t pid;

	pid = fork();
	make_pagefault();

	return pid;
}

common.h
========
void show_rusage(char *prefix);
void make_pagefault(void);
void consume(int mega);
pid_t __fork(void);

FreeBSD result (expected result)
========================================================
allocate 100MB
testcase1: fork inherit?
  expect: initial.self ~= child.self
initial: self 103492 children 0
fork child: self 103540 children 0

testcase2: fork inherit? (cont.)
  expect: initial.children ~= 100MB, but child.children = 0
initial: self 103540 children 103540
child: self 103564 children 0

testcase3: fork + malloc
  expect: child.self ~= initial.self + 50MB
initial: self 103564 children 103564
allocate +50MB
fork child: self 154860 children 0

testcase4: grandchild maxrss
  expect: post_wait.children ~= 300MB
initial: self 103564 children 154860
grandchild alloc 300MB
post_wait: self 103564 children 308720

testcase5: zombie
  expect: pre_wait ~= initial, IOW the zombie process is not accounted.
          post_wait ~= 400MB, IOW wait() collect child's max_rss.
initial: self 103564 children 308720
child alloc 400MB
pre_wait: self 103564 children 308720
post_wait: self 103564 children 411312

testcase6: SIG_IGN
  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
initial: self 103564 children 411312
child alloc 500MB
after_zombie: self 103624 children 411312

testcase7: exec (without fork)
  expect: initial ~= exec
initial: self 103624 children 411312
exec: self 103624 children 411312

Linux result (actual test result)
========================================================
allocate 100MB
testcase1: fork inherit?
  expect: initial.self ~= child.self
initial: self 102848 children 0
fork child: self 102572 children 0

testcase2: fork inherit? (cont.)
  expect: initial.children ~= 100MB, but child.children = 0
initial: self 102876 children 102644
child: self 102572 children 0

testcase3: fork + malloc
  expect: child.self ~= initial.self + 50MB
initial: self 102876 children 102644
allocate +50MB
fork child: self 153804 children 0

testcase4: grandchild maxrss
  expect: post_wait.children ~= 300MB
initial: self 102876 children 153864
grandchild alloc 300MB
post_wait: self 102876 children 307536

testcase5: zombie
  expect: pre_wait ~= initial, IOW the zombie process is not accounted.
          post_wait ~= 400MB, IOW wait() collect child's max_rss.
initial: self 102876 children 307536
child alloc 400MB
pre_wait: self 102876 children 307536
post_wait: self 102876 children 410076

testcase6: SIG_IGN
  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
initial: self 102876 children 410076
child alloc 500MB
after_zombie: self 102880 children 410076

testcase7: exec (without fork)
  expect: initial ~= exec
initial: self 102880 children 410076
exec: self 102880 children 410076

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/exec.c b/fs/exec.c
index 434dba7..69bb9d8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -845,6 +845,9 @@ static int de_thread(struct task_struct *tsk)
 	sig->notify_count = 0;
 
 no_thread_group:
+	if (current->mm)
+		setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
+
 	exit_itimers(sig);
 	flush_itimer_signals();
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 97b10da..6448bbc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -426,6 +426,15 @@ static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
 	return max(mm->hiwater_rss, get_mm_rss(mm));
 }
 
+static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
+					 struct mm_struct *mm)
+{
+	unsigned long hiwater_rss = get_mm_hiwater_rss(mm);
+
+	if (*maxrss < hiwater_rss)
+		*maxrss = hiwater_rss;
+}
+
 static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
 {
 	return max(mm->hiwater_vm, mm->total_vm);
@@ -612,6 +621,7 @@ struct signal_struct {
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
+	unsigned long maxrss, cmaxrss;
 	struct task_io_accounting ioac;
 
 	/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 61bb176..60d6fdc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -947,6 +947,8 @@ NORET_TYPE void do_exit(long code)
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
+		if (tsk->mm)
+			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
 	}
 	acct_collect(code, group_dead);
 	if (group_dead)
@@ -1210,6 +1212,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	if (likely(!traced) && likely(!task_detached(p))) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
+		unsigned long maxrss;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1258,6 +1261,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
+		maxrss = max(sig->maxrss, sig->cmaxrss);
+		if (psig->cmaxrss < maxrss)
+			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
 		spin_unlock_irq(&p->real_parent->sighand->siglock);
diff --git a/kernel/fork.c b/kernel/fork.c
index 1020977..7cf4581 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -866,6 +866,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+	sig->maxrss = sig->cmaxrss = 0;
 	task_io_accounting_init(&sig->ioac);
 	sig->sum_sched_runtime = 0;
 	taskstats_tgid_init(sig);
diff --git a/kernel/sys.c b/kernel/sys.c
index ea5c3bc..ebcb156 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1338,6 +1338,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	unsigned long flags;
 	cputime_t utime, stime;
 	struct task_cputime cputime;
+	unsigned long maxrss = 0;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
@@ -1346,6 +1347,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 		utime = task_utime(current);
 		stime = task_stime(current);
 		accumulate_thread_rusage(p, r);
+		maxrss = p->signal->maxrss;
 		goto out;
 	}
 
@@ -1363,6 +1365,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_majflt = p->signal->cmaj_flt;
 			r->ru_inblock = p->signal->cinblock;
 			r->ru_oublock = p->signal->coublock;
+			maxrss = p->signal->cmaxrss;
 
 			if (who == RUSAGE_CHILDREN)
 				break;
@@ -1377,6 +1380,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_majflt += p->signal->maj_flt;
 			r->ru_inblock += p->signal->inblock;
 			r->ru_oublock += p->signal->oublock;
+			if (maxrss < p->signal->maxrss)
+				maxrss = p->signal->maxrss;
 			t = p;
 			do {
 				accumulate_thread_rusage(t, r);
@@ -1392,6 +1397,15 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 out:
 	cputime_to_timeval(utime, &r->ru_utime);
 	cputime_to_timeval(stime, &r->ru_stime);
+
+	if (who != RUSAGE_CHILDREN) {
+		struct mm_struct *mm = get_task_mm(p);
+		if (mm) {
+			setmax_mm_hiwater_rss(&maxrss, mm);
+			mmput(mm);
+		}
+	}
+	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
 }
 
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
-- 
cgit v0.10.2


From a9ece53c4089ef23d4002d34c4c7148d94622a40 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 22 Sep 2009 16:44:12 -0700
Subject: kallsyms: fix segfault in prefix_underscores_count()

Commit b478b782e110fdb4135caa3062b6d687e989d994 "kallsyms, tracing: output
more proper symbol name" introduces a "bugfix" that introduces a segfault
in kallsyms in my configurations.

The cause is the introduction of prefix_underscores_count() which attempts
to count underscores, even in symbols that do not have them.  As a result,
it just uselessly runs past the end of the buffer until it crashes:

  CC      init/version.o
  LD      init/built-in.o
  LD      .tmp_vmlinux1
  KSYM    .tmp_kallsyms1.S
/bin/sh: line 1: 16934 Done                    sh-linux-gnu-nm -n .tmp_vmlinux1
     16935 Segmentation fault      | scripts/kallsyms > .tmp_kallsyms1.S
make: *** [.tmp_kallsyms1.S] Error 139

This simplifies the logic and just does a straightforward count.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Paulo Marques <pmarques@grupopie.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>		[2.6.30.x, 2.6.31.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 64343cc..86c3896 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -585,7 +585,7 @@ static int prefix_underscores_count(const char *str)
 {
 	const char *tail = str;
 
-	while (*tail != '_')
+	while (*tail == '_')
 		tail++;
 
 	return tail - str;
-- 
cgit v0.10.2


From 00afe029aab03bd95eba210b5e74a252017c4692 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 22 Sep 2009 16:44:14 -0700
Subject: asm/sections: add text/data checking functions for arches to override

Some ports (like the Blackfin arch) have a discontiguous memory map which
means there may be text or data that falls outside of the standard range
of the start/end text/data symbols.  Creating some helper functions allows
these non-standard ports to declare these regions without adversely
affecting anyone else.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robin Getz <rgetz@blackfin.uclinux.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index d083561..b3bfabc 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -23,4 +23,20 @@ extern char __ctors_start[], __ctors_end[];
 #define dereference_function_descriptor(p) (p)
 #endif
 
+/* random extra sections (if any).  Override
+ * in asm/sections.h */
+#ifndef arch_is_kernel_text
+static inline int arch_is_kernel_text(unsigned long addr)
+{
+	return 0;
+}
+#endif
+
+#ifndef arch_is_kernel_data
+static inline int arch_is_kernel_data(unsigned long addr)
+{
+	return 0;
+}
+#endif
+
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
-- 
cgit v0.10.2


From 128e8db38e30c1786498dfc011d0f9dd7f9f9266 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 22 Sep 2009 16:44:15 -0700
Subject: kallsyms: use new arch_is_kernel_text()

This allows kallsyms to locate symbols that are in arch-specific text
sections (such as text in Blackfin on-chip SRAM regions).

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robin Getz <rgetz@blackfin.uclinux.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 3a29dbe..8b6b8b6 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -59,7 +59,8 @@ static inline int is_kernel_inittext(unsigned long addr)
 
 static inline int is_kernel_text(unsigned long addr)
 {
-	if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
+	if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
+	    arch_is_kernel_text(addr))
 		return 1;
 	return in_gate_area_no_task(addr);
 }
-- 
cgit v0.10.2


From 2a9ad18deb2870a9968f50351a0d4b8cc2a04099 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 22 Sep 2009 16:44:16 -0700
Subject: lockdep: use new arch_is_kernel_data()

This allows lockdep to locate symbols that are in arch-specific data
sections (such as data in Blackfin on-chip SRAM regions).

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robin Getz <rgetz@blackfin.uclinux.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f74d2d7..3815ac1d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -578,6 +578,9 @@ static int static_obj(void *obj)
 	if ((addr >= start) && (addr < end))
 		return 1;
 
+	if (arch_is_kernel_data(addr))
+		return 1;
+
 #ifdef CONFIG_SMP
 	/*
 	 * percpu var?
-- 
cgit v0.10.2


From e56770fbc48c1517f620f9f68e3f728e74d52bf5 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 22 Sep 2009 16:44:17 -0700
Subject: Blackfin: override text/data checking functions

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robin Getz <rgetz@blackfin.uclinux.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/blackfin/include/asm/sections.h b/arch/blackfin/include/asm/sections.h
index e7fd0ec..ae4dae1 100644
--- a/arch/blackfin/include/asm/sections.h
+++ b/arch/blackfin/include/asm/sections.h
@@ -1,9 +1,6 @@
 #ifndef _BLACKFIN_SECTIONS_H
 #define _BLACKFIN_SECTIONS_H
 
-/* nothing to see, move along */
-#include <asm-generic/sections.h>
-
 /* only used when MTD_UCLINUX */
 extern unsigned long memory_mtd_start, memory_mtd_end, mtd_size;
 
@@ -15,4 +12,39 @@ extern char _stext_l1[], _etext_l1[], _sdata_l1[], _edata_l1[], _sbss_l1[],
 	_stext_l2[], _etext_l2[], _sdata_l2[], _edata_l2[], _sbss_l2[],
 	_ebss_l2[], _l2_lma_start[];
 
+#include <asm/mem_map.h>
+
+/* Blackfin systems have discontinuous memory map and no virtualized memory */
+static inline int arch_is_kernel_text(unsigned long addr)
+{
+	return
+		(L1_CODE_LENGTH &&
+		 addr >= (unsigned long)_stext_l1 &&
+		 addr <  (unsigned long)_etext_l1)
+		||
+		(L2_LENGTH &&
+		 addr >= (unsigned long)_stext_l2 &&
+		 addr <  (unsigned long)_etext_l2);
+}
+#define arch_is_kernel_text(addr) arch_is_kernel_text(addr)
+
+static inline int arch_is_kernel_data(unsigned long addr)
+{
+	return
+		(L1_DATA_A_LENGTH &&
+		 addr >= (unsigned long)_sdata_l1 &&
+		 addr <  (unsigned long)_ebss_l1)
+		||
+		(L1_DATA_B_LENGTH &&
+		 addr >= (unsigned long)_sdata_b_l1 &&
+		 addr <  (unsigned long)_ebss_b_l1)
+		||
+		(L2_LENGTH &&
+		 addr >= (unsigned long)_sdata_l2 &&
+		 addr <  (unsigned long)_ebss_l2);
+}
+#define arch_is_kernel_data(addr) arch_is_kernel_data(addr)
+
+#include <asm-generic/sections.h>
+
 #endif
-- 
cgit v0.10.2


From 500f35648e5ebd04be00f974738a9db959a892b8 Mon Sep 17 00:00:00 2001
From: Balaji Rao <balajirrao@openmoko.org>
Date: Tue, 22 Sep 2009 16:44:18 -0700
Subject: mmc: in mmc_power_up(), use previously selected ocr if available

When mmc_power_up is called during unsafe resume, host->ocr should be used
instead of host->ocr_avail.

Signed-off-by: Balaji Rao <balajirrao@openmoko.org>
Cc: Andy Green <andy@openmoko.com>
Cc: Pierre Ossman <drzeus-mmc@drzeus.cx>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Acked-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index d84c880..e22d2b5 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -687,7 +687,13 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing)
  */
 static void mmc_power_up(struct mmc_host *host)
 {
-	int bit = fls(host->ocr_avail) - 1;
+	int bit;
+
+	/* If ocr is set, we use it */
+	if (host->ocr)
+		bit = ffs(host->ocr) - 1;
+	else
+		bit = fls(host->ocr_avail) - 1;
 
 	host->ios.vdd = bit;
 	if (mmc_host_is_spi(host)) {
-- 
cgit v0.10.2


From ccdfe3a66a57f5e36f56101e60946ee341eb5f1b Mon Sep 17 00:00:00 2001
From: Anand Gadiyar <gadiyar@ti.com>
Date: Tue, 22 Sep 2009 16:44:21 -0700
Subject: OMAP: HSMMC: do not enable buffer ready interrupt if using DMA

This considerably reduces the number of interrupts during a transfer
and ought to result in some power saving.

Signed-off-by: Anand Gadiyar <gadiyar@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Kishore Kadiyala <kishore.kadiyala@ti.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 1cf9cfb..fee895b 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -70,6 +70,8 @@
 #define DTO_MASK		0x000F0000
 #define DTO_SHIFT		16
 #define INT_EN_MASK		0x307F0033
+#define BWR_ENABLE		(1 << 4)
+#define BRR_ENABLE		(1 << 5)
 #define INIT_STREAM		(1 << 1)
 #define DP_SELECT		(1 << 21)
 #define DDIR			(1 << 4)
@@ -241,7 +243,12 @@ mmc_omap_start_command(struct mmc_omap_host *host, struct mmc_command *cmd,
 	 */
 	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 	OMAP_HSMMC_WRITE(host->base, ISE, INT_EN_MASK);
-	OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
+
+	if (host->use_dma)
+		OMAP_HSMMC_WRITE(host->base, IE,
+				 INT_EN_MASK & ~(BRR_ENABLE | BWR_ENABLE));
+	else
+		OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
 
 	host->response_busy = 0;
 	if (cmd->flags & MMC_RSP_PRESENT) {
-- 
cgit v0.10.2


From 9d2bd7383c71d38c60328a3dc8a946eda2013826 Mon Sep 17 00:00:00 2001
From: San Mehat <san@android.com>
Date: Tue, 22 Sep 2009 16:44:22 -0700
Subject: mmc: msm_sdccc: driver for HTC Dream

MMC Driver for HTC Dream.  I picked the code up from Google git trees,
removed stuff not strictly necessary, and did a few cleanups.  It still
works :-).

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Cc: Brian Swetland <swetland@google.com>
Cc: Pierre Ossman <drzeus-list@drzeus.cx>
Cc: Joe Perches <joe@perches.com>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 891ef18..34fdfa9 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -199,6 +199,13 @@ config MMC_IMX
 
 	  If unsure, say N.
 
+config MMC_MSM7X00A
+	tristate "Qualcomm MSM 7X00A SDCC Controller Support"
+	depends on MMC && ARCH_MSM
+	help
+	  This provides support for the SD/MMC cell found in the
+          MSM 7X00A controllers from Qualcomm.
+
 config MMC_MXC
 	tristate "Freescale i.MX2/3 Multimedia Card Interface support"
 	depends on ARCH_MXC
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index cf153f6..abcb040 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_MMC_OMAP_HS)	+= omap_hsmmc.o
 obj-$(CONFIG_MMC_AT91)		+= at91_mci.o
 obj-$(CONFIG_MMC_ATMELMCI)	+= atmel-mci.o
 obj-$(CONFIG_MMC_TIFM_SD)	+= tifm_sd.o
+obj-$(CONFIG_MMC_MSM7X00A)	+= msm_sdcc.o
 obj-$(CONFIG_MMC_MVSDIO)	+= mvsdio.o
 obj-$(CONFIG_MMC_SPI)		+= mmc_spi.o
 ifeq ($(CONFIG_OF),y)
diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
new file mode 100644
index 0000000..042f217
--- /dev/null
+++ b/drivers/mmc/host/msm_sdcc.c
@@ -0,0 +1,1301 @@
+/*
+ *  linux/drivers/mmc/host/msm_sdcc.c - Qualcomm MSM 7X00A SDCC Driver
+ *
+ *  Copyright (C) 2007 Google Inc,
+ *  Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on mmci.c
+ *
+ * Author: San Mehat (san@android.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/clk.h>
+#include <linux/scatterlist.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/debugfs.h>
+#include <linux/io.h>
+#include <linux/memory.h>
+
+#include <asm/cacheflush.h>
+#include <asm/div64.h>
+#include <asm/sizes.h>
+
+#include <asm/mach/mmc.h>
+#include <mach/msm_iomap.h>
+#include <mach/dma.h>
+#include <mach/htc_pwrsink.h>
+
+
+#include "msm_sdcc.h"
+
+#define DRIVER_NAME "msm-sdcc"
+
+static unsigned int msmsdcc_fmin = 144000;
+static unsigned int msmsdcc_fmax = 50000000;
+static unsigned int msmsdcc_4bit = 1;
+static unsigned int msmsdcc_pwrsave = 1;
+static unsigned int msmsdcc_piopoll = 1;
+static unsigned int msmsdcc_sdioirq;
+
+#define PIO_SPINMAX 30
+#define CMD_SPINMAX 20
+
+
+
+static void
+msmsdcc_start_command(struct msmsdcc_host *host, struct mmc_command *cmd,
+		      u32 c);
+
+static void
+msmsdcc_request_end(struct msmsdcc_host *host, struct mmc_request *mrq)
+{
+	writel(0, host->base + MMCICOMMAND);
+
+	BUG_ON(host->curr.data);
+
+	host->curr.mrq = NULL;
+	host->curr.cmd = NULL;
+
+	if (mrq->data)
+		mrq->data->bytes_xfered = host->curr.data_xfered;
+	if (mrq->cmd->error == -ETIMEDOUT)
+		mdelay(5);
+
+	/*
+	 * Need to drop the host lock here; mmc_request_done may call
+	 * back into the driver...
+	 */
+	spin_unlock(&host->lock);
+	mmc_request_done(host->mmc, mrq);
+	spin_lock(&host->lock);
+}
+
+static void
+msmsdcc_stop_data(struct msmsdcc_host *host)
+{
+	writel(0, host->base + MMCIDATACTRL);
+	host->curr.data = NULL;
+	host->curr.got_dataend = host->curr.got_datablkend = 0;
+}
+
+uint32_t msmsdcc_fifo_addr(struct msmsdcc_host *host)
+{
+	if (host->pdev_id == 1)
+		return MSM_SDC1_PHYS + MMCIFIFO;
+	else if (host->pdev_id == 2)
+		return MSM_SDC2_PHYS + MMCIFIFO;
+	else if (host->pdev_id == 3)
+		return MSM_SDC3_PHYS + MMCIFIFO;
+	else if (host->pdev_id == 4)
+		return MSM_SDC4_PHYS + MMCIFIFO;
+	else
+		BUG();
+	return 0;
+}
+
+static void
+msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd,
+			  unsigned int result,
+			  struct msm_dmov_errdata *err)
+{
+	struct msmsdcc_dma_data	*dma_data =
+		container_of(cmd, struct msmsdcc_dma_data, hdr);
+	struct msmsdcc_host	*host = dma_data->host;
+	unsigned long		flags;
+	struct mmc_request	*mrq;
+
+	spin_lock_irqsave(&host->lock, flags);
+	mrq = host->curr.mrq;
+	BUG_ON(!mrq);
+
+	if (!(result & DMOV_RSLT_VALID)) {
+		printk(KERN_ERR "msmsdcc: Invalid DataMover result\n");
+		goto out;
+	}
+
+	if (result & DMOV_RSLT_DONE) {
+		host->curr.data_xfered = host->curr.xfer_size;
+	} else {
+		/* Error or flush  */
+		if (result & DMOV_RSLT_ERROR)
+			printk(KERN_ERR "%s: DMA error (0x%.8x)\n",
+			       mmc_hostname(host->mmc), result);
+		if (result & DMOV_RSLT_FLUSH)
+			printk(KERN_ERR "%s: DMA channel flushed (0x%.8x)\n",
+			       mmc_hostname(host->mmc), result);
+		if (err)
+			printk(KERN_ERR
+			       "Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n",
+			       err->flush[0], err->flush[1], err->flush[2],
+			       err->flush[3], err->flush[4], err->flush[5]);
+		if (!mrq->data->error)
+			mrq->data->error = -EIO;
+	}
+	host->dma.busy = 0;
+	dma_unmap_sg(mmc_dev(host->mmc), host->dma.sg, host->dma.num_ents,
+		     host->dma.dir);
+
+	if (host->curr.user_pages) {
+		struct scatterlist *sg = host->dma.sg;
+		int i;
+
+		for (i = 0; i < host->dma.num_ents; i++, sg++)
+			flush_dcache_page(sg_page(sg));
+	}
+
+	host->dma.sg = NULL;
+
+	if ((host->curr.got_dataend && host->curr.got_datablkend)
+	     || mrq->data->error) {
+
+		/*
+		 * If we've already gotten our DATAEND / DATABLKEND
+		 * for this request, then complete it through here.
+		 */
+		msmsdcc_stop_data(host);
+
+		if (!mrq->data->error)
+			host->curr.data_xfered = host->curr.xfer_size;
+		if (!mrq->data->stop || mrq->cmd->error) {
+			writel(0, host->base + MMCICOMMAND);
+			host->curr.mrq = NULL;
+			host->curr.cmd = NULL;
+			mrq->data->bytes_xfered = host->curr.data_xfered;
+
+			spin_unlock_irqrestore(&host->lock, flags);
+			mmc_request_done(host->mmc, mrq);
+			return;
+		} else
+			msmsdcc_start_command(host, mrq->data->stop, 0);
+	}
+
+out:
+	spin_unlock_irqrestore(&host->lock, flags);
+	return;
+}
+
+static int validate_dma(struct msmsdcc_host *host, struct mmc_data *data)
+{
+	if (host->dma.channel == -1)
+		return -ENOENT;
+
+	if ((data->blksz * data->blocks) < MCI_FIFOSIZE)
+		return -EINVAL;
+	if ((data->blksz * data->blocks) % MCI_FIFOSIZE)
+		return -EINVAL;
+	return 0;
+}
+
+static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data)
+{
+	struct msmsdcc_nc_dmadata *nc;
+	dmov_box *box;
+	uint32_t rows;
+	uint32_t crci;
+	unsigned int n;
+	int i, rc;
+	struct scatterlist *sg = data->sg;
+
+	rc = validate_dma(host, data);
+	if (rc)
+		return rc;
+
+	host->dma.sg = data->sg;
+	host->dma.num_ents = data->sg_len;
+
+	nc = host->dma.nc;
+
+	if (host->pdev_id == 1)
+		crci = MSMSDCC_CRCI_SDC1;
+	else if (host->pdev_id == 2)
+		crci = MSMSDCC_CRCI_SDC2;
+	else if (host->pdev_id == 3)
+		crci = MSMSDCC_CRCI_SDC3;
+	else if (host->pdev_id == 4)
+		crci = MSMSDCC_CRCI_SDC4;
+	else {
+		host->dma.sg = NULL;
+		host->dma.num_ents = 0;
+		return -ENOENT;
+	}
+
+	if (data->flags & MMC_DATA_READ)
+		host->dma.dir = DMA_FROM_DEVICE;
+	else
+		host->dma.dir = DMA_TO_DEVICE;
+
+	host->curr.user_pages = 0;
+
+	n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg,
+			host->dma.num_ents, host->dma.dir);
+
+	if (n != host->dma.num_ents) {
+		printk(KERN_ERR "%s: Unable to map in all sg elements\n",
+		       mmc_hostname(host->mmc));
+		host->dma.sg = NULL;
+		host->dma.num_ents = 0;
+		return -ENOMEM;
+	}
+
+	box = &nc->cmd[0];
+	for (i = 0; i < host->dma.num_ents; i++) {
+		box->cmd = CMD_MODE_BOX;
+
+		if (i == (host->dma.num_ents - 1))
+			box->cmd |= CMD_LC;
+		rows = (sg_dma_len(sg) % MCI_FIFOSIZE) ?
+			(sg_dma_len(sg) / MCI_FIFOSIZE) + 1 :
+			(sg_dma_len(sg) / MCI_FIFOSIZE) ;
+
+		if (data->flags & MMC_DATA_READ) {
+			box->src_row_addr = msmsdcc_fifo_addr(host);
+			box->dst_row_addr = sg_dma_address(sg);
+
+			box->src_dst_len = (MCI_FIFOSIZE << 16) |
+					   (MCI_FIFOSIZE);
+			box->row_offset = MCI_FIFOSIZE;
+
+			box->num_rows = rows * ((1 << 16) + 1);
+			box->cmd |= CMD_SRC_CRCI(crci);
+		} else {
+			box->src_row_addr = sg_dma_address(sg);
+			box->dst_row_addr = msmsdcc_fifo_addr(host);
+
+			box->src_dst_len = (MCI_FIFOSIZE << 16) |
+					   (MCI_FIFOSIZE);
+			box->row_offset = (MCI_FIFOSIZE << 16);
+
+			box->num_rows = rows * ((1 << 16) + 1);
+			box->cmd |= CMD_DST_CRCI(crci);
+		}
+		box++;
+		sg++;
+	}
+
+	/* location of command block must be 64 bit aligned */
+	BUG_ON(host->dma.cmd_busaddr & 0x07);
+
+	nc->cmdptr = (host->dma.cmd_busaddr >> 3) | CMD_PTR_LP;
+	host->dma.hdr.cmdptr = DMOV_CMD_PTR_LIST |
+			       DMOV_CMD_ADDR(host->dma.cmdptr_busaddr);
+	host->dma.hdr.complete_func = msmsdcc_dma_complete_func;
+
+	return 0;
+}
+
+static void
+msmsdcc_start_data(struct msmsdcc_host *host, struct mmc_data *data)
+{
+	unsigned int datactrl, timeout;
+	unsigned long long clks;
+	void __iomem *base = host->base;
+	unsigned int pio_irqmask = 0;
+
+	host->curr.data = data;
+	host->curr.xfer_size = data->blksz * data->blocks;
+	host->curr.xfer_remain = host->curr.xfer_size;
+	host->curr.data_xfered = 0;
+	host->curr.got_dataend = 0;
+	host->curr.got_datablkend = 0;
+
+	memset(&host->pio, 0, sizeof(host->pio));
+
+	clks = (unsigned long long)data->timeout_ns * host->clk_rate;
+	do_div(clks, 1000000000UL);
+	timeout = data->timeout_clks + (unsigned int)clks;
+	writel(timeout, base + MMCIDATATIMER);
+
+	writel(host->curr.xfer_size, base + MMCIDATALENGTH);
+
+	datactrl = MCI_DPSM_ENABLE | (data->blksz << 4);
+
+	if (!msmsdcc_config_dma(host, data))
+		datactrl |= MCI_DPSM_DMAENABLE;
+	else {
+		host->pio.sg = data->sg;
+		host->pio.sg_len = data->sg_len;
+		host->pio.sg_off = 0;
+
+		if (data->flags & MMC_DATA_READ) {
+			pio_irqmask = MCI_RXFIFOHALFFULLMASK;
+			if (host->curr.xfer_remain < MCI_FIFOSIZE)
+				pio_irqmask |= MCI_RXDATAAVLBLMASK;
+		} else
+			pio_irqmask = MCI_TXFIFOHALFEMPTYMASK;
+	}
+
+	if (data->flags & MMC_DATA_READ)
+		datactrl |= MCI_DPSM_DIRECTION;
+
+	writel(pio_irqmask, base + MMCIMASK1);
+	writel(datactrl, base + MMCIDATACTRL);
+
+	if (datactrl & MCI_DPSM_DMAENABLE) {
+		host->dma.busy = 1;
+		msm_dmov_enqueue_cmd(host->dma.channel, &host->dma.hdr);
+	}
+}
+
+static void
+msmsdcc_start_command(struct msmsdcc_host *host, struct mmc_command *cmd, u32 c)
+{
+	void __iomem *base = host->base;
+
+	if (readl(base + MMCICOMMAND) & MCI_CPSM_ENABLE) {
+		writel(0, base + MMCICOMMAND);
+		udelay(2 + ((5 * 1000000) / host->clk_rate));
+	}
+
+	c |= cmd->opcode | MCI_CPSM_ENABLE;
+
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		if (cmd->flags & MMC_RSP_136)
+			c |= MCI_CPSM_LONGRSP;
+		c |= MCI_CPSM_RESPONSE;
+	}
+
+	if ((((cmd->opcode == 17) || (cmd->opcode == 18))  ||
+	     ((cmd->opcode == 24) || (cmd->opcode == 25))) ||
+	      (cmd->opcode == 53))
+		c |= MCI_CSPM_DATCMD;
+
+	if (cmd == cmd->mrq->stop)
+		c |= MCI_CSPM_MCIABORT;
+
+	host->curr.cmd = cmd;
+
+	host->stats.cmds++;
+
+	writel(cmd->arg, base + MMCIARGUMENT);
+	writel(c, base + MMCICOMMAND);
+}
+
+static void
+msmsdcc_data_err(struct msmsdcc_host *host, struct mmc_data *data,
+		 unsigned int status)
+{
+	if (status & MCI_DATACRCFAIL) {
+		printk(KERN_ERR "%s: Data CRC error\n",
+		       mmc_hostname(host->mmc));
+		printk(KERN_ERR "%s: opcode 0x%.8x\n", __func__,
+		       data->mrq->cmd->opcode);
+		printk(KERN_ERR "%s: blksz %d, blocks %d\n", __func__,
+		       data->blksz, data->blocks);
+		data->error = -EILSEQ;
+	} else if (status & MCI_DATATIMEOUT) {
+		printk(KERN_ERR "%s: Data timeout\n", mmc_hostname(host->mmc));
+		data->error = -ETIMEDOUT;
+	} else if (status & MCI_RXOVERRUN) {
+		printk(KERN_ERR "%s: RX overrun\n", mmc_hostname(host->mmc));
+		data->error = -EIO;
+	} else if (status & MCI_TXUNDERRUN) {
+		printk(KERN_ERR "%s: TX underrun\n", mmc_hostname(host->mmc));
+		data->error = -EIO;
+	} else {
+		printk(KERN_ERR "%s: Unknown error (0x%.8x)\n",
+		      mmc_hostname(host->mmc), status);
+		data->error = -EIO;
+	}
+}
+
+
+static int
+msmsdcc_pio_read(struct msmsdcc_host *host, char *buffer, unsigned int remain)
+{
+	void __iomem	*base = host->base;
+	uint32_t	*ptr = (uint32_t *) buffer;
+	int		count = 0;
+
+	while (readl(base + MMCISTATUS) & MCI_RXDATAAVLBL) {
+
+		*ptr = readl(base + MMCIFIFO + (count % MCI_FIFOSIZE));
+		ptr++;
+		count += sizeof(uint32_t);
+
+		remain -=  sizeof(uint32_t);
+		if (remain == 0)
+			break;
+	}
+	return count;
+}
+
+static int
+msmsdcc_pio_write(struct msmsdcc_host *host, char *buffer,
+		  unsigned int remain, u32 status)
+{
+	void __iomem *base = host->base;
+	char *ptr = buffer;
+
+	do {
+		unsigned int count, maxcnt;
+
+		maxcnt = status & MCI_TXFIFOEMPTY ? MCI_FIFOSIZE :
+						    MCI_FIFOHALFSIZE;
+		count = min(remain, maxcnt);
+
+		writesl(base + MMCIFIFO, ptr, count >> 2);
+		ptr += count;
+		remain -= count;
+
+		if (remain == 0)
+			break;
+
+		status = readl(base + MMCISTATUS);
+	} while (status & MCI_TXFIFOHALFEMPTY);
+
+	return ptr - buffer;
+}
+
+static int
+msmsdcc_spin_on_status(struct msmsdcc_host *host, uint32_t mask, int maxspin)
+{
+	while (maxspin) {
+		if ((readl(host->base + MMCISTATUS) & mask))
+			return 0;
+		udelay(1);
+		--maxspin;
+	}
+	return -ETIMEDOUT;
+}
+
+static int
+msmsdcc_pio_irq(int irq, void *dev_id)
+{
+	struct msmsdcc_host	*host = dev_id;
+	void __iomem		*base = host->base;
+	uint32_t		status;
+
+	status = readl(base + MMCISTATUS);
+
+	do {
+		unsigned long flags;
+		unsigned int remain, len;
+		char *buffer;
+
+		if (!(status & (MCI_TXFIFOHALFEMPTY | MCI_RXDATAAVLBL))) {
+			if (host->curr.xfer_remain == 0 || !msmsdcc_piopoll)
+				break;
+
+			if (msmsdcc_spin_on_status(host,
+						   (MCI_TXFIFOHALFEMPTY |
+						   MCI_RXDATAAVLBL),
+						   PIO_SPINMAX)) {
+				break;
+			}
+		}
+
+		/* Map the current scatter buffer */
+		local_irq_save(flags);
+		buffer = kmap_atomic(sg_page(host->pio.sg),
+				     KM_BIO_SRC_IRQ) + host->pio.sg->offset;
+		buffer += host->pio.sg_off;
+		remain = host->pio.sg->length - host->pio.sg_off;
+		len = 0;
+		if (status & MCI_RXACTIVE)
+			len = msmsdcc_pio_read(host, buffer, remain);
+		if (status & MCI_TXACTIVE)
+			len = msmsdcc_pio_write(host, buffer, remain, status);
+
+		/* Unmap the buffer */
+		kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
+		local_irq_restore(flags);
+
+		host->pio.sg_off += len;
+		host->curr.xfer_remain -= len;
+		host->curr.data_xfered += len;
+		remain -= len;
+
+		if (remain == 0) {
+			/* This sg page is full - do some housekeeping */
+			if (status & MCI_RXACTIVE && host->curr.user_pages)
+				flush_dcache_page(sg_page(host->pio.sg));
+
+			if (!--host->pio.sg_len) {
+				memset(&host->pio, 0, sizeof(host->pio));
+				break;
+			}
+
+			/* Advance to next sg */
+			host->pio.sg++;
+			host->pio.sg_off = 0;
+		}
+
+		status = readl(base + MMCISTATUS);
+	} while (1);
+
+	if (status & MCI_RXACTIVE && host->curr.xfer_remain < MCI_FIFOSIZE)
+		writel(MCI_RXDATAAVLBLMASK, base + MMCIMASK1);
+
+	if (!host->curr.xfer_remain)
+		writel(0, base + MMCIMASK1);
+
+	return IRQ_HANDLED;
+}
+
+static void msmsdcc_do_cmdirq(struct msmsdcc_host *host, uint32_t status)
+{
+	struct mmc_command *cmd = host->curr.cmd;
+	void __iomem	   *base = host->base;
+
+	host->curr.cmd = NULL;
+	cmd->resp[0] = readl(base + MMCIRESPONSE0);
+	cmd->resp[1] = readl(base + MMCIRESPONSE1);
+	cmd->resp[2] = readl(base + MMCIRESPONSE2);
+	cmd->resp[3] = readl(base + MMCIRESPONSE3);
+
+	del_timer(&host->command_timer);
+	if (status & MCI_CMDTIMEOUT) {
+		cmd->error = -ETIMEDOUT;
+	} else if (status & MCI_CMDCRCFAIL &&
+		   cmd->flags & MMC_RSP_CRC) {
+		printk(KERN_ERR "%s: Command CRC error\n",
+		       mmc_hostname(host->mmc));
+		cmd->error = -EILSEQ;
+	}
+
+	if (!cmd->data || cmd->error) {
+		if (host->curr.data && host->dma.sg)
+			msm_dmov_stop_cmd(host->dma.channel,
+					  &host->dma.hdr, 0);
+		else if (host->curr.data) { /* Non DMA */
+			msmsdcc_stop_data(host);
+			msmsdcc_request_end(host, cmd->mrq);
+		} else /* host->data == NULL */
+			msmsdcc_request_end(host, cmd->mrq);
+	} else if (!(cmd->data->flags & MMC_DATA_READ))
+		msmsdcc_start_data(host, cmd->data);
+}
+
+static irqreturn_t
+msmsdcc_irq(int irq, void *dev_id)
+{
+	struct msmsdcc_host	*host = dev_id;
+	void __iomem		*base = host->base;
+	u32			status;
+	int			ret = 0;
+	int			cardint = 0;
+
+	spin_lock(&host->lock);
+
+	do {
+		struct mmc_data *data;
+		status = readl(base + MMCISTATUS);
+
+		status &= (readl(base + MMCIMASK0) |
+					      MCI_DATABLOCKENDMASK);
+		writel(status, base + MMCICLEAR);
+
+		data = host->curr.data;
+		if (data) {
+			/* Check for data errors */
+			if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|
+				      MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
+				msmsdcc_data_err(host, data, status);
+				host->curr.data_xfered = 0;
+				if (host->dma.sg)
+					msm_dmov_stop_cmd(host->dma.channel,
+							  &host->dma.hdr, 0);
+				else {
+					msmsdcc_stop_data(host);
+					if (!data->stop)
+						msmsdcc_request_end(host,
+								    data->mrq);
+					else
+						msmsdcc_start_command(host,
+								     data->stop,
+								     0);
+				}
+			}
+
+			/* Check for data done */
+			if (!host->curr.got_dataend && (status & MCI_DATAEND))
+				host->curr.got_dataend = 1;
+
+			if (!host->curr.got_datablkend &&
+			    (status & MCI_DATABLOCKEND)) {
+				host->curr.got_datablkend = 1;
+			}
+
+			if (host->curr.got_dataend &&
+			    host->curr.got_datablkend) {
+				/*
+				 * If DMA is still in progress, we complete
+				 * via the completion handler
+				 */
+				if (!host->dma.busy) {
+					/*
+					 * There appears to be an issue in the
+					 * controller where if you request a
+					 * small block transfer (< fifo size),
+					 * you may get your DATAEND/DATABLKEND
+					 * irq without the PIO data irq.
+					 *
+					 * Check to see if theres still data
+					 * to be read, and simulate a PIO irq.
+					 */
+					if (readl(base + MMCISTATUS) &
+							       MCI_RXDATAAVLBL)
+						msmsdcc_pio_irq(1, host);
+
+					msmsdcc_stop_data(host);
+					if (!data->error)
+						host->curr.data_xfered =
+							host->curr.xfer_size;
+
+					if (!data->stop)
+						msmsdcc_request_end(host,
+								    data->mrq);
+					else
+						msmsdcc_start_command(host,
+							      data->stop, 0);
+				}
+			}
+		}
+
+		if (status & (MCI_CMDSENT | MCI_CMDRESPEND | MCI_CMDCRCFAIL |
+			      MCI_CMDTIMEOUT) && host->curr.cmd) {
+			msmsdcc_do_cmdirq(host, status);
+		}
+
+		if (status & MCI_SDIOINTOPER) {
+			cardint = 1;
+			status &= ~MCI_SDIOINTOPER;
+		}
+		ret = 1;
+	} while (status);
+
+	spin_unlock(&host->lock);
+
+	/*
+	 * We have to delay handling the card interrupt as it calls
+	 * back into the driver.
+	 */
+	if (cardint)
+		mmc_signal_sdio_irq(host->mmc);
+
+	return IRQ_RETVAL(ret);
+}
+
+static void
+msmsdcc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct msmsdcc_host *host = mmc_priv(mmc);
+	unsigned long flags;
+
+	WARN_ON(host->curr.mrq != NULL);
+	WARN_ON(host->pwr == 0);
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	host->stats.reqs++;
+
+	if (host->eject) {
+		if (mrq->data && !(mrq->data->flags & MMC_DATA_READ)) {
+			mrq->cmd->error = 0;
+			mrq->data->bytes_xfered = mrq->data->blksz *
+						  mrq->data->blocks;
+		} else
+			mrq->cmd->error = -ENOMEDIUM;
+
+		spin_unlock_irqrestore(&host->lock, flags);
+		mmc_request_done(mmc, mrq);
+		return;
+	}
+
+	host->curr.mrq = mrq;
+
+	if (mrq->data && mrq->data->flags & MMC_DATA_READ)
+		msmsdcc_start_data(host, mrq->data);
+
+	msmsdcc_start_command(host, mrq->cmd, 0);
+
+	if (host->cmdpoll && !msmsdcc_spin_on_status(host,
+				MCI_CMDRESPEND|MCI_CMDCRCFAIL|MCI_CMDTIMEOUT,
+				CMD_SPINMAX)) {
+		uint32_t status = readl(host->base + MMCISTATUS);
+		msmsdcc_do_cmdirq(host, status);
+		writel(MCI_CMDRESPEND | MCI_CMDCRCFAIL | MCI_CMDTIMEOUT,
+		       host->base + MMCICLEAR);
+		host->stats.cmdpoll_hits++;
+	} else {
+		host->stats.cmdpoll_misses++;
+		mod_timer(&host->command_timer, jiffies + HZ);
+	}
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+
+static void
+msmsdcc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct msmsdcc_host *host = mmc_priv(mmc);
+	u32 clk = 0, pwr = 0;
+	int rc;
+
+	if (ios->clock) {
+
+		if (!host->clks_on) {
+			clk_enable(host->pclk);
+			clk_enable(host->clk);
+			host->clks_on = 1;
+		}
+		if (ios->clock != host->clk_rate) {
+			rc = clk_set_rate(host->clk, ios->clock);
+			if (rc < 0)
+				printk(KERN_ERR
+				       "Error setting clock rate (%d)\n", rc);
+			else
+				host->clk_rate = ios->clock;
+		}
+		clk |= MCI_CLK_ENABLE;
+	}
+
+	if (ios->bus_width == MMC_BUS_WIDTH_4)
+		clk |= (2 << 10); /* Set WIDEBUS */
+
+	if (ios->clock > 400000 && msmsdcc_pwrsave)
+		clk |= (1 << 9); /* PWRSAVE */
+
+	clk |= (1 << 12); /* FLOW_ENA */
+	clk |= (1 << 15); /* feedback clock */
+
+	if (host->plat->translate_vdd)
+		pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
+
+	switch (ios->power_mode) {
+	case MMC_POWER_OFF:
+		htc_pwrsink_set(PWRSINK_SDCARD, 0);
+		break;
+	case MMC_POWER_UP:
+		pwr |= MCI_PWR_UP;
+		break;
+	case MMC_POWER_ON:
+		htc_pwrsink_set(PWRSINK_SDCARD, 100);
+		pwr |= MCI_PWR_ON;
+		break;
+	}
+
+	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
+		pwr |= MCI_OD;
+
+	writel(clk, host->base + MMCICLOCK);
+
+	if (host->pwr != pwr) {
+		host->pwr = pwr;
+		writel(pwr, host->base + MMCIPOWER);
+	}
+
+	if (!(clk & MCI_CLK_ENABLE) && host->clks_on) {
+		clk_disable(host->clk);
+		clk_disable(host->pclk);
+		host->clks_on = 0;
+	}
+}
+
+static void msmsdcc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct msmsdcc_host *host = mmc_priv(mmc);
+	unsigned long flags;
+	u32 status;
+
+	spin_lock_irqsave(&host->lock, flags);
+	if (msmsdcc_sdioirq == 1) {
+		status = readl(host->base + MMCIMASK0);
+		if (enable)
+			status |= MCI_SDIOINTOPERMASK;
+		else
+			status &= ~MCI_SDIOINTOPERMASK;
+		host->saved_irq0mask = status;
+		writel(status, host->base + MMCIMASK0);
+	}
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+
+static const struct mmc_host_ops msmsdcc_ops = {
+	.request	= msmsdcc_request,
+	.set_ios	= msmsdcc_set_ios,
+	.enable_sdio_irq = msmsdcc_enable_sdio_irq,
+};
+
+static void
+msmsdcc_check_status(unsigned long data)
+{
+	struct msmsdcc_host *host = (struct msmsdcc_host *)data;
+	unsigned int status;
+
+	if (!host->plat->status) {
+		mmc_detect_change(host->mmc, 0);
+		goto out;
+	}
+
+	status = host->plat->status(mmc_dev(host->mmc));
+	host->eject = !status;
+	if (status ^ host->oldstat) {
+		printk(KERN_INFO
+		       "%s: Slot status change detected (%d -> %d)\n",
+		       mmc_hostname(host->mmc), host->oldstat, status);
+		if (status)
+			mmc_detect_change(host->mmc, (5 * HZ) / 2);
+		else
+			mmc_detect_change(host->mmc, 0);
+	}
+
+	host->oldstat = status;
+
+out:
+	if (host->timer.function)
+		mod_timer(&host->timer, jiffies + HZ);
+}
+
+static irqreturn_t
+msmsdcc_platform_status_irq(int irq, void *dev_id)
+{
+	struct msmsdcc_host *host = dev_id;
+
+	printk(KERN_DEBUG "%s: %d\n", __func__, irq);
+	msmsdcc_check_status((unsigned long) host);
+	return IRQ_HANDLED;
+}
+
+static void
+msmsdcc_status_notify_cb(int card_present, void *dev_id)
+{
+	struct msmsdcc_host *host = dev_id;
+
+	printk(KERN_DEBUG "%s: card_present %d\n", mmc_hostname(host->mmc),
+	       card_present);
+	msmsdcc_check_status((unsigned long) host);
+}
+
+/*
+ * called when a command expires.
+ * Dump some debugging, and then error
+ * out the transaction.
+ */
+static void
+msmsdcc_command_expired(unsigned long _data)
+{
+	struct msmsdcc_host	*host = (struct msmsdcc_host *) _data;
+	struct mmc_request	*mrq;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+	mrq = host->curr.mrq;
+
+	if (!mrq) {
+		printk(KERN_INFO "%s: Command expiry misfire\n",
+		       mmc_hostname(host->mmc));
+		spin_unlock_irqrestore(&host->lock, flags);
+		return;
+	}
+
+	printk(KERN_ERR "%s: Command timeout (%p %p %p %p)\n",
+	       mmc_hostname(host->mmc), mrq, mrq->cmd,
+	       mrq->data, host->dma.sg);
+
+	mrq->cmd->error = -ETIMEDOUT;
+	msmsdcc_stop_data(host);
+
+	writel(0, host->base + MMCICOMMAND);
+
+	host->curr.mrq = NULL;
+	host->curr.cmd = NULL;
+
+	spin_unlock_irqrestore(&host->lock, flags);
+	mmc_request_done(host->mmc, mrq);
+}
+
+static int
+msmsdcc_init_dma(struct msmsdcc_host *host)
+{
+	memset(&host->dma, 0, sizeof(struct msmsdcc_dma_data));
+	host->dma.host = host;
+	host->dma.channel = -1;
+
+	if (!host->dmares)
+		return -ENODEV;
+
+	host->dma.nc = dma_alloc_coherent(NULL,
+					  sizeof(struct msmsdcc_nc_dmadata),
+					  &host->dma.nc_busaddr,
+					  GFP_KERNEL);
+	if (host->dma.nc == NULL) {
+		printk(KERN_ERR "Unable to allocate DMA buffer\n");
+		return -ENOMEM;
+	}
+	memset(host->dma.nc, 0x00, sizeof(struct msmsdcc_nc_dmadata));
+	host->dma.cmd_busaddr = host->dma.nc_busaddr;
+	host->dma.cmdptr_busaddr = host->dma.nc_busaddr +
+				offsetof(struct msmsdcc_nc_dmadata, cmdptr);
+	host->dma.channel = host->dmares->start;
+
+	return 0;
+}
+
+#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ
+static void
+do_resume_work(struct work_struct *work)
+{
+	struct msmsdcc_host *host =
+		container_of(work, struct msmsdcc_host, resume_task);
+	struct mmc_host	*mmc = host->mmc;
+
+	if (mmc) {
+		mmc_resume_host(mmc);
+		if (host->stat_irq)
+			enable_irq(host->stat_irq);
+	}
+}
+#endif
+
+static int
+msmsdcc_probe(struct platform_device *pdev)
+{
+	struct mmc_platform_data *plat = pdev->dev.platform_data;
+	struct msmsdcc_host *host;
+	struct mmc_host *mmc;
+	struct resource *cmd_irqres = NULL;
+	struct resource *pio_irqres = NULL;
+	struct resource *stat_irqres = NULL;
+	struct resource *memres = NULL;
+	struct resource *dmares = NULL;
+	int ret;
+
+	/* must have platform data */
+	if (!plat) {
+		printk(KERN_ERR "%s: Platform data not available\n", __func__);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (pdev->id < 1 || pdev->id > 4)
+		return -EINVAL;
+
+	if (pdev->resource == NULL || pdev->num_resources < 2) {
+		printk(KERN_ERR "%s: Invalid resource\n", __func__);
+		return -ENXIO;
+	}
+
+	memres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmares = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	cmd_irqres = platform_get_resource_byname(pdev, IORESOURCE_IRQ,
+						  "cmd_irq");
+	pio_irqres = platform_get_resource_byname(pdev, IORESOURCE_IRQ,
+						  "pio_irq");
+	stat_irqres = platform_get_resource_byname(pdev, IORESOURCE_IRQ,
+						   "status_irq");
+
+	if (!cmd_irqres || !pio_irqres || !memres) {
+		printk(KERN_ERR "%s: Invalid resource\n", __func__);
+		return -ENXIO;
+	}
+
+	/*
+	 * Setup our host structure
+	 */
+
+	mmc = mmc_alloc_host(sizeof(struct msmsdcc_host), &pdev->dev);
+	if (!mmc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	host = mmc_priv(mmc);
+	host->pdev_id = pdev->id;
+	host->plat = plat;
+	host->mmc = mmc;
+
+	host->cmdpoll = 1;
+
+	host->base = ioremap(memres->start, PAGE_SIZE);
+	if (!host->base) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	host->cmd_irqres = cmd_irqres;
+	host->pio_irqres = pio_irqres;
+	host->memres = memres;
+	host->dmares = dmares;
+	spin_lock_init(&host->lock);
+
+	/*
+	 * Setup DMA
+	 */
+	msmsdcc_init_dma(host);
+
+	/*
+	 * Setup main peripheral bus clock
+	 */
+	host->pclk = clk_get(&pdev->dev, "sdc_pclk");
+	if (IS_ERR(host->pclk)) {
+		ret = PTR_ERR(host->pclk);
+		goto host_free;
+	}
+
+	ret = clk_enable(host->pclk);
+	if (ret)
+		goto pclk_put;
+
+	host->pclk_rate = clk_get_rate(host->pclk);
+
+	/*
+	 * Setup SDC MMC clock
+	 */
+	host->clk = clk_get(&pdev->dev, "sdc_clk");
+	if (IS_ERR(host->clk)) {
+		ret = PTR_ERR(host->clk);
+		goto pclk_disable;
+	}
+
+	ret = clk_enable(host->clk);
+	if (ret)
+		goto clk_put;
+
+	ret = clk_set_rate(host->clk, msmsdcc_fmin);
+	if (ret) {
+		printk(KERN_ERR "%s: Clock rate set failed (%d)\n",
+		       __func__, ret);
+		goto clk_disable;
+	}
+
+	host->clk_rate = clk_get_rate(host->clk);
+
+	host->clks_on = 1;
+
+	/*
+	 * Setup MMC host structure
+	 */
+	mmc->ops = &msmsdcc_ops;
+	mmc->f_min = msmsdcc_fmin;
+	mmc->f_max = msmsdcc_fmax;
+	mmc->ocr_avail = plat->ocr_mask;
+
+	if (msmsdcc_4bit)
+		mmc->caps |= MMC_CAP_4_BIT_DATA;
+	if (msmsdcc_sdioirq)
+		mmc->caps |= MMC_CAP_SDIO_IRQ;
+	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED;
+
+	mmc->max_phys_segs = NR_SG;
+	mmc->max_hw_segs = NR_SG;
+	mmc->max_blk_size = 4096;	/* MCI_DATA_CTL BLOCKSIZE up to 4096 */
+	mmc->max_blk_count = 65536;
+
+	mmc->max_req_size = 33554432;	/* MCI_DATA_LENGTH is 25 bits */
+	mmc->max_seg_size = mmc->max_req_size;
+
+	writel(0, host->base + MMCIMASK0);
+	writel(0x5e007ff, host->base + MMCICLEAR); /* Add: 1 << 25 */
+
+	writel(MCI_IRQENABLE, host->base + MMCIMASK0);
+	host->saved_irq0mask = MCI_IRQENABLE;
+
+	/*
+	 * Setup card detect change
+	 */
+
+	memset(&host->timer, 0, sizeof(host->timer));
+
+	if (stat_irqres && !(stat_irqres->flags & IORESOURCE_DISABLED)) {
+		unsigned long irqflags = IRQF_SHARED |
+			(stat_irqres->flags & IRQF_TRIGGER_MASK);
+
+		host->stat_irq = stat_irqres->start;
+		ret = request_irq(host->stat_irq,
+				  msmsdcc_platform_status_irq,
+				  irqflags,
+				  DRIVER_NAME " (slot)",
+				  host);
+		if (ret) {
+			printk(KERN_ERR "Unable to get slot IRQ %d (%d)\n",
+			       host->stat_irq, ret);
+			goto clk_disable;
+		}
+	} else if (plat->register_status_notify) {
+		plat->register_status_notify(msmsdcc_status_notify_cb, host);
+	} else if (!plat->status)
+		printk(KERN_ERR "%s: No card detect facilities available\n",
+		       mmc_hostname(mmc));
+	else {
+		init_timer(&host->timer);
+		host->timer.data = (unsigned long)host;
+		host->timer.function = msmsdcc_check_status;
+		host->timer.expires = jiffies + HZ;
+		add_timer(&host->timer);
+	}
+
+	if (plat->status) {
+		host->oldstat = host->plat->status(mmc_dev(host->mmc));
+		host->eject = !host->oldstat;
+	}
+
+	/*
+	 * Setup a command timer. We currently need this due to
+	 * some 'strange' timeout / error handling situations.
+	 */
+	init_timer(&host->command_timer);
+	host->command_timer.data = (unsigned long) host;
+	host->command_timer.function = msmsdcc_command_expired;
+
+	ret = request_irq(cmd_irqres->start, msmsdcc_irq, IRQF_SHARED,
+			  DRIVER_NAME " (cmd)", host);
+	if (ret)
+		goto stat_irq_free;
+
+	ret = request_irq(pio_irqres->start, msmsdcc_pio_irq, IRQF_SHARED,
+			  DRIVER_NAME " (pio)", host);
+	if (ret)
+		goto cmd_irq_free;
+
+	mmc_set_drvdata(pdev, mmc);
+	mmc_add_host(mmc);
+
+	printk(KERN_INFO
+	       "%s: Qualcomm MSM SDCC at 0x%016llx irq %d,%d dma %d\n",
+	       mmc_hostname(mmc), (unsigned long long)memres->start,
+	       (unsigned int) cmd_irqres->start,
+	       (unsigned int) host->stat_irq, host->dma.channel);
+	printk(KERN_INFO "%s: 4 bit data mode %s\n", mmc_hostname(mmc),
+	       (mmc->caps & MMC_CAP_4_BIT_DATA ? "enabled" : "disabled"));
+	printk(KERN_INFO "%s: MMC clock %u -> %u Hz, PCLK %u Hz\n",
+	       mmc_hostname(mmc), msmsdcc_fmin, msmsdcc_fmax, host->pclk_rate);
+	printk(KERN_INFO "%s: Slot eject status = %d\n", mmc_hostname(mmc),
+	       host->eject);
+	printk(KERN_INFO "%s: Power save feature enable = %d\n",
+	       mmc_hostname(mmc), msmsdcc_pwrsave);
+
+	if (host->dma.channel != -1) {
+		printk(KERN_INFO
+		       "%s: DM non-cached buffer at %p, dma_addr 0x%.8x\n",
+		       mmc_hostname(mmc), host->dma.nc, host->dma.nc_busaddr);
+		printk(KERN_INFO
+		       "%s: DM cmd busaddr 0x%.8x, cmdptr busaddr 0x%.8x\n",
+		       mmc_hostname(mmc), host->dma.cmd_busaddr,
+		       host->dma.cmdptr_busaddr);
+	} else
+		printk(KERN_INFO
+		       "%s: PIO transfer enabled\n", mmc_hostname(mmc));
+	if (host->timer.function)
+		printk(KERN_INFO "%s: Polling status mode enabled\n",
+		       mmc_hostname(mmc));
+
+	return 0;
+ cmd_irq_free:
+	free_irq(cmd_irqres->start, host);
+ stat_irq_free:
+	if (host->stat_irq)
+		free_irq(host->stat_irq, host);
+ clk_disable:
+	clk_disable(host->clk);
+ clk_put:
+	clk_put(host->clk);
+ pclk_disable:
+	clk_disable(host->pclk);
+ pclk_put:
+	clk_put(host->pclk);
+ host_free:
+	mmc_free_host(mmc);
+ out:
+	return ret;
+}
+
+static int
+msmsdcc_suspend(struct platform_device *dev, pm_message_t state)
+{
+	struct mmc_host *mmc = mmc_get_drvdata(dev);
+	int rc = 0;
+
+	if (mmc) {
+		struct msmsdcc_host *host = mmc_priv(mmc);
+
+		if (host->stat_irq)
+			disable_irq(host->stat_irq);
+
+		if (mmc->card && mmc->card->type != MMC_TYPE_SDIO)
+			rc = mmc_suspend_host(mmc, state);
+		if (!rc) {
+			writel(0, host->base + MMCIMASK0);
+
+			if (host->clks_on) {
+				clk_disable(host->clk);
+				clk_disable(host->pclk);
+				host->clks_on = 0;
+			}
+		}
+	}
+	return rc;
+}
+
+static int
+msmsdcc_resume(struct platform_device *dev)
+{
+	struct mmc_host *mmc = mmc_get_drvdata(dev);
+	unsigned long flags;
+
+	if (mmc) {
+		struct msmsdcc_host *host = mmc_priv(mmc);
+
+		spin_lock_irqsave(&host->lock, flags);
+
+		if (!host->clks_on) {
+			clk_enable(host->pclk);
+			clk_enable(host->clk);
+			host->clks_on = 1;
+		}
+
+		writel(host->saved_irq0mask, host->base + MMCIMASK0);
+
+		spin_unlock_irqrestore(&host->lock, flags);
+
+		if (mmc->card && mmc->card->type != MMC_TYPE_SDIO)
+			mmc_resume_host(mmc);
+			if (host->stat_irq)
+				enable_irq(host->stat_irq);
+		else if (host->stat_irq)
+			enable_irq(host->stat_irq);
+	}
+	return 0;
+}
+
+static struct platform_driver msmsdcc_driver = {
+	.probe		= msmsdcc_probe,
+	.suspend	= msmsdcc_suspend,
+	.resume		= msmsdcc_resume,
+	.driver		= {
+		.name	= "msm_sdcc",
+	},
+};
+
+static int __init msmsdcc_init(void)
+{
+	return platform_driver_register(&msmsdcc_driver);
+}
+
+static void __exit msmsdcc_exit(void)
+{
+	platform_driver_unregister(&msmsdcc_driver);
+}
+
+module_init(msmsdcc_init);
+module_exit(msmsdcc_exit);
+
+MODULE_DESCRIPTION("Qualcomm MSM 7X00A Multimedia Card Interface driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h
new file mode 100644
index 0000000..8c84484
--- /dev/null
+++ b/drivers/mmc/host/msm_sdcc.h
@@ -0,0 +1,238 @@
+/*
+ *  linux/drivers/mmc/host/msmsdcc.h - QCT MSM7K SDC Controller
+ *
+ *  Copyright (C) 2008 Google, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * - Based on mmci.h
+ */
+
+#ifndef _MSM_SDCC_H
+#define _MSM_SDCC_H
+
+#define MSMSDCC_CRCI_SDC1	6
+#define MSMSDCC_CRCI_SDC2	7
+#define MSMSDCC_CRCI_SDC3	12
+#define MSMSDCC_CRCI_SDC4	13
+
+#define MMCIPOWER		0x000
+#define MCI_PWR_OFF		0x00
+#define MCI_PWR_UP		0x02
+#define MCI_PWR_ON		0x03
+#define MCI_OD			(1 << 6)
+
+#define MMCICLOCK		0x004
+#define MCI_CLK_ENABLE		(1 << 8)
+#define MCI_CLK_PWRSAVE		(1 << 9)
+#define MCI_CLK_WIDEBUS		(1 << 10)
+#define MCI_CLK_FLOWENA		(1 << 12)
+#define MCI_CLK_INVERTOUT	(1 << 13)
+#define MCI_CLK_SELECTIN	(1 << 14)
+
+#define MMCIARGUMENT		0x008
+#define MMCICOMMAND		0x00c
+#define MCI_CPSM_RESPONSE	(1 << 6)
+#define MCI_CPSM_LONGRSP	(1 << 7)
+#define MCI_CPSM_INTERRUPT	(1 << 8)
+#define MCI_CPSM_PENDING	(1 << 9)
+#define MCI_CPSM_ENABLE		(1 << 10)
+#define MCI_CPSM_PROGENA	(1 << 11)
+#define MCI_CSPM_DATCMD		(1 << 12)
+#define MCI_CSPM_MCIABORT	(1 << 13)
+#define MCI_CSPM_CCSENABLE	(1 << 14)
+#define MCI_CSPM_CCSDISABLE	(1 << 15)
+
+
+#define MMCIRESPCMD		0x010
+#define MMCIRESPONSE0		0x014
+#define MMCIRESPONSE1		0x018
+#define MMCIRESPONSE2		0x01c
+#define MMCIRESPONSE3		0x020
+#define MMCIDATATIMER		0x024
+#define MMCIDATALENGTH		0x028
+
+#define MMCIDATACTRL		0x02c
+#define MCI_DPSM_ENABLE		(1 << 0)
+#define MCI_DPSM_DIRECTION	(1 << 1)
+#define MCI_DPSM_MODE		(1 << 2)
+#define MCI_DPSM_DMAENABLE	(1 << 3)
+
+#define MMCIDATACNT		0x030
+#define MMCISTATUS		0x034
+#define MCI_CMDCRCFAIL		(1 << 0)
+#define MCI_DATACRCFAIL		(1 << 1)
+#define MCI_CMDTIMEOUT		(1 << 2)
+#define MCI_DATATIMEOUT		(1 << 3)
+#define MCI_TXUNDERRUN		(1 << 4)
+#define MCI_RXOVERRUN		(1 << 5)
+#define MCI_CMDRESPEND		(1 << 6)
+#define MCI_CMDSENT		(1 << 7)
+#define MCI_DATAEND		(1 << 8)
+#define MCI_DATABLOCKEND	(1 << 10)
+#define MCI_CMDACTIVE		(1 << 11)
+#define MCI_TXACTIVE		(1 << 12)
+#define MCI_RXACTIVE		(1 << 13)
+#define MCI_TXFIFOHALFEMPTY	(1 << 14)
+#define MCI_RXFIFOHALFFULL	(1 << 15)
+#define MCI_TXFIFOFULL		(1 << 16)
+#define MCI_RXFIFOFULL		(1 << 17)
+#define MCI_TXFIFOEMPTY		(1 << 18)
+#define MCI_RXFIFOEMPTY		(1 << 19)
+#define MCI_TXDATAAVLBL		(1 << 20)
+#define MCI_RXDATAAVLBL		(1 << 21)
+#define MCI_SDIOINTR		(1 << 22)
+#define MCI_PROGDONE		(1 << 23)
+#define MCI_ATACMDCOMPL		(1 << 24)
+#define MCI_SDIOINTOPER		(1 << 25)
+#define MCI_CCSTIMEOUT		(1 << 26)
+
+#define MMCICLEAR		0x038
+#define MCI_CMDCRCFAILCLR	(1 << 0)
+#define MCI_DATACRCFAILCLR	(1 << 1)
+#define MCI_CMDTIMEOUTCLR	(1 << 2)
+#define MCI_DATATIMEOUTCLR	(1 << 3)
+#define MCI_TXUNDERRUNCLR	(1 << 4)
+#define MCI_RXOVERRUNCLR	(1 << 5)
+#define MCI_CMDRESPENDCLR	(1 << 6)
+#define MCI_CMDSENTCLR		(1 << 7)
+#define MCI_DATAENDCLR		(1 << 8)
+#define MCI_DATABLOCKENDCLR	(1 << 10)
+
+#define MMCIMASK0		0x03c
+#define MCI_CMDCRCFAILMASK	(1 << 0)
+#define MCI_DATACRCFAILMASK	(1 << 1)
+#define MCI_CMDTIMEOUTMASK	(1 << 2)
+#define MCI_DATATIMEOUTMASK	(1 << 3)
+#define MCI_TXUNDERRUNMASK	(1 << 4)
+#define MCI_RXOVERRUNMASK	(1 << 5)
+#define MCI_CMDRESPENDMASK	(1 << 6)
+#define MCI_CMDSENTMASK		(1 << 7)
+#define MCI_DATAENDMASK		(1 << 8)
+#define MCI_DATABLOCKENDMASK	(1 << 10)
+#define MCI_CMDACTIVEMASK	(1 << 11)
+#define MCI_TXACTIVEMASK	(1 << 12)
+#define MCI_RXACTIVEMASK	(1 << 13)
+#define MCI_TXFIFOHALFEMPTYMASK	(1 << 14)
+#define MCI_RXFIFOHALFFULLMASK	(1 << 15)
+#define MCI_TXFIFOFULLMASK	(1 << 16)
+#define MCI_RXFIFOFULLMASK	(1 << 17)
+#define MCI_TXFIFOEMPTYMASK	(1 << 18)
+#define MCI_RXFIFOEMPTYMASK	(1 << 19)
+#define MCI_TXDATAAVLBLMASK	(1 << 20)
+#define MCI_RXDATAAVLBLMASK	(1 << 21)
+#define MCI_SDIOINTMASK		(1 << 22)
+#define MCI_PROGDONEMASK	(1 << 23)
+#define MCI_ATACMDCOMPLMASK	(1 << 24)
+#define MCI_SDIOINTOPERMASK	(1 << 25)
+#define MCI_CCSTIMEOUTMASK	(1 << 26)
+
+#define MMCIMASK1		0x040
+#define MMCIFIFOCNT		0x044
+#define MCICCSTIMER		0x058
+
+#define MMCIFIFO		0x080 /* to 0x0bc */
+
+#define MCI_IRQENABLE	\
+	(MCI_CMDCRCFAILMASK|MCI_DATACRCFAILMASK|MCI_CMDTIMEOUTMASK|	\
+	MCI_DATATIMEOUTMASK|MCI_TXUNDERRUNMASK|MCI_RXOVERRUNMASK|	\
+	MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK)
+
+/*
+ * The size of the FIFO in bytes.
+ */
+#define MCI_FIFOSIZE	(16*4)
+
+#define MCI_FIFOHALFSIZE (MCI_FIFOSIZE / 2)
+
+#define NR_SG		32
+
+struct clk;
+
+struct msmsdcc_nc_dmadata {
+	dmov_box	cmd[NR_SG];
+	uint32_t	cmdptr;
+};
+
+struct msmsdcc_dma_data {
+	struct msmsdcc_nc_dmadata	*nc;
+	dma_addr_t			nc_busaddr;
+	dma_addr_t			cmd_busaddr;
+	dma_addr_t			cmdptr_busaddr;
+
+	struct msm_dmov_cmd		hdr;
+	enum dma_data_direction		dir;
+
+	struct scatterlist		*sg;
+	int				num_ents;
+
+	int				channel;
+	struct msmsdcc_host		*host;
+	int				busy; /* Set if DM is busy */
+};
+
+struct msmsdcc_pio_data {
+	struct scatterlist	*sg;
+	unsigned int		sg_len;
+	unsigned int		sg_off;
+};
+
+struct msmsdcc_curr_req {
+	struct mmc_request	*mrq;
+	struct mmc_command	*cmd;
+	struct mmc_data		*data;
+	unsigned int		xfer_size;	/* Total data size */
+	unsigned int		xfer_remain;	/* Bytes remaining to send */
+	unsigned int		data_xfered;	/* Bytes acked by BLKEND irq */
+	int			got_dataend;
+	int			got_datablkend;
+	int			user_pages;
+};
+
+struct msmsdcc_stats {
+	unsigned int reqs;
+	unsigned int cmds;
+	unsigned int cmdpoll_hits;
+	unsigned int cmdpoll_misses;
+};
+
+struct msmsdcc_host {
+	struct resource		*cmd_irqres;
+	struct resource		*pio_irqres;
+	struct resource		*memres;
+	struct resource		*dmares;
+	void __iomem		*base;
+	int			pdev_id;
+	unsigned int		stat_irq;
+
+	struct msmsdcc_curr_req	curr;
+
+	struct mmc_host		*mmc;
+	struct clk		*clk;		/* main MMC bus clock */
+	struct clk		*pclk;		/* SDCC peripheral bus clock */
+	unsigned int		clks_on;	/* set if clocks are enabled */
+	struct timer_list	command_timer;
+
+	unsigned int		eject;		/* eject state */
+
+	spinlock_t		lock;
+
+	unsigned int		clk_rate;	/* Current clock rate */
+	unsigned int		pclk_rate;
+
+	u32			pwr;
+	u32			saved_irq0mask;	/* MMCIMASK0 reg value */
+	struct mmc_platform_data *plat;
+
+	struct timer_list	timer;
+	unsigned int		oldstat;
+
+	struct msmsdcc_dma_data	dma;
+	struct msmsdcc_pio_data	pio;
+	int			cmdpoll;
+	struct msmsdcc_stats	stats;
+};
+
+#endif
-- 
cgit v0.10.2


From 0a7ff7c7573011ec1f52052a8baeae68f4066dde Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 22 Sep 2009 16:44:23 -0700
Subject: msm_sdcc.c: convert printk(KERN_<level> to pr_<level>(

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Brian Swetland <swetland@google.com>
Cc: Pierre Ossman <drzeus-list@drzeus.cx>
Cc: San Mehat <san@android.com>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
index 042f217..68bce64 100644
--- a/drivers/mmc/host/msm_sdcc.c
+++ b/drivers/mmc/host/msm_sdcc.c
@@ -127,7 +127,7 @@ msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd,
 	BUG_ON(!mrq);
 
 	if (!(result & DMOV_RSLT_VALID)) {
-		printk(KERN_ERR "msmsdcc: Invalid DataMover result\n");
+		pr_err("msmsdcc: Invalid DataMover result\n");
 		goto out;
 	}
 
@@ -136,14 +136,13 @@ msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd,
 	} else {
 		/* Error or flush  */
 		if (result & DMOV_RSLT_ERROR)
-			printk(KERN_ERR "%s: DMA error (0x%.8x)\n",
+			pr_err("%s: DMA error (0x%.8x)\n",
 			       mmc_hostname(host->mmc), result);
 		if (result & DMOV_RSLT_FLUSH)
-			printk(KERN_ERR "%s: DMA channel flushed (0x%.8x)\n",
+			pr_err("%s: DMA channel flushed (0x%.8x)\n",
 			       mmc_hostname(host->mmc), result);
 		if (err)
-			printk(KERN_ERR
-			       "Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n",
+			pr_err("Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n",
 			       err->flush[0], err->flush[1], err->flush[2],
 			       err->flush[3], err->flush[4], err->flush[5]);
 		if (!mrq->data->error)
@@ -248,7 +247,7 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data)
 			host->dma.num_ents, host->dma.dir);
 
 	if (n != host->dma.num_ents) {
-		printk(KERN_ERR "%s: Unable to map in all sg elements\n",
+		pr_err("%s: Unable to map in all sg elements\n",
 		       mmc_hostname(host->mmc));
 		host->dma.sg = NULL;
 		host->dma.num_ents = 0;
@@ -393,25 +392,24 @@ msmsdcc_data_err(struct msmsdcc_host *host, struct mmc_data *data,
 		 unsigned int status)
 {
 	if (status & MCI_DATACRCFAIL) {
-		printk(KERN_ERR "%s: Data CRC error\n",
-		       mmc_hostname(host->mmc));
-		printk(KERN_ERR "%s: opcode 0x%.8x\n", __func__,
+		pr_err("%s: Data CRC error\n", mmc_hostname(host->mmc));
+		pr_err("%s: opcode 0x%.8x\n", __func__,
 		       data->mrq->cmd->opcode);
-		printk(KERN_ERR "%s: blksz %d, blocks %d\n", __func__,
+		pr_err("%s: blksz %d, blocks %d\n", __func__,
 		       data->blksz, data->blocks);
 		data->error = -EILSEQ;
 	} else if (status & MCI_DATATIMEOUT) {
-		printk(KERN_ERR "%s: Data timeout\n", mmc_hostname(host->mmc));
+		pr_err("%s: Data timeout\n", mmc_hostname(host->mmc));
 		data->error = -ETIMEDOUT;
 	} else if (status & MCI_RXOVERRUN) {
-		printk(KERN_ERR "%s: RX overrun\n", mmc_hostname(host->mmc));
+		pr_err("%s: RX overrun\n", mmc_hostname(host->mmc));
 		data->error = -EIO;
 	} else if (status & MCI_TXUNDERRUN) {
-		printk(KERN_ERR "%s: TX underrun\n", mmc_hostname(host->mmc));
+		pr_err("%s: TX underrun\n", mmc_hostname(host->mmc));
 		data->error = -EIO;
 	} else {
-		printk(KERN_ERR "%s: Unknown error (0x%.8x)\n",
-		      mmc_hostname(host->mmc), status);
+		pr_err("%s: Unknown error (0x%.8x)\n",
+		       mmc_hostname(host->mmc), status);
 		data->error = -EIO;
 	}
 }
@@ -566,8 +564,7 @@ static void msmsdcc_do_cmdirq(struct msmsdcc_host *host, uint32_t status)
 		cmd->error = -ETIMEDOUT;
 	} else if (status & MCI_CMDCRCFAIL &&
 		   cmd->flags & MMC_RSP_CRC) {
-		printk(KERN_ERR "%s: Command CRC error\n",
-		       mmc_hostname(host->mmc));
+		pr_err("%s: Command CRC error\n", mmc_hostname(host->mmc));
 		cmd->error = -EILSEQ;
 	}
 
@@ -759,8 +756,8 @@ msmsdcc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		if (ios->clock != host->clk_rate) {
 			rc = clk_set_rate(host->clk, ios->clock);
 			if (rc < 0)
-				printk(KERN_ERR
-				       "Error setting clock rate (%d)\n", rc);
+				pr_err("%s: Error setting clock rate (%d)\n",
+				       mmc_hostname(host->mmc), rc);
 			else
 				host->clk_rate = ios->clock;
 		}
@@ -848,9 +845,8 @@ msmsdcc_check_status(unsigned long data)
 	status = host->plat->status(mmc_dev(host->mmc));
 	host->eject = !status;
 	if (status ^ host->oldstat) {
-		printk(KERN_INFO
-		       "%s: Slot status change detected (%d -> %d)\n",
-		       mmc_hostname(host->mmc), host->oldstat, status);
+		pr_info("%s: Slot status change detected (%d -> %d)\n",
+			mmc_hostname(host->mmc), host->oldstat, status);
 		if (status)
 			mmc_detect_change(host->mmc, (5 * HZ) / 2);
 		else
@@ -900,13 +896,13 @@ msmsdcc_command_expired(unsigned long _data)
 	mrq = host->curr.mrq;
 
 	if (!mrq) {
-		printk(KERN_INFO "%s: Command expiry misfire\n",
-		       mmc_hostname(host->mmc));
+		pr_info("%s: Command expiry misfire\n",
+			mmc_hostname(host->mmc));
 		spin_unlock_irqrestore(&host->lock, flags);
 		return;
 	}
 
-	printk(KERN_ERR "%s: Command timeout (%p %p %p %p)\n",
+	pr_err("%s: Command timeout (%p %p %p %p)\n",
 	       mmc_hostname(host->mmc), mrq, mrq->cmd,
 	       mrq->data, host->dma.sg);
 
@@ -937,7 +933,7 @@ msmsdcc_init_dma(struct msmsdcc_host *host)
 					  &host->dma.nc_busaddr,
 					  GFP_KERNEL);
 	if (host->dma.nc == NULL) {
-		printk(KERN_ERR "Unable to allocate DMA buffer\n");
+		pr_err("Unable to allocate DMA buffer\n");
 		return -ENOMEM;
 	}
 	memset(host->dma.nc, 0x00, sizeof(struct msmsdcc_nc_dmadata));
@@ -980,7 +976,7 @@ msmsdcc_probe(struct platform_device *pdev)
 
 	/* must have platform data */
 	if (!plat) {
-		printk(KERN_ERR "%s: Platform data not available\n", __func__);
+		pr_err("%s: Platform data not available\n", __func__);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -989,7 +985,7 @@ msmsdcc_probe(struct platform_device *pdev)
 		return -EINVAL;
 
 	if (pdev->resource == NULL || pdev->num_resources < 2) {
-		printk(KERN_ERR "%s: Invalid resource\n", __func__);
+		pr_err("%s: Invalid resource\n", __func__);
 		return -ENXIO;
 	}
 
@@ -1003,7 +999,7 @@ msmsdcc_probe(struct platform_device *pdev)
 						   "status_irq");
 
 	if (!cmd_irqres || !pio_irqres || !memres) {
-		printk(KERN_ERR "%s: Invalid resource\n", __func__);
+		pr_err("%s: Invalid resource\n", __func__);
 		return -ENXIO;
 	}
 
@@ -1071,8 +1067,7 @@ msmsdcc_probe(struct platform_device *pdev)
 
 	ret = clk_set_rate(host->clk, msmsdcc_fmin);
 	if (ret) {
-		printk(KERN_ERR "%s: Clock rate set failed (%d)\n",
-		       __func__, ret);
+		pr_err("%s: Clock rate set failed (%d)\n", __func__, ret);
 		goto clk_disable;
 	}
 
@@ -1125,14 +1120,14 @@ msmsdcc_probe(struct platform_device *pdev)
 				  DRIVER_NAME " (slot)",
 				  host);
 		if (ret) {
-			printk(KERN_ERR "Unable to get slot IRQ %d (%d)\n",
-			       host->stat_irq, ret);
+			pr_err("%s: Unable to get slot IRQ %d (%d)\n",
+			       mmc_hostname(mmc), host->stat_irq, ret);
 			goto clk_disable;
 		}
 	} else if (plat->register_status_notify) {
 		plat->register_status_notify(msmsdcc_status_notify_cb, host);
 	} else if (!plat->status)
-		printk(KERN_ERR "%s: No card detect facilities available\n",
+		pr_err("%s: No card detect facilities available\n",
 		       mmc_hostname(mmc));
 	else {
 		init_timer(&host->timer);
@@ -1168,34 +1163,28 @@ msmsdcc_probe(struct platform_device *pdev)
 	mmc_set_drvdata(pdev, mmc);
 	mmc_add_host(mmc);
 
-	printk(KERN_INFO
-	       "%s: Qualcomm MSM SDCC at 0x%016llx irq %d,%d dma %d\n",
-	       mmc_hostname(mmc), (unsigned long long)memres->start,
-	       (unsigned int) cmd_irqres->start,
-	       (unsigned int) host->stat_irq, host->dma.channel);
-	printk(KERN_INFO "%s: 4 bit data mode %s\n", mmc_hostname(mmc),
-	       (mmc->caps & MMC_CAP_4_BIT_DATA ? "enabled" : "disabled"));
-	printk(KERN_INFO "%s: MMC clock %u -> %u Hz, PCLK %u Hz\n",
-	       mmc_hostname(mmc), msmsdcc_fmin, msmsdcc_fmax, host->pclk_rate);
-	printk(KERN_INFO "%s: Slot eject status = %d\n", mmc_hostname(mmc),
-	       host->eject);
-	printk(KERN_INFO "%s: Power save feature enable = %d\n",
-	       mmc_hostname(mmc), msmsdcc_pwrsave);
+	pr_info("%s: Qualcomm MSM SDCC at 0x%016llx irq %d,%d dma %d\n",
+		mmc_hostname(mmc), (unsigned long long)memres->start,
+		(unsigned int) cmd_irqres->start,
+		(unsigned int) host->stat_irq, host->dma.channel);
+	pr_info("%s: 4 bit data mode %s\n", mmc_hostname(mmc),
+		(mmc->caps & MMC_CAP_4_BIT_DATA ? "enabled" : "disabled"));
+	pr_info("%s: MMC clock %u -> %u Hz, PCLK %u Hz\n",
+		mmc_hostname(mmc), msmsdcc_fmin, msmsdcc_fmax, host->pclk_rate);
+	pr_info("%s: Slot eject status = %d\n", mmc_hostname(mmc), host->eject);
+	pr_info("%s: Power save feature enable = %d\n",
+		mmc_hostname(mmc), msmsdcc_pwrsave);
 
 	if (host->dma.channel != -1) {
-		printk(KERN_INFO
-		       "%s: DM non-cached buffer at %p, dma_addr 0x%.8x\n",
-		       mmc_hostname(mmc), host->dma.nc, host->dma.nc_busaddr);
-		printk(KERN_INFO
-		       "%s: DM cmd busaddr 0x%.8x, cmdptr busaddr 0x%.8x\n",
-		       mmc_hostname(mmc), host->dma.cmd_busaddr,
-		       host->dma.cmdptr_busaddr);
+		pr_info("%s: DM non-cached buffer at %p, dma_addr 0x%.8x\n",
+			mmc_hostname(mmc), host->dma.nc, host->dma.nc_busaddr);
+		pr_info("%s: DM cmd busaddr 0x%.8x, cmdptr busaddr 0x%.8x\n",
+			mmc_hostname(mmc), host->dma.cmd_busaddr,
+			host->dma.cmdptr_busaddr);
 	} else
-		printk(KERN_INFO
-		       "%s: PIO transfer enabled\n", mmc_hostname(mmc));
+		pr_info("%s: PIO transfer enabled\n", mmc_hostname(mmc));
 	if (host->timer.function)
-		printk(KERN_INFO "%s: Polling status mode enabled\n",
-		       mmc_hostname(mmc));
+		pr_info("%s: Polling status mode enabled\n", mmc_hostname(mmc));
 
 	return 0;
  cmd_irq_free:
-- 
cgit v0.10.2


From 75d145283b2e42619d7ee1e00b78466bacd51808 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 22 Sep 2009 16:44:24 -0700
Subject: msm_sdcc.c: stylistic cleaning

Make it a bit more like typical kernel style.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Brian Swetland <swetland@google.com>
Cc: Pierre Ossman <drzeus-list@drzeus.cx>
Cc: San Mehat <san@android.com>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
index 68bce64..874de6a 100644
--- a/drivers/mmc/host/msm_sdcc.c
+++ b/drivers/mmc/host/msm_sdcc.c
@@ -43,7 +43,6 @@
 #include <mach/dma.h>
 #include <mach/htc_pwrsink.h>
 
-
 #include "msm_sdcc.h"
 
 #define DRIVER_NAME "msm-sdcc"
@@ -58,8 +57,6 @@ static unsigned int msmsdcc_sdioirq;
 #define PIO_SPINMAX 30
 #define CMD_SPINMAX 20
 
-
-
 static void
 msmsdcc_start_command(struct msmsdcc_host *host, struct mmc_command *cmd,
 		      u32 c);
@@ -98,16 +95,17 @@ msmsdcc_stop_data(struct msmsdcc_host *host)
 
 uint32_t msmsdcc_fifo_addr(struct msmsdcc_host *host)
 {
-	if (host->pdev_id == 1)
+	switch (host->pdev_id) {
+	case 1:
 		return MSM_SDC1_PHYS + MMCIFIFO;
-	else if (host->pdev_id == 2)
+	case 2:
 		return MSM_SDC2_PHYS + MMCIFIFO;
-	else if (host->pdev_id == 3)
+	case 3:
 		return MSM_SDC3_PHYS + MMCIFIFO;
-	else if (host->pdev_id == 4)
+	case 4:
 		return MSM_SDC4_PHYS + MMCIFIFO;
-	else
-		BUG();
+	}
+	BUG();
 	return 0;
 }
 
@@ -156,8 +154,8 @@ msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd,
 		struct scatterlist *sg = host->dma.sg;
 		int i;
 
-		for (i = 0; i < host->dma.num_ents; i++, sg++)
-			flush_dcache_page(sg_page(sg));
+		for (i = 0; i < host->dma.num_ents; i++)
+			flush_dcache_page(sg_page(sg++));
 	}
 
 	host->dma.sg = NULL;
@@ -222,15 +220,20 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data)
 
 	nc = host->dma.nc;
 
-	if (host->pdev_id == 1)
+	switch (host->pdev_id) {
+	case 1:
 		crci = MSMSDCC_CRCI_SDC1;
-	else if (host->pdev_id == 2)
+		break;
+	case 2:
 		crci = MSMSDCC_CRCI_SDC2;
-	else if (host->pdev_id == 3)
+		break;
+	case 3:
 		crci = MSMSDCC_CRCI_SDC3;
-	else if (host->pdev_id == 4)
+		break;
+	case 4:
 		crci = MSMSDCC_CRCI_SDC4;
-	else {
+		break;
+	default:
 		host->dma.sg = NULL;
 		host->dma.num_ents = 0;
 		return -ENOENT;
@@ -244,7 +247,7 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data)
 	host->curr.user_pages = 0;
 
 	n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg,
-			host->dma.num_ents, host->dma.dir);
+		       host->dma.num_ents, host->dma.dir);
 
 	if (n != host->dma.num_ents) {
 		pr_err("%s: Unable to map in all sg elements\n",
@@ -318,7 +321,7 @@ msmsdcc_start_data(struct msmsdcc_host *host, struct mmc_data *data)
 	memset(&host->pio, 0, sizeof(host->pio));
 
 	clks = (unsigned long long)data->timeout_ns * host->clk_rate;
-	do_div(clks, 1000000000UL);
+	do_div(clks, NSEC_PER_SEC);
 	timeout = data->timeout_clks + (unsigned int)clks;
 	writel(timeout, base + MMCIDATATIMER);
 
@@ -371,9 +374,9 @@ msmsdcc_start_command(struct msmsdcc_host *host, struct mmc_command *cmd, u32 c)
 		c |= MCI_CPSM_RESPONSE;
 	}
 
-	if ((((cmd->opcode == 17) || (cmd->opcode == 18))  ||
-	     ((cmd->opcode == 24) || (cmd->opcode == 25))) ||
-	      (cmd->opcode == 53))
+	if (cmd->opcode == 17 || cmd->opcode == 18 ||
+	    cmd->opcode == 24 || cmd->opcode == 25 ||
+	    cmd->opcode == 53)
 		c |= MCI_CSPM_DATCMD;
 
 	if (cmd == cmd->mrq->stop)
-- 
cgit v0.10.2


From b5a74d6058e86546868242bb5283e16fb10fd90a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 22 Sep 2009 16:44:25 -0700
Subject: msm_sdcc.c: move overly indented code to separate function

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Brian Swetland <swetland@google.com>
Cc: Pierre Ossman <drzeus-list@drzeus.cx>
Cc: San Mehat <san@android.com>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
index 874de6a..dba4600 100644
--- a/drivers/mmc/host/msm_sdcc.c
+++ b/drivers/mmc/host/msm_sdcc.c
@@ -584,6 +584,67 @@ static void msmsdcc_do_cmdirq(struct msmsdcc_host *host, uint32_t status)
 		msmsdcc_start_data(host, cmd->data);
 }
 
+static void
+msmsdcc_handle_irq_data(struct msmsdcc_host *host, u32 status,
+			void __iomem *base)
+{
+	struct mmc_data *data = host->curr.data;
+
+	if (!data)
+		return;
+
+	/* Check for data errors */
+	if (status & (MCI_DATACRCFAIL | MCI_DATATIMEOUT |
+		      MCI_TXUNDERRUN | MCI_RXOVERRUN)) {
+		msmsdcc_data_err(host, data, status);
+		host->curr.data_xfered = 0;
+		if (host->dma.sg)
+			msm_dmov_stop_cmd(host->dma.channel,
+					  &host->dma.hdr, 0);
+		else {
+			msmsdcc_stop_data(host);
+			if (!data->stop)
+				msmsdcc_request_end(host, data->mrq);
+			else
+				msmsdcc_start_command(host, data->stop, 0);
+		}
+	}
+
+	/* Check for data done */
+	if (!host->curr.got_dataend && (status & MCI_DATAEND))
+		host->curr.got_dataend = 1;
+
+	if (!host->curr.got_datablkend && (status & MCI_DATABLOCKEND))
+		host->curr.got_datablkend = 1;
+
+	/*
+	 * If DMA is still in progress, we complete via the completion handler
+	 */
+	if (host->curr.got_dataend && host->curr.got_datablkend &&
+	    !host->dma.busy) {
+		/*
+		 * There appears to be an issue in the controller where
+		 * if you request a small block transfer (< fifo size),
+		 * you may get your DATAEND/DATABLKEND irq without the
+		 * PIO data irq.
+		 *
+		 * Check to see if there is still data to be read,
+		 * and simulate a PIO irq.
+		 */
+		if (readl(base + MMCISTATUS) & MCI_RXDATAAVLBL)
+			msmsdcc_pio_irq(1, host);
+
+		msmsdcc_stop_data(host);
+		if (!data->error)
+			host->curr.data_xfered = host->curr.xfer_size;
+
+		if (!data->stop)
+			msmsdcc_request_end(host, data->mrq);
+		else
+			msmsdcc_start_command(host, data->stop, 0);
+	}
+}
+
 static irqreturn_t
 msmsdcc_irq(int irq, void *dev_id)
 {
@@ -596,79 +657,12 @@ msmsdcc_irq(int irq, void *dev_id)
 	spin_lock(&host->lock);
 
 	do {
-		struct mmc_data *data;
 		status = readl(base + MMCISTATUS);
 
-		status &= (readl(base + MMCIMASK0) |
-					      MCI_DATABLOCKENDMASK);
+		status &= (readl(base + MMCIMASK0) | MCI_DATABLOCKENDMASK);
 		writel(status, base + MMCICLEAR);
 
-		data = host->curr.data;
-		if (data) {
-			/* Check for data errors */
-			if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|
-				      MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
-				msmsdcc_data_err(host, data, status);
-				host->curr.data_xfered = 0;
-				if (host->dma.sg)
-					msm_dmov_stop_cmd(host->dma.channel,
-							  &host->dma.hdr, 0);
-				else {
-					msmsdcc_stop_data(host);
-					if (!data->stop)
-						msmsdcc_request_end(host,
-								    data->mrq);
-					else
-						msmsdcc_start_command(host,
-								     data->stop,
-								     0);
-				}
-			}
-
-			/* Check for data done */
-			if (!host->curr.got_dataend && (status & MCI_DATAEND))
-				host->curr.got_dataend = 1;
-
-			if (!host->curr.got_datablkend &&
-			    (status & MCI_DATABLOCKEND)) {
-				host->curr.got_datablkend = 1;
-			}
-
-			if (host->curr.got_dataend &&
-			    host->curr.got_datablkend) {
-				/*
-				 * If DMA is still in progress, we complete
-				 * via the completion handler
-				 */
-				if (!host->dma.busy) {
-					/*
-					 * There appears to be an issue in the
-					 * controller where if you request a
-					 * small block transfer (< fifo size),
-					 * you may get your DATAEND/DATABLKEND
-					 * irq without the PIO data irq.
-					 *
-					 * Check to see if theres still data
-					 * to be read, and simulate a PIO irq.
-					 */
-					if (readl(base + MMCISTATUS) &
-							       MCI_RXDATAAVLBL)
-						msmsdcc_pio_irq(1, host);
-
-					msmsdcc_stop_data(host);
-					if (!data->error)
-						host->curr.data_xfered =
-							host->curr.xfer_size;
-
-					if (!data->stop)
-						msmsdcc_request_end(host,
-								    data->mrq);
-					else
-						msmsdcc_start_command(host,
-							      data->stop, 0);
-				}
-			}
-		}
+		msmsdcc_handle_irq_data(host, status, base);
 
 		if (status & (MCI_CMDSENT | MCI_CMDRESPEND | MCI_CMDCRCFAIL |
 			      MCI_CMDTIMEOUT) && host->curr.cmd) {
-- 
cgit v0.10.2


From f400cd8c84ba51e0ef3d9b680fe9f45ab54c792c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 22 Sep 2009 16:44:26 -0700
Subject: mmc: register mmci-omap-hs using platform_driver_probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

omap_mmc_probe lives in .init.text, so using platform_driver_register to
register it is wrong because binding a device after the init memory is
discarded (e.g.  via sysfs) results in an oops.

As requested by David Brownell platform_driver_probe is used instead of
moving the probe function to .devinit.text as proposed initially.  This
saves some memory, but devices registered after the driver is probed are
not bound (probably there are none) and binding via sysfs isn't possible.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Jean Pihet <jpihet@mvista.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: Andy Lowe <alowe@mvista.com>
Cc: Adrian Hunter <ext-adrian.hunter@nokia.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Madhusudhan Chikkature<madhu.cr@ti.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index fee895b..4b3af9e 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1314,7 +1314,6 @@ clk_en_err:
 #endif
 
 static struct platform_driver omap_mmc_driver = {
-	.probe		= omap_mmc_probe,
 	.remove		= omap_mmc_remove,
 	.suspend	= omap_mmc_suspend,
 	.resume		= omap_mmc_resume,
@@ -1327,7 +1326,7 @@ static struct platform_driver omap_mmc_driver = {
 static int __init omap_mmc_init(void)
 {
 	/* Register the MMC driver */
-	return platform_driver_register(&omap_mmc_driver);
+	return platform_driver_probe(&omap_mmc_driver, omap_mmc_probe);
 }
 
 static void __exit omap_mmc_cleanup(void)
-- 
cgit v0.10.2


From 27cce39f555def6f5ebe7f03d69ccc44ab25f0b2 Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@bencohen.org>
Date: Tue, 22 Sep 2009 16:44:28 -0700
Subject: sdio: do not ignore MMC_VDD_165_195

This is needed for 1.8V embedded SDIO devices and supporting host controllers
(e.g. TI 127x and ZOOM2 boards)

Signed-off-by: Ohad Ben-Cohen <ohad@bencohen.org>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index fb99ccf..6f221dc 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -275,13 +275,6 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
 		ocr &= ~0x7F;
 	}
 
-	if (ocr & MMC_VDD_165_195) {
-		printk(KERN_WARNING "%s: SDIO card claims to support the "
-		       "incompletely defined 'low voltage range'. This "
-		       "will be ignored.\n", mmc_hostname(host));
-		ocr &= ~MMC_VDD_165_195;
-	}
-
 	host->ocr = mmc_select_voltage(host, ocr);
 
 	/*
-- 
cgit v0.10.2


From 8ea926b22e2d13238e4d65d8f61c48fe424e6f4f Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:29 -0700
Subject: mmc: add 'enable' and 'disable' methods to mmc host

MMC hosts that support power saving can use the 'enable' and 'disable'
methods to exit and enter power saving states.  An explanation of their
use is provided in the comments added to include/linux/mmc/host.h.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index e22d2b5..fb24a09 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -344,6 +344,101 @@ unsigned int mmc_align_data_size(struct mmc_card *card, unsigned int sz)
 EXPORT_SYMBOL(mmc_align_data_size);
 
 /**
+ *	mmc_host_enable - enable a host.
+ *	@host: mmc host to enable
+ *
+ *	Hosts that support power saving can use the 'enable' and 'disable'
+ *	methods to exit and enter power saving states. For more information
+ *	see comments for struct mmc_host_ops.
+ */
+int mmc_host_enable(struct mmc_host *host)
+{
+	if (!(host->caps & MMC_CAP_DISABLE))
+		return 0;
+
+	if (host->en_dis_recurs)
+		return 0;
+
+	if (host->nesting_cnt++)
+		return 0;
+
+	cancel_delayed_work_sync(&host->disable);
+
+	if (host->enabled)
+		return 0;
+
+	if (host->ops->enable) {
+		int err;
+
+		host->en_dis_recurs = 1;
+		err = host->ops->enable(host);
+		host->en_dis_recurs = 0;
+
+		if (err) {
+			pr_debug("%s: enable error %d\n",
+				 mmc_hostname(host), err);
+			return err;
+		}
+	}
+	host->enabled = 1;
+	return 0;
+}
+EXPORT_SYMBOL(mmc_host_enable);
+
+static int mmc_host_do_disable(struct mmc_host *host, int lazy)
+{
+	if (host->ops->disable) {
+		int err;
+
+		host->en_dis_recurs = 1;
+		err = host->ops->disable(host, lazy);
+		host->en_dis_recurs = 0;
+
+		if (err < 0) {
+			pr_debug("%s: disable error %d\n",
+				 mmc_hostname(host), err);
+			return err;
+		}
+		if (err > 0) {
+			unsigned long delay = msecs_to_jiffies(err);
+
+			mmc_schedule_delayed_work(&host->disable, delay);
+		}
+	}
+	host->enabled = 0;
+	return 0;
+}
+
+/**
+ *	mmc_host_disable - disable a host.
+ *	@host: mmc host to disable
+ *
+ *	Hosts that support power saving can use the 'enable' and 'disable'
+ *	methods to exit and enter power saving states. For more information
+ *	see comments for struct mmc_host_ops.
+ */
+int mmc_host_disable(struct mmc_host *host)
+{
+	int err;
+
+	if (!(host->caps & MMC_CAP_DISABLE))
+		return 0;
+
+	if (host->en_dis_recurs)
+		return 0;
+
+	if (--host->nesting_cnt)
+		return 0;
+
+	if (!host->enabled)
+		return 0;
+
+	err = mmc_host_do_disable(host, 0);
+	return err;
+}
+EXPORT_SYMBOL(mmc_host_disable);
+
+/**
  *	__mmc_claim_host - exclusively claim a host
  *	@host: mmc host to claim
  *	@abort: whether or not the operation should be aborted
@@ -379,11 +474,81 @@ int __mmc_claim_host(struct mmc_host *host, atomic_t *abort)
 		wake_up(&host->wq);
 	spin_unlock_irqrestore(&host->lock, flags);
 	remove_wait_queue(&host->wq, &wait);
+	if (!stop)
+		mmc_host_enable(host);
 	return stop;
 }
 
 EXPORT_SYMBOL(__mmc_claim_host);
 
+static int mmc_try_claim_host(struct mmc_host *host)
+{
+	int claimed_host = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+	if (!host->claimed) {
+		host->claimed = 1;
+		claimed_host = 1;
+	}
+	spin_unlock_irqrestore(&host->lock, flags);
+	return claimed_host;
+}
+
+static void mmc_do_release_host(struct mmc_host *host)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+	host->claimed = 0;
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	wake_up(&host->wq);
+}
+
+void mmc_host_deeper_disable(struct work_struct *work)
+{
+	struct mmc_host *host =
+		container_of(work, struct mmc_host, disable.work);
+
+	/* If the host is claimed then we do not want to disable it anymore */
+	if (!mmc_try_claim_host(host))
+		return;
+	mmc_host_do_disable(host, 1);
+	mmc_do_release_host(host);
+}
+
+/**
+ *	mmc_host_lazy_disable - lazily disable a host.
+ *	@host: mmc host to disable
+ *
+ *	Hosts that support power saving can use the 'enable' and 'disable'
+ *	methods to exit and enter power saving states. For more information
+ *	see comments for struct mmc_host_ops.
+ */
+int mmc_host_lazy_disable(struct mmc_host *host)
+{
+	if (!(host->caps & MMC_CAP_DISABLE))
+		return 0;
+
+	if (host->en_dis_recurs)
+		return 0;
+
+	if (--host->nesting_cnt)
+		return 0;
+
+	if (!host->enabled)
+		return 0;
+
+	if (host->disable_delay) {
+		mmc_schedule_delayed_work(&host->disable,
+				msecs_to_jiffies(host->disable_delay));
+		return 0;
+	} else
+		return mmc_host_do_disable(host, 1);
+}
+EXPORT_SYMBOL(mmc_host_lazy_disable);
+
 /**
  *	mmc_release_host - release a host
  *	@host: mmc host to release
@@ -393,15 +558,11 @@ EXPORT_SYMBOL(__mmc_claim_host);
  */
 void mmc_release_host(struct mmc_host *host)
 {
-	unsigned long flags;
-
 	WARN_ON(!host->claimed);
 
-	spin_lock_irqsave(&host->lock, flags);
-	host->claimed = 0;
-	spin_unlock_irqrestore(&host->lock, flags);
+	mmc_host_lazy_disable(host);
 
-	wake_up(&host->wq);
+	mmc_do_release_host(host);
 }
 
 EXPORT_SYMBOL(mmc_release_host);
@@ -953,6 +1114,8 @@ void mmc_stop_host(struct mmc_host *host)
 	spin_unlock_irqrestore(&host->lock, flags);
 #endif
 
+	if (host->caps & MMC_CAP_DISABLE)
+		cancel_delayed_work(&host->disable);
 	cancel_delayed_work(&host->detect);
 	mmc_flush_scheduled_work();
 
@@ -981,6 +1144,8 @@ void mmc_stop_host(struct mmc_host *host)
  */
 int mmc_suspend_host(struct mmc_host *host, pm_message_t state)
 {
+	if (host->caps & MMC_CAP_DISABLE)
+		cancel_delayed_work(&host->disable);
 	cancel_delayed_work(&host->detect);
 	mmc_flush_scheduled_work();
 
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 5e945e6..a268d12 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -83,6 +83,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 	spin_lock_init(&host->lock);
 	init_waitqueue_head(&host->wq);
 	INIT_DELAYED_WORK(&host->detect, mmc_rescan);
+	INIT_DELAYED_WORK_DEFERRABLE(&host->disable, mmc_host_deeper_disable);
 
 	/*
 	 * By default, hosts do not support SGIO or large requests.
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index c2dc3d2..8c87e11 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -14,5 +14,7 @@
 int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
 
+void mmc_host_deeper_disable(struct work_struct *work);
+
 #endif
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 3e7615e..338a9b3 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -51,6 +51,35 @@ struct mmc_ios {
 };
 
 struct mmc_host_ops {
+	/*
+	 * Hosts that support power saving can use the 'enable' and 'disable'
+	 * methods to exit and enter power saving states. 'enable' is called
+	 * when the host is claimed and 'disable' is called (or scheduled with
+	 * a delay) when the host is released. The 'disable' is scheduled if
+	 * the disable delay set by 'mmc_set_disable_delay()' is non-zero,
+	 * otherwise 'disable' is called immediately. 'disable' may be
+	 * scheduled repeatedly, to permit ever greater power saving at the
+	 * expense of ever greater latency to re-enable. Rescheduling is
+	 * determined by the return value of the 'disable' method. A positive
+	 * value gives the delay in milliseconds.
+	 *
+	 * In the case where a host function (like set_ios) may be called
+	 * with or without the host claimed, enabling and disabling can be
+	 * done directly and will nest correctly. Call 'mmc_host_enable()' and
+	 * 'mmc_host_lazy_disable()' for this purpose, but note that these
+	 * functions must be paired.
+	 *
+	 * Alternatively, 'mmc_host_enable()' may be paired with
+	 * 'mmc_host_disable()' which calls 'disable' immediately.  In this
+	 * case the 'disable' method will be called with 'lazy' set to 0.
+	 * This is mainly useful for error paths.
+	 *
+	 * Because lazy disable may be called from a work queue, the 'disable'
+	 * method must claim the host when 'lazy' != 0, which will work
+	 * correctly because recursion is detected and handled.
+	 */
+	int (*enable)(struct mmc_host *host);
+	int (*disable)(struct mmc_host *host, int lazy);
 	void	(*request)(struct mmc_host *host, struct mmc_request *req);
 	/*
 	 * Avoid calling these three functions too often or in a "fast path",
@@ -118,6 +147,7 @@ struct mmc_host {
 #define MMC_CAP_SPI		(1 << 4)	/* Talks only SPI protocols */
 #define MMC_CAP_NEEDS_POLL	(1 << 5)	/* Needs polling for card-detection */
 #define MMC_CAP_8_BIT_DATA	(1 << 6)	/* Can the host do 8 bit transfers */
+#define MMC_CAP_DISABLE		(1 << 7)	/* Can the host be disabled */
 
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
@@ -142,6 +172,13 @@ struct mmc_host {
 	unsigned int		removed:1;	/* host is being removed */
 #endif
 
+	/* Only used with MMC_CAP_DISABLE */
+	int			enabled;	/* host is enabled */
+	int			nesting_cnt;	/* "enable" nesting count */
+	int			en_dis_recurs;	/* detect recursion */
+	unsigned int		disable_delay;	/* disable delay in msecs */
+	struct delayed_work	disable;	/* disabling work */
+
 	struct mmc_card		*card;		/* device attached to this host */
 
 	wait_queue_head_t	wq;
@@ -197,5 +234,15 @@ struct regulator;
 int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct regulator *supply, unsigned short vdd_bit);
 
+int mmc_host_enable(struct mmc_host *host);
+int mmc_host_disable(struct mmc_host *host);
+int mmc_host_lazy_disable(struct mmc_host *host);
+
+static inline void mmc_set_disable_delay(struct mmc_host *host,
+					 unsigned int disable_delay)
+{
+	host->disable_delay = disable_delay;
+}
+
 #endif
 
-- 
cgit v0.10.2


From 319a3f1429c91147058ac26c5f5bac8ec1730bc6 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:30 -0700
Subject: mmc: allow host claim / release nesting

This change allows the MMC host to be claimed in situations where the host
may or may not have already been claimed.  Also 'mmc_try_claim_host()' is
now exported.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index fb24a09..02f2b18 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -461,16 +461,18 @@ int __mmc_claim_host(struct mmc_host *host, atomic_t *abort)
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		stop = abort ? atomic_read(abort) : 0;
-		if (stop || !host->claimed)
+		if (stop || !host->claimed || host->claimer == current)
 			break;
 		spin_unlock_irqrestore(&host->lock, flags);
 		schedule();
 		spin_lock_irqsave(&host->lock, flags);
 	}
 	set_current_state(TASK_RUNNING);
-	if (!stop)
+	if (!stop) {
 		host->claimed = 1;
-	else
+		host->claimer = current;
+		host->claim_cnt += 1;
+	} else
 		wake_up(&host->wq);
 	spin_unlock_irqrestore(&host->lock, flags);
 	remove_wait_queue(&host->wq, &wait);
@@ -481,29 +483,43 @@ int __mmc_claim_host(struct mmc_host *host, atomic_t *abort)
 
 EXPORT_SYMBOL(__mmc_claim_host);
 
-static int mmc_try_claim_host(struct mmc_host *host)
+/**
+ *	mmc_try_claim_host - try exclusively to claim a host
+ *	@host: mmc host to claim
+ *
+ *	Returns %1 if the host is claimed, %0 otherwise.
+ */
+int mmc_try_claim_host(struct mmc_host *host)
 {
 	int claimed_host = 0;
 	unsigned long flags;
 
 	spin_lock_irqsave(&host->lock, flags);
-	if (!host->claimed) {
+	if (!host->claimed || host->claimer == current) {
 		host->claimed = 1;
+		host->claimer = current;
+		host->claim_cnt += 1;
 		claimed_host = 1;
 	}
 	spin_unlock_irqrestore(&host->lock, flags);
 	return claimed_host;
 }
+EXPORT_SYMBOL(mmc_try_claim_host);
 
 static void mmc_do_release_host(struct mmc_host *host)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&host->lock, flags);
-	host->claimed = 0;
-	spin_unlock_irqrestore(&host->lock, flags);
-
-	wake_up(&host->wq);
+	if (--host->claim_cnt) {
+		/* Release for nested claim */
+		spin_unlock_irqrestore(&host->lock, flags);
+	} else {
+		host->claimed = 0;
+		host->claimer = NULL;
+		spin_unlock_irqrestore(&host->lock, flags);
+		wake_up(&host->wq);
+	}
 }
 
 void mmc_host_deeper_disable(struct work_struct *work)
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 7ac8b50..e4898e9 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -139,6 +139,7 @@ extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int);
 
 extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort);
 extern void mmc_release_host(struct mmc_host *host);
+extern int mmc_try_claim_host(struct mmc_host *host);
 
 /**
  *	mmc_claim_host - exclusively claim a host
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 338a9b3..631a2fe 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -182,6 +182,8 @@ struct mmc_host {
 	struct mmc_card		*card;		/* device attached to this host */
 
 	wait_queue_head_t	wq;
+	struct task_struct	*claimer;	/* task that has host claimed */
+	int			claim_cnt;	/* "claim" nesting count */
 
 	struct delayed_work	detect;
 
-- 
cgit v0.10.2


From 9feae246963c648b212abad0f0eb8938de5f5fe5 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:32 -0700
Subject: mmc: add MMC_CAP_NONREMOVABLE host capability

eMMC's are not removable, so unsafe resume is OK always.

To permit this a new host capability MMC_CAP_NONREMOVABLE has been added
and suspend / resume updated accordingly.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 2fb9d5f..995db18 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -507,8 +507,6 @@ static void mmc_detect(struct mmc_host *host)
 	}
 }
 
-#ifdef CONFIG_MMC_UNSAFE_RESUME
-
 /*
  * Suspend callback from host.
  */
@@ -551,20 +549,49 @@ static void mmc_resume(struct mmc_host *host)
 
 }
 
-#else
+#ifdef CONFIG_MMC_UNSAFE_RESUME
 
-#define mmc_suspend NULL
-#define mmc_resume NULL
+static const struct mmc_bus_ops mmc_ops = {
+	.remove = mmc_remove,
+	.detect = mmc_detect,
+	.suspend = mmc_suspend,
+	.resume = mmc_resume,
+};
 
-#endif
+static void mmc_attach_bus_ops(struct mmc_host *host)
+{
+	mmc_attach_bus(host, &mmc_ops);
+}
+
+#else
 
 static const struct mmc_bus_ops mmc_ops = {
 	.remove = mmc_remove,
 	.detect = mmc_detect,
+	.suspend = NULL,
+	.resume = NULL,
+};
+
+static const struct mmc_bus_ops mmc_ops_unsafe = {
+	.remove = mmc_remove,
+	.detect = mmc_detect,
 	.suspend = mmc_suspend,
 	.resume = mmc_resume,
 };
 
+static void mmc_attach_bus_ops(struct mmc_host *host)
+{
+	const struct mmc_bus_ops *bus_ops;
+
+	if (host->caps & MMC_CAP_NONREMOVABLE)
+		bus_ops = &mmc_ops_unsafe;
+	else
+		bus_ops = &mmc_ops;
+	mmc_attach_bus(host, bus_ops);
+}
+
+#endif
+
 /*
  * Starting point for MMC card init.
  */
@@ -575,7 +602,7 @@ int mmc_attach_mmc(struct mmc_host *host, u32 ocr)
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
 
-	mmc_attach_bus(host, &mmc_ops);
+	mmc_attach_bus_ops(host);
 
 	/*
 	 * We need to get OCR a different way for SPI.
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 7ad646f..92fa9dc 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -561,8 +561,6 @@ static void mmc_sd_detect(struct mmc_host *host)
 	}
 }
 
-#ifdef CONFIG_MMC_UNSAFE_RESUME
-
 /*
  * Suspend callback from host.
  */
@@ -605,20 +603,49 @@ static void mmc_sd_resume(struct mmc_host *host)
 
 }
 
-#else
+#ifdef CONFIG_MMC_UNSAFE_RESUME
 
-#define mmc_sd_suspend NULL
-#define mmc_sd_resume NULL
+static const struct mmc_bus_ops mmc_sd_ops = {
+	.remove = mmc_sd_remove,
+	.detect = mmc_sd_detect,
+	.suspend = mmc_sd_suspend,
+	.resume = mmc_sd_resume,
+};
 
-#endif
+static void mmc_sd_attach_bus_ops(struct mmc_host *host)
+{
+	mmc_attach_bus(host, &mmc_sd_ops);
+}
+
+#else
 
 static const struct mmc_bus_ops mmc_sd_ops = {
 	.remove = mmc_sd_remove,
 	.detect = mmc_sd_detect,
+	.suspend = NULL,
+	.resume = NULL,
+};
+
+static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
+	.remove = mmc_sd_remove,
+	.detect = mmc_sd_detect,
 	.suspend = mmc_sd_suspend,
 	.resume = mmc_sd_resume,
 };
 
+static void mmc_sd_attach_bus_ops(struct mmc_host *host)
+{
+	const struct mmc_bus_ops *bus_ops;
+
+	if (host->caps & MMC_CAP_NONREMOVABLE)
+		bus_ops = &mmc_sd_ops_unsafe;
+	else
+		bus_ops = &mmc_sd_ops;
+	mmc_attach_bus(host, bus_ops);
+}
+
+#endif
+
 /*
  * Starting point for SD card init.
  */
@@ -629,7 +656,7 @@ int mmc_attach_sd(struct mmc_host *host, u32 ocr)
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
 
-	mmc_attach_bus(host, &mmc_sd_ops);
+	mmc_sd_attach_bus_ops(host);
 
 	/*
 	 * We need to get OCR a different way for SPI.
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 631a2fe..bb867d2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -148,6 +148,7 @@ struct mmc_host {
 #define MMC_CAP_NEEDS_POLL	(1 << 5)	/* Needs polling for card-detection */
 #define MMC_CAP_8_BIT_DATA	(1 << 6)	/* Can the host do 8 bit transfers */
 #define MMC_CAP_DISABLE		(1 << 7)	/* Can the host be disabled */
+#define MMC_CAP_NONREMOVABLE	(1 << 8)	/* Nonremovable e.g. eMMC */
 
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
-- 
cgit v0.10.2


From eae1aeeed852aae37621b82a9e7f6c05096a18fd Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:33 -0700
Subject: mmc: add ability to save power by powering off cards

Power can be saved by powering off cards that are not in use.  This is
similar to suspend / resume except it is under the control of the driver,
and does not require any power management support.  It can only be used
when the driver can monitor whether the card is removed, otherwise it is
unsafe.  This is possible because, unlike suspend, the driver still
receives card detect and / or cover switch interrupts.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 02f2b18..be1fc01 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1151,6 +1151,40 @@ void mmc_stop_host(struct mmc_host *host)
 	mmc_power_off(host);
 }
 
+void mmc_power_save_host(struct mmc_host *host)
+{
+	mmc_bus_get(host);
+
+	if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
+		mmc_bus_put(host);
+		return;
+	}
+
+	if (host->bus_ops->power_save)
+		host->bus_ops->power_save(host);
+
+	mmc_bus_put(host);
+
+	mmc_power_off(host);
+}
+EXPORT_SYMBOL(mmc_power_save_host);
+
+void mmc_power_restore_host(struct mmc_host *host)
+{
+	mmc_bus_get(host);
+
+	if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
+		mmc_bus_put(host);
+		return;
+	}
+
+	mmc_power_up(host);
+	host->bus_ops->power_restore(host);
+
+	mmc_bus_put(host);
+}
+EXPORT_SYMBOL(mmc_power_restore_host);
+
 #ifdef CONFIG_PM
 
 /**
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index c819eff..f7eb4c4 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -20,6 +20,8 @@ struct mmc_bus_ops {
 	void (*detect)(struct mmc_host *);
 	void (*suspend)(struct mmc_host *);
 	void (*resume)(struct mmc_host *);
+	void (*power_save)(struct mmc_host *);
+	void (*power_restore)(struct mmc_host *);
 };
 
 void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 995db18..27e842d 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -549,6 +549,14 @@ static void mmc_resume(struct mmc_host *host)
 
 }
 
+static void mmc_power_restore(struct mmc_host *host)
+{
+	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	mmc_claim_host(host);
+	mmc_init_card(host, host->ocr, host->card);
+	mmc_release_host(host);
+}
+
 #ifdef CONFIG_MMC_UNSAFE_RESUME
 
 static const struct mmc_bus_ops mmc_ops = {
@@ -556,6 +564,7 @@ static const struct mmc_bus_ops mmc_ops = {
 	.detect = mmc_detect,
 	.suspend = mmc_suspend,
 	.resume = mmc_resume,
+	.power_restore = mmc_power_restore,
 };
 
 static void mmc_attach_bus_ops(struct mmc_host *host)
@@ -570,6 +579,7 @@ static const struct mmc_bus_ops mmc_ops = {
 	.detect = mmc_detect,
 	.suspend = NULL,
 	.resume = NULL,
+	.power_restore = mmc_power_restore,
 };
 
 static const struct mmc_bus_ops mmc_ops_unsafe = {
@@ -577,6 +587,7 @@ static const struct mmc_bus_ops mmc_ops_unsafe = {
 	.detect = mmc_detect,
 	.suspend = mmc_suspend,
 	.resume = mmc_resume,
+	.power_restore = mmc_power_restore,
 };
 
 static void mmc_attach_bus_ops(struct mmc_host *host)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 92fa9dc..222a609 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -603,6 +603,14 @@ static void mmc_sd_resume(struct mmc_host *host)
 
 }
 
+static void mmc_sd_power_restore(struct mmc_host *host)
+{
+	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	mmc_claim_host(host);
+	mmc_sd_init_card(host, host->ocr, host->card);
+	mmc_release_host(host);
+}
+
 #ifdef CONFIG_MMC_UNSAFE_RESUME
 
 static const struct mmc_bus_ops mmc_sd_ops = {
@@ -610,6 +618,7 @@ static const struct mmc_bus_ops mmc_sd_ops = {
 	.detect = mmc_sd_detect,
 	.suspend = mmc_sd_suspend,
 	.resume = mmc_sd_resume,
+	.power_restore = mmc_sd_power_restore,
 };
 
 static void mmc_sd_attach_bus_ops(struct mmc_host *host)
@@ -624,6 +633,7 @@ static const struct mmc_bus_ops mmc_sd_ops = {
 	.detect = mmc_sd_detect,
 	.suspend = NULL,
 	.resume = NULL,
+	.power_restore = mmc_sd_power_restore,
 };
 
 static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
@@ -631,6 +641,7 @@ static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
 	.detect = mmc_sd_detect,
 	.suspend = mmc_sd_suspend,
 	.resume = mmc_sd_resume,
+	.power_restore = mmc_sd_power_restore,
 };
 
 static void mmc_sd_attach_bus_ops(struct mmc_host *host)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index bb867d2..c1cbe59 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -223,6 +223,9 @@ static inline void *mmc_priv(struct mmc_host *host)
 extern int mmc_suspend_host(struct mmc_host *, pm_message_t);
 extern int mmc_resume_host(struct mmc_host *);
 
+extern void mmc_power_save_host(struct mmc_host *host);
+extern void mmc_power_restore_host(struct mmc_host *host);
+
 extern void mmc_detect_change(struct mmc_host *, unsigned long delay);
 extern void mmc_request_done(struct mmc_host *, struct mmc_request *);
 
-- 
cgit v0.10.2


From b1ebe38456f7fe61a88af2844361e763ac6ea5ae Mon Sep 17 00:00:00 2001
From: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Date: Tue, 22 Sep 2009 16:44:34 -0700
Subject: mmc: add mmc card sleep and awake support

Add support for the new MMC command SLEEP_AWAKE.

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index be1fc01..828e60e 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1185,6 +1185,46 @@ void mmc_power_restore_host(struct mmc_host *host)
 }
 EXPORT_SYMBOL(mmc_power_restore_host);
 
+int mmc_card_awake(struct mmc_host *host)
+{
+	int err = -ENOSYS;
+
+	mmc_bus_get(host);
+
+	if (host->bus_ops && !host->bus_dead && host->bus_ops->awake)
+		err = host->bus_ops->awake(host);
+
+	mmc_bus_put(host);
+
+	return err;
+}
+EXPORT_SYMBOL(mmc_card_awake);
+
+int mmc_card_sleep(struct mmc_host *host)
+{
+	int err = -ENOSYS;
+
+	mmc_bus_get(host);
+
+	if (host->bus_ops && !host->bus_dead && host->bus_ops->awake)
+		err = host->bus_ops->sleep(host);
+
+	mmc_bus_put(host);
+
+	return err;
+}
+EXPORT_SYMBOL(mmc_card_sleep);
+
+int mmc_card_can_sleep(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+
+	if (card && mmc_card_mmc(card) && card->ext_csd.rev >= 3)
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL(mmc_card_can_sleep);
+
 #ifdef CONFIG_PM
 
 /**
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index f7eb4c4..c386348 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -16,6 +16,8 @@
 #define MMC_CMD_RETRIES        3
 
 struct mmc_bus_ops {
+	int (*awake)(struct mmc_host *);
+	int (*sleep)(struct mmc_host *);
 	void (*remove)(struct mmc_host *);
 	void (*detect)(struct mmc_host *);
 	void (*suspend)(struct mmc_host *);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 27e842d..e0bfa95 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -160,7 +160,6 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 {
 	int err;
 	u8 *ext_csd;
-	unsigned int ext_csd_struct;
 
 	BUG_ON(!card);
 
@@ -207,16 +206,16 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 		goto out;
 	}
 
-	ext_csd_struct = ext_csd[EXT_CSD_REV];
-	if (ext_csd_struct > 3) {
+	card->ext_csd.rev = ext_csd[EXT_CSD_REV];
+	if (card->ext_csd.rev > 3) {
 		printk(KERN_ERR "%s: unrecognised EXT_CSD structure "
 			"version %d\n", mmc_hostname(card->host),
-			ext_csd_struct);
+			card->ext_csd.rev);
 		err = -EINVAL;
 		goto out;
 	}
 
-	if (ext_csd_struct >= 2) {
+	if (card->ext_csd.rev >= 2) {
 		card->ext_csd.sectors =
 			ext_csd[EXT_CSD_SEC_CNT + 0] << 0 |
 			ext_csd[EXT_CSD_SEC_CNT + 1] << 8 |
@@ -241,6 +240,15 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 		goto out;
 	}
 
+	if (card->ext_csd.rev >= 3) {
+		u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT];
+
+		/* Sleep / awake timeout in 100ns units */
+		if (sa_shift > 0 && sa_shift <= 0x17)
+			card->ext_csd.sa_timeout =
+					1 << ext_csd[EXT_CSD_S_A_TIMEOUT];
+	}
+
 out:
 	kfree(ext_csd);
 
@@ -557,9 +565,41 @@ static void mmc_power_restore(struct mmc_host *host)
 	mmc_release_host(host);
 }
 
+static int mmc_sleep(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+	int err = -ENOSYS;
+
+	if (card && card->ext_csd.rev >= 3) {
+		err = mmc_card_sleepawake(host, 1);
+		if (err < 0)
+			pr_debug("%s: Error %d while putting card into sleep",
+				 mmc_hostname(host), err);
+	}
+
+	return err;
+}
+
+static int mmc_awake(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+	int err = -ENOSYS;
+
+	if (card && card->ext_csd.rev >= 3) {
+		err = mmc_card_sleepawake(host, 0);
+		if (err < 0)
+			pr_debug("%s: Error %d while awaking sleeping card",
+				 mmc_hostname(host), err);
+	}
+
+	return err;
+}
+
 #ifdef CONFIG_MMC_UNSAFE_RESUME
 
 static const struct mmc_bus_ops mmc_ops = {
+	.awake = mmc_awake,
+	.sleep = mmc_sleep,
 	.remove = mmc_remove,
 	.detect = mmc_detect,
 	.suspend = mmc_suspend,
@@ -575,6 +615,8 @@ static void mmc_attach_bus_ops(struct mmc_host *host)
 #else
 
 static const struct mmc_bus_ops mmc_ops = {
+	.awake = mmc_awake,
+	.sleep = mmc_sleep,
 	.remove = mmc_remove,
 	.detect = mmc_detect,
 	.suspend = NULL,
@@ -583,6 +625,8 @@ static const struct mmc_bus_ops mmc_ops = {
 };
 
 static const struct mmc_bus_ops mmc_ops_unsafe = {
+	.awake = mmc_awake,
+	.sleep = mmc_sleep,
 	.remove = mmc_remove,
 	.detect = mmc_detect,
 	.suspend = mmc_suspend,
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 34ce270..355c604 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -57,6 +57,42 @@ int mmc_deselect_cards(struct mmc_host *host)
 	return _mmc_select_card(host, NULL);
 }
 
+int mmc_card_sleepawake(struct mmc_host *host, int sleep)
+{
+	struct mmc_command cmd;
+	struct mmc_card *card = host->card;
+	int err;
+
+	if (sleep)
+		mmc_deselect_cards(host);
+
+	memset(&cmd, 0, sizeof(struct mmc_command));
+
+	cmd.opcode = MMC_SLEEP_AWAKE;
+	cmd.arg = card->rca << 16;
+	if (sleep)
+		cmd.arg |= 1 << 15;
+
+	cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
+	err = mmc_wait_for_cmd(host, &cmd, 0);
+	if (err)
+		return err;
+
+	/*
+	 * If the host does not wait while the card signals busy, then we will
+	 * will have to wait the sleep/awake timeout.  Note, we cannot use the
+	 * SEND_STATUS command to poll the status because that command (and most
+	 * others) is invalid while the card sleeps.
+	 */
+	if (!(host->caps & MMC_CAP_WAIT_WHILE_BUSY))
+		mmc_delay(DIV_ROUND_UP(card->ext_csd.sa_timeout, 10000));
+
+	if (!sleep)
+		err = mmc_select_card(card);
+
+	return err;
+}
+
 int mmc_go_idle(struct mmc_host *host)
 {
 	int err;
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 17854bf..653eb8e 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -25,6 +25,7 @@ int mmc_send_status(struct mmc_card *card, u32 *status);
 int mmc_send_cid(struct mmc_host *host, u32 *cid);
 int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp);
 int mmc_spi_set_crc(struct mmc_host *host, int use_crc);
+int mmc_card_sleepawake(struct mmc_host *host, int sleep);
 
 #endif
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 403aa50..58f5917 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -40,6 +40,8 @@ struct mmc_csd {
 };
 
 struct mmc_ext_csd {
+	u8			rev;
+	unsigned int		sa_timeout;		/* Units: 100ns */
 	unsigned int		hs_max_dtr;
 	unsigned int		sectors;
 };
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index c1cbe59..81bb423 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -149,6 +149,7 @@ struct mmc_host {
 #define MMC_CAP_8_BIT_DATA	(1 << 6)	/* Can the host do 8 bit transfers */
 #define MMC_CAP_DISABLE		(1 << 7)	/* Can the host be disabled */
 #define MMC_CAP_NONREMOVABLE	(1 << 8)	/* Nonremovable e.g. eMMC */
+#define MMC_CAP_WAIT_WHILE_BUSY	(1 << 9)	/* Waits while card is busy */
 
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
@@ -240,6 +241,10 @@ struct regulator;
 int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct regulator *supply, unsigned short vdd_bit);
 
+int mmc_card_awake(struct mmc_host *host);
+int mmc_card_sleep(struct mmc_host *host);
+int mmc_card_can_sleep(struct mmc_host *host);
+
 int mmc_host_enable(struct mmc_host *host);
 int mmc_host_disable(struct mmc_host *host);
 int mmc_host_lazy_disable(struct mmc_host *host);
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 14b81f3..b2b4095 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -31,6 +31,7 @@
 #define MMC_ALL_SEND_CID          2   /* bcr                     R2  */
 #define MMC_SET_RELATIVE_ADDR     3   /* ac   [31:16] RCA        R1  */
 #define MMC_SET_DSR               4   /* bc   [31:16] RCA            */
+#define MMC_SLEEP_AWAKE		  5   /* ac   [31:16] RCA 15:flg R1b */
 #define MMC_SWITCH                6   /* ac   [31:0] See below   R1b */
 #define MMC_SELECT_CARD           7   /* ac   [31:16] RCA        R1  */
 #define MMC_SEND_EXT_CSD          8   /* adtc                    R1  */
@@ -254,6 +255,7 @@ struct _mmc_csd {
 #define EXT_CSD_CARD_TYPE	196	/* RO */
 #define EXT_CSD_REV		192	/* RO */
 #define EXT_CSD_SEC_CNT		212	/* RO, 4 bytes */
+#define EXT_CSD_S_A_TIMEOUT	217
 
 /*
  * EXT_CSD field definitions
-- 
cgit v0.10.2


From 53509f0fe28e049e772897aa8fa1f5183b6823a2 Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:36 -0700
Subject: mmc: power off once at removal

Fix MMC host stop sequence: power off once.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 828e60e..0842f68 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1143,6 +1143,8 @@ void mmc_stop_host(struct mmc_host *host)
 		mmc_claim_host(host);
 		mmc_detach_bus(host);
 		mmc_release_host(host);
+		mmc_bus_put(host);
+		return;
 	}
 	mmc_bus_put(host);
 
-- 
cgit v0.10.2


From ef0b27d4ccacac32afc3d1c0e8a95e4091dfbc8c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:37 -0700
Subject: mmc: check status after MMC SWITCH command

According to the standard, the SWITCH command should be followed by a
SEND_STATUS command to check for errors.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index e0bfa95..a6b5fe9 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -416,12 +416,17 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		(host->caps & MMC_CAP_MMC_HIGHSPEED)) {
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 			EXT_CSD_HS_TIMING, 1);
-		if (err)
+		if (err && err != -EBADMSG)
 			goto free_card;
 
-		mmc_card_set_highspeed(card);
-
-		mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+		if (err) {
+			printk(KERN_WARNING "%s: switch to highspeed failed\n",
+			       mmc_hostname(card->host));
+			err = 0;
+		} else {
+			mmc_card_set_highspeed(card);
+			mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+		}
 	}
 
 	/*
@@ -456,10 +461,17 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 				 EXT_CSD_BUS_WIDTH, ext_csd_bit);
 
-		if (err)
+		if (err && err != -EBADMSG)
 			goto free_card;
 
-		mmc_set_bus_width(card->host, bus_width);
+		if (err) {
+			printk(KERN_WARNING "%s: switch to bus width %d "
+			       "failed\n", mmc_hostname(card->host),
+			       1 << bus_width);
+			err = 0;
+		} else {
+			mmc_set_bus_width(card->host, bus_width);
+		}
 	}
 
 	if (!oldcard)
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 355c604..d2cb5c6 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -390,6 +390,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value)
 {
 	int err;
 	struct mmc_command cmd;
+	u32 status;
 
 	BUG_ON(!card);
 	BUG_ON(!card->host);
@@ -407,6 +408,28 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value)
 	if (err)
 		return err;
 
+	/* Must check status to be sure of no errors */
+	do {
+		err = mmc_send_status(card, &status);
+		if (err)
+			return err;
+		if (card->host->caps & MMC_CAP_WAIT_WHILE_BUSY)
+			break;
+		if (mmc_host_is_spi(card->host))
+			break;
+	} while (R1_CURRENT_STATE(status) == 7);
+
+	if (mmc_host_is_spi(card->host)) {
+		if (status & R1_SPI_ILLEGAL_COMMAND)
+			return -EBADMSG;
+	} else {
+		if (status & 0xFDFFA000)
+			printk(KERN_WARNING "%s: unexpected status %#x after "
+			       "switch", mmc_hostname(card->host), status);
+		if (status & R1_SWITCH_ERROR)
+			return -EBADMSG;
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index b2b4095..c02c8db 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -128,6 +128,7 @@
 #define R1_STATUS(x)            (x & 0xFFFFE000)
 #define R1_CURRENT_STATE(x)	((x & 0x00001E00) >> 9)	/* sx, b (4 bits) */
 #define R1_READY_FOR_DATA	(1 << 8)	/* sx, a */
+#define R1_SWITCH_ERROR		(1 << 7)	/* sx, c */
 #define R1_APP_CMD		(1 << 5)	/* sr, c */
 
 /*
-- 
cgit v0.10.2


From d900f7128cf73d77467209672db2dcf0ff02dde6 Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:38 -0700
Subject: omap_hsmmc: add debugfs entry (host registers)

Adds <debugfs_root>/kernel/debug/mmc<N>/regs entry, contents show
registers' state and some driver internal state variables.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 4b3af9e..259c07d 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -17,6 +17,8 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
@@ -974,6 +976,59 @@ static struct mmc_host_ops mmc_omap_ops = {
 	/* NYET -- enable_sdio_irq */
 };
 
+#ifdef CONFIG_DEBUG_FS
+
+static int mmc_regs_show(struct seq_file *s, void *data)
+{
+	struct mmc_host *mmc = s->private;
+	struct mmc_omap_host *host = mmc_priv(mmc);
+
+	seq_printf(s, "mmc%d regs:\n", mmc->index);
+
+	seq_printf(s, "SYSCONFIG:\t0x%08x\n",
+			OMAP_HSMMC_READ(host->base, SYSCONFIG));
+	seq_printf(s, "CON:\t\t0x%08x\n",
+			OMAP_HSMMC_READ(host->base, CON));
+	seq_printf(s, "HCTL:\t\t0x%08x\n",
+			OMAP_HSMMC_READ(host->base, HCTL));
+	seq_printf(s, "SYSCTL:\t\t0x%08x\n",
+			OMAP_HSMMC_READ(host->base, SYSCTL));
+	seq_printf(s, "IE:\t\t0x%08x\n",
+			OMAP_HSMMC_READ(host->base, IE));
+	seq_printf(s, "ISE:\t\t0x%08x\n",
+			OMAP_HSMMC_READ(host->base, ISE));
+	seq_printf(s, "CAPA:\t\t0x%08x\n",
+			OMAP_HSMMC_READ(host->base, CAPA));
+	return 0;
+}
+
+static int mmc_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mmc_regs_show, inode->i_private);
+}
+
+static const struct file_operations mmc_regs_fops = {
+	.open           = mmc_regs_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+static void omap_mmc_debugfs(struct mmc_host *mmc)
+{
+	if (mmc->debugfs_root)
+		debugfs_create_file("regs", S_IRUSR, mmc->debugfs_root,
+			mmc, &mmc_regs_fops);
+}
+
+#else
+
+static void omap_mmc_debugfs(struct mmc_host *mmc)
+{
+}
+
+#endif
+
 static int __init omap_mmc_probe(struct platform_device *pdev)
 {
 	struct omap_mmc_platform_data *pdata = pdev->dev.platform_data;
@@ -1157,6 +1212,8 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 			goto err_cover_switch;
 	}
 
+	omap_mmc_debugfs(mmc);
+
 	return 0;
 
 err_cover_switch:
-- 
cgit v0.10.2


From 5e2ea6173d71cee81e53da00911dcab8cc092acc Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:39 -0700
Subject: omap_hsmmc: make use of new enable/disable interface

For the moment enable / disable just turns the fclk on and off.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 259c07d..7699f0a 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -605,7 +605,9 @@ static void mmc_omap_detect(struct work_struct *work)
 	if (host->carddetect) {
 		mmc_detect_change(host->mmc, (HZ * 200) / 1000);
 	} else {
+		mmc_host_enable(host->mmc);
 		mmc_omap_reset_controller_fsm(host, SRD);
+		mmc_host_lazy_disable(host->mmc);
 		mmc_detect_change(host->mmc, (HZ * 50) / 1000);
 	}
 }
@@ -818,6 +820,27 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
 	return 0;
 }
 
+static int omap_mmc_enable(struct mmc_host *mmc)
+{
+	struct mmc_omap_host *host = mmc_priv(mmc);
+	int err;
+
+	err = clk_enable(host->fclk);
+	if (err)
+		return err;
+	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n");
+	return 0;
+}
+
+static int omap_mmc_disable(struct mmc_host *mmc, int lazy)
+{
+	struct mmc_omap_host *host = mmc_priv(mmc);
+
+	clk_disable(host->fclk);
+	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n");
+	return 0;
+}
+
 /*
  * Request function. for read/write operation
  */
@@ -841,6 +864,8 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	unsigned long timeout;
 	u32 con;
 
+	mmc_host_enable(host->mmc);
+
 	switch (ios->power_mode) {
 	case MMC_POWER_OFF:
 		mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0);
@@ -919,6 +944,8 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
 		OMAP_HSMMC_WRITE(host->base, CON,
 				OMAP_HSMMC_READ(host->base, CON) | OD);
+
+	mmc_host_lazy_disable(host->mmc);
 }
 
 static int omap_hsmmc_get_cd(struct mmc_host *mmc)
@@ -969,6 +996,8 @@ static void omap_hsmmc_init(struct mmc_omap_host *host)
 }
 
 static struct mmc_host_ops mmc_omap_ops = {
+	.enable = omap_mmc_enable,
+	.disable = omap_mmc_disable,
 	.request = omap_mmc_request,
 	.set_ios = omap_mmc_set_ios,
 	.get_cd = omap_hsmmc_get_cd,
@@ -983,7 +1012,16 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 	struct mmc_host *mmc = s->private;
 	struct mmc_omap_host *host = mmc_priv(mmc);
 
-	seq_printf(s, "mmc%d regs:\n", mmc->index);
+	seq_printf(s, "mmc%d:\n"
+			" enabled:\t%d\n"
+			" nesting_cnt:\t%d\n"
+			"\nregs:\n",
+			mmc->index, mmc->enabled ? 1 : 0, mmc->nesting_cnt);
+
+	if (clk_enable(host->fclk) != 0) {
+		seq_printf(s, "can't read the regs\n");
+		goto err;
+	}
 
 	seq_printf(s, "SYSCONFIG:\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, SYSCONFIG));
@@ -999,6 +1037,9 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 			OMAP_HSMMC_READ(host->base, ISE));
 	seq_printf(s, "CAPA:\t\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, CAPA));
+
+	clk_disable(host->fclk);
+err:
 	return 0;
 }
 
@@ -1099,14 +1140,16 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 		goto err1;
 	}
 
-	if (clk_enable(host->fclk) != 0) {
+	mmc->caps |= MMC_CAP_DISABLE;
+	mmc_set_disable_delay(mmc, 100);
+	if (mmc_host_enable(host->mmc) != 0) {
 		clk_put(host->iclk);
 		clk_put(host->fclk);
 		goto err1;
 	}
 
 	if (clk_enable(host->iclk) != 0) {
-		clk_disable(host->fclk);
+		mmc_host_disable(host->mmc);
 		clk_put(host->iclk);
 		clk_put(host->fclk);
 		goto err1;
@@ -1197,6 +1240,8 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	OMAP_HSMMC_WRITE(host->base, ISE, INT_EN_MASK);
 	OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
 
+	mmc_host_lazy_disable(host->mmc);
+
 	mmc_add_host(mmc);
 
 	if (host->pdata->slots[host->slot_id].name != NULL) {
@@ -1225,7 +1270,7 @@ err_irq_cd:
 err_irq_cd_init:
 	free_irq(host->irq, host);
 err_irq:
-	clk_disable(host->fclk);
+	mmc_host_disable(host->mmc);
 	clk_disable(host->iclk);
 	clk_put(host->fclk);
 	clk_put(host->iclk);
@@ -1250,6 +1295,7 @@ static int omap_mmc_remove(struct platform_device *pdev)
 	struct resource *res;
 
 	if (host) {
+		mmc_host_enable(host->mmc);
 		mmc_remove_host(host->mmc);
 		if (host->pdata->cleanup)
 			host->pdata->cleanup(&pdev->dev);
@@ -1258,7 +1304,7 @@ static int omap_mmc_remove(struct platform_device *pdev)
 			free_irq(mmc_slot(host).card_detect_irq, host);
 		flush_scheduled_work();
 
-		clk_disable(host->fclk);
+		mmc_host_disable(host->mmc);
 		clk_disable(host->iclk);
 		clk_put(host->fclk);
 		clk_put(host->iclk);
@@ -1289,6 +1335,7 @@ static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state)
 		return 0;
 
 	if (host) {
+		mmc_host_enable(host->mmc);
 		ret = mmc_suspend_host(host->mmc, state);
 		if (ret == 0) {
 			host->suspended = 1;
@@ -1307,10 +1354,11 @@ static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state)
 
 			OMAP_HSMMC_WRITE(host->base, HCTL,
 					 OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
-			clk_disable(host->fclk);
+			mmc_host_disable(host->mmc);
 			clk_disable(host->iclk);
 			clk_disable(host->dbclk);
-		}
+		} else
+			mmc_host_disable(host->mmc);
 
 	}
 	return ret;
@@ -1327,13 +1375,12 @@ static int omap_mmc_resume(struct platform_device *pdev)
 
 	if (host) {
 
-		ret = clk_enable(host->fclk);
-		if (ret)
+		if (mmc_host_enable(host->mmc) != 0)
 			goto clk_en_err;
 
 		ret = clk_enable(host->iclk);
 		if (ret) {
-			clk_disable(host->fclk);
+			mmc_host_disable(host->mmc);
 			clk_put(host->fclk);
 			goto clk_en_err;
 		}
@@ -1355,6 +1402,7 @@ static int omap_mmc_resume(struct platform_device *pdev)
 		ret = mmc_resume_host(host->mmc);
 		if (ret == 0)
 			host->suspended = 0;
+		mmc_host_lazy_disable(host->mmc);
 	}
 
 	return ret;
-- 
cgit v0.10.2


From 1887bde391a3216789c9a03ac111d5f6ebfb0c00 Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:40 -0700
Subject: ARM: OMAP: mmc-twl4030: add context loss counter support

PM dynamic OFF state results in context loss.  That is, the host
controller has been powered off at some point, which means the registers
have been reset.  The driver must detect when this happens, and restore
the context.  This patch adds the means to detect context loss.

Note, the PM side is not yet implemented.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c
index 3c04c2f..30d0286 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.c
+++ b/arch/arm/mach-omap2/mmc-twl4030.c
@@ -198,6 +198,18 @@ static int twl_mmc_resume(struct device *dev, int slot)
 #define twl_mmc_resume	NULL
 #endif
 
+#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
+
+static int twl4030_mmc_get_context_loss(struct device *dev)
+{
+	/* FIXME: PM DPS not implemented yet */
+	return 0;
+}
+
+#else
+#define twl4030_mmc_get_context_loss NULL
+#endif
+
 static int twl_mmc1_set_power(struct device *dev, int slot, int power_on,
 				int vdd)
 {
@@ -390,6 +402,9 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		} else
 			mmc->slots[0].switch_pin = -EINVAL;
 
+		mmc->get_context_loss_count =
+				twl4030_mmc_get_context_loss;
+
 		/* write protect normally uses an OMAP gpio */
 		if (gpio_is_valid(c->gpio_wp)) {
 			gpio_request(c->gpio_wp, "mmc_wp");
diff --git a/arch/arm/plat-omap/include/mach/mmc.h b/arch/arm/plat-omap/include/mach/mmc.h
index 81d5b36..2f7cf31 100644
--- a/arch/arm/plat-omap/include/mach/mmc.h
+++ b/arch/arm/plat-omap/include/mach/mmc.h
@@ -59,6 +59,9 @@ struct omap_mmc_platform_data {
 	int (*suspend)(struct device *dev, int slot);
 	int (*resume)(struct device *dev, int slot);
 
+	/* Return context loss count due to PM states changing */
+	int (*get_context_loss_count)(struct device *dev);
+
 	u64 dma_mask;
 
 	struct omap_mmc_slot_data {
-- 
cgit v0.10.2


From a3621465b4fd91b7f8560f394afc494a57b77501 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:42 -0700
Subject: omap_hsmmc: keep track of power mode

This patch is preparation for adding context save and restore support.

Keep track of the current power mode so that the context restore function
can avoid restoring the context for a card if the power has been switched
off.  If the power is off, the card must be reinitialized anyway which
will re-establish the context.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 7699f0a..30b863b 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -143,6 +143,7 @@ struct mmc_omap_host {
 	unsigned int		dma_len;
 	unsigned int		dma_sg_idx;
 	unsigned char		bus_mode;
+	unsigned char		power_mode;
 	u32			*buffer;
 	u32			bytesleft;
 	int			suspended;
@@ -863,16 +864,25 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	unsigned long regval;
 	unsigned long timeout;
 	u32 con;
+	int do_send_init_stream = 0;
 
 	mmc_host_enable(host->mmc);
 
-	switch (ios->power_mode) {
-	case MMC_POWER_OFF:
-		mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0);
-		break;
-	case MMC_POWER_UP:
-		mmc_slot(host).set_power(host->dev, host->slot_id, 1, ios->vdd);
-		break;
+	if (ios->power_mode != host->power_mode) {
+		switch (ios->power_mode) {
+		case MMC_POWER_OFF:
+			mmc_slot(host).set_power(host->dev, host->slot_id,
+						 0, 0);
+			break;
+		case MMC_POWER_UP:
+			mmc_slot(host).set_power(host->dev, host->slot_id,
+						 1, ios->vdd);
+			break;
+		case MMC_POWER_ON:
+			do_send_init_stream = 1;
+			break;
+		}
+		host->power_mode = ios->power_mode;
 	}
 
 	con = OMAP_HSMMC_READ(host->base, CON);
@@ -938,7 +948,7 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	OMAP_HSMMC_WRITE(host->base, SYSCTL,
 		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
 
-	if (ios->power_mode == MMC_POWER_ON)
+	if (do_send_init_stream)
 		send_init_stream(host);
 
 	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
@@ -1116,6 +1126,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	host->slot_id	= 0;
 	host->mapbase	= res->start;
 	host->base	= ioremap(host->mapbase, SZ_4K);
+	host->power_mode = -1;
 
 	platform_set_drvdata(pdev, host);
 	INIT_WORK(&host->mmc_carddetect_work, mmc_omap_detect);
-- 
cgit v0.10.2


From 11dd62a741047b9fd1faf37620e2b58595f04ce5 Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:43 -0700
Subject: omap_hsmmc: context save/restore support

Keep the context over PM dynamic OFF states.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 30b863b..74d506a 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -37,6 +37,7 @@
 
 /* OMAP HSMMC Host Controller Registers */
 #define OMAP_HSMMC_SYSCONFIG	0x0010
+#define OMAP_HSMMC_SYSSTATUS	0x0014
 #define OMAP_HSMMC_CON		0x002C
 #define OMAP_HSMMC_BLK		0x0104
 #define OMAP_HSMMC_ARG		0x0108
@@ -96,6 +97,8 @@
 #define DUAL_VOLT_OCR_BIT	7
 #define SRC			(1 << 25)
 #define SRD			(1 << 26)
+#define SOFTRESET		(1 << 1)
+#define RESETDONE		(1 << 0)
 
 /*
  * FIXME: Most likely all the data using these _DEVID defines should come
@@ -154,6 +157,8 @@ struct mmc_omap_host {
 	int			slot_id;
 	int			dbclk_enabled;
 	int			response_busy;
+	int			context_loss;
+
 	struct	omap_mmc_platform_data	*pdata;
 };
 
@@ -168,6 +173,166 @@ static void omap_mmc_stop_clock(struct mmc_omap_host *host)
 		dev_dbg(mmc_dev(host->mmc), "MMC Clock is not stoped\n");
 }
 
+#ifdef CONFIG_PM
+
+/*
+ * Restore the MMC host context, if it was lost as result of a
+ * power state change.
+ */
+static int omap_mmc_restore_ctx(struct mmc_omap_host *host)
+{
+	struct mmc_ios *ios = &host->mmc->ios;
+	struct omap_mmc_platform_data *pdata = host->pdata;
+	int context_loss = 0;
+	u32 hctl, capa, con;
+	u16 dsor = 0;
+	unsigned long timeout;
+
+	if (pdata->get_context_loss_count) {
+		context_loss = pdata->get_context_loss_count(host->dev);
+		if (context_loss < 0)
+			return 1;
+	}
+
+	dev_dbg(mmc_dev(host->mmc), "context was %slost\n",
+		context_loss == host->context_loss ? "not " : "");
+	if (host->context_loss == context_loss)
+		return 1;
+
+	/* Wait for hardware reset */
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, SYSSTATUS) & RESETDONE) != RESETDONE
+		&& time_before(jiffies, timeout))
+		;
+
+	/* Do software reset */
+	OMAP_HSMMC_WRITE(host->base, SYSCONFIG, SOFTRESET);
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, SYSSTATUS) & RESETDONE) != RESETDONE
+		&& time_before(jiffies, timeout))
+		;
+
+	OMAP_HSMMC_WRITE(host->base, SYSCONFIG,
+			OMAP_HSMMC_READ(host->base, SYSCONFIG) | AUTOIDLE);
+
+	if (host->id == OMAP_MMC1_DEVID) {
+		if (host->power_mode != MMC_POWER_OFF &&
+		    (1 << ios->vdd) <= MMC_VDD_23_24)
+			hctl = SDVS18;
+		else
+			hctl = SDVS30;
+		capa = VS30 | VS18;
+	} else {
+		hctl = SDVS18;
+		capa = VS18;
+	}
+
+	OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) | hctl);
+
+	OMAP_HSMMC_WRITE(host->base, CAPA,
+			OMAP_HSMMC_READ(host->base, CAPA) | capa);
+
+	OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) | SDBP);
+
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, HCTL) & SDBP) != SDBP
+		&& time_before(jiffies, timeout))
+		;
+
+	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+	OMAP_HSMMC_WRITE(host->base, ISE, INT_EN_MASK);
+	OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
+
+	/* Do not initialize card-specific things if the power is off */
+	if (host->power_mode == MMC_POWER_OFF)
+		goto out;
+
+	con = OMAP_HSMMC_READ(host->base, CON);
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_8:
+		OMAP_HSMMC_WRITE(host->base, CON, con | DW8);
+		break;
+	case MMC_BUS_WIDTH_4:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT);
+		break;
+	case MMC_BUS_WIDTH_1:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT);
+		break;
+	}
+
+	if (ios->clock) {
+		dsor = OMAP_MMC_MASTER_CLOCK / ios->clock;
+		if (dsor < 1)
+			dsor = 1;
+
+		if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock)
+			dsor++;
+
+		if (dsor > 250)
+			dsor = 250;
+	}
+
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) & ~CEN);
+	OMAP_HSMMC_WRITE(host->base, SYSCTL, (dsor << 6) | (DTO << 16));
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
+
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
+		&& time_before(jiffies, timeout))
+		;
+
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
+
+	con = OMAP_HSMMC_READ(host->base, CON);
+	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
+		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
+	else
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
+out:
+	host->context_loss = context_loss;
+
+	dev_dbg(mmc_dev(host->mmc), "context is restored\n");
+	return 0;
+}
+
+/*
+ * Save the MMC host context (store the number of power state changes so far).
+ */
+static void omap_mmc_save_ctx(struct mmc_omap_host *host)
+{
+	struct omap_mmc_platform_data *pdata = host->pdata;
+	int context_loss;
+
+	if (pdata->get_context_loss_count) {
+		context_loss = pdata->get_context_loss_count(host->dev);
+		if (context_loss < 0)
+			return;
+		host->context_loss = context_loss;
+	}
+}
+
+#else
+
+static int omap_mmc_restore_ctx(struct mmc_omap_host *host)
+{
+	return 0;
+}
+
+static void omap_mmc_save_ctx(struct mmc_omap_host *host)
+{
+}
+
+#endif
+
 /*
  * Send init stream sequence to card
  * before sending IDLE command
@@ -830,6 +995,7 @@ static int omap_mmc_enable(struct mmc_host *mmc)
 	if (err)
 		return err;
 	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n");
+	omap_mmc_restore_ctx(host);
 	return 0;
 }
 
@@ -837,6 +1003,7 @@ static int omap_mmc_disable(struct mmc_host *mmc, int lazy)
 {
 	struct mmc_omap_host *host = mmc_priv(mmc);
 
+	omap_mmc_save_ctx(host);
 	clk_disable(host->fclk);
 	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n");
 	return 0;
@@ -941,7 +1108,7 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	/* Wait till the ICS bit is set */
 	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
-	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != 0x2
+	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
 		&& time_before(jiffies, timeout))
 		msleep(1);
 
@@ -1021,12 +1188,19 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 {
 	struct mmc_host *mmc = s->private;
 	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_mmc_platform_data *pdata = host->pdata;
+	int context_loss = 0;
+
+	if (pdata->get_context_loss_count)
+		context_loss = pdata->get_context_loss_count(host->dev);
 
 	seq_printf(s, "mmc%d:\n"
 			" enabled:\t%d\n"
 			" nesting_cnt:\t%d\n"
+			" ctx_loss:\t%d:%d\n"
 			"\nregs:\n",
-			mmc->index, mmc->enabled ? 1 : 0, mmc->nesting_cnt);
+			mmc->index, mmc->enabled ? 1 : 0, mmc->nesting_cnt,
+			host->context_loss, context_loss);
 
 	if (clk_enable(host->fclk) != 0) {
 		seq_printf(s, "can't read the regs\n");
@@ -1151,6 +1325,8 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 		goto err1;
 	}
 
+	omap_mmc_save_ctx(host);
+
 	mmc->caps |= MMC_CAP_DISABLE;
 	mmc_set_disable_delay(mmc, 100);
 	if (mmc_host_enable(host->mmc) != 0) {
@@ -1385,21 +1561,19 @@ static int omap_mmc_resume(struct platform_device *pdev)
 		return 0;
 
 	if (host) {
-
-		if (mmc_host_enable(host->mmc) != 0)
-			goto clk_en_err;
-
 		ret = clk_enable(host->iclk);
-		if (ret) {
-			mmc_host_disable(host->mmc);
-			clk_put(host->fclk);
+		if (ret)
 			goto clk_en_err;
-		}
 
 		if (clk_enable(host->dbclk) != 0)
 			dev_dbg(mmc_dev(host->mmc),
 					"Enabling debounce clk failed\n");
 
+		if (mmc_host_enable(host->mmc) != 0) {
+			clk_disable(host->iclk);
+			goto clk_en_err;
+		}
+
 		omap_hsmmc_init(host);
 
 		if (host->pdata->resume) {
-- 
cgit v0.10.2


From abb28e731a751f0b6c293a67b3e564eb0e336d53 Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:44 -0700
Subject: omap_hsmmc: set open drain bit correctly

The code could set the bit to 1 but not reset it to 0.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 74d506a..59f0010 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1118,9 +1118,11 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (do_send_init_stream)
 		send_init_stream(host);
 
+	con = OMAP_HSMMC_READ(host->base, CON);
 	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
-		OMAP_HSMMC_WRITE(host->base, CON,
-				OMAP_HSMMC_READ(host->base, CON) | OD);
+		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
+	else
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
 
 	mmc_host_lazy_disable(host->mmc);
 }
-- 
cgit v0.10.2


From a6b2240da2b090874095832afc7eb9ed2968b27f Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:45 -0700
Subject: omap_hsmmc: ensure workqueues are empty before suspend

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 59f0010..9599dd1 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -151,7 +151,6 @@ struct mmc_omap_host {
 	u32			bytesleft;
 	int			suspended;
 	int			irq;
-	int			carddetect;
 	int			use_dma, dma_ch;
 	int			dma_line_tx, dma_line_rx;
 	int			slot_id;
@@ -761,14 +760,19 @@ static void mmc_omap_detect(struct work_struct *work)
 	struct mmc_omap_host *host = container_of(work, struct mmc_omap_host,
 						mmc_carddetect_work);
 	struct omap_mmc_slot_data *slot = &mmc_slot(host);
+	int carddetect;
+
+	if (host->suspended)
+		return;
+
+	sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
 
 	if (mmc_slot(host).card_detect)
-		host->carddetect = slot->card_detect(slot->card_detect_irq);
+		carddetect = slot->card_detect(slot->card_detect_irq);
 	else
-		host->carddetect = -ENOSYS;
+		carddetect = -ENOSYS;
 
-	sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
-	if (host->carddetect) {
+	if (carddetect) {
 		mmc_detect_change(host->mmc, (HZ * 200) / 1000);
 	} else {
 		mmc_host_enable(host->mmc);
@@ -785,6 +789,8 @@ static irqreturn_t omap_mmc_cd_handler(int irq, void *dev_id)
 {
 	struct mmc_omap_host *host = (struct mmc_omap_host *)dev_id;
 
+	if (host->suspended)
+		return IRQ_HANDLED;
 	schedule_work(&host->mmc_carddetect_work);
 
 	return IRQ_HANDLED;
@@ -1524,30 +1530,42 @@ static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state)
 		return 0;
 
 	if (host) {
+		host->suspended = 1;
+		if (host->pdata->suspend) {
+			ret = host->pdata->suspend(&pdev->dev,
+							host->slot_id);
+			if (ret) {
+				dev_dbg(mmc_dev(host->mmc),
+					"Unable to handle MMC board"
+					" level suspend\n");
+				host->suspended = 0;
+				return ret;
+			}
+		}
+		cancel_work_sync(&host->mmc_carddetect_work);
 		mmc_host_enable(host->mmc);
 		ret = mmc_suspend_host(host->mmc, state);
 		if (ret == 0) {
-			host->suspended = 1;
-
 			OMAP_HSMMC_WRITE(host->base, ISE, 0);
 			OMAP_HSMMC_WRITE(host->base, IE, 0);
 
-			if (host->pdata->suspend) {
-				ret = host->pdata->suspend(&pdev->dev,
-								host->slot_id);
-				if (ret)
-					dev_dbg(mmc_dev(host->mmc),
-						"Unable to handle MMC board"
-						" level suspend\n");
-			}
 
 			OMAP_HSMMC_WRITE(host->base, HCTL,
 					 OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
 			mmc_host_disable(host->mmc);
 			clk_disable(host->iclk);
 			clk_disable(host->dbclk);
-		} else
+		} else {
+			host->suspended = 0;
+			if (host->pdata->resume) {
+				ret = host->pdata->resume(&pdev->dev,
+							  host->slot_id);
+				if (ret)
+					dev_dbg(mmc_dev(host->mmc),
+						"Unmask interrupt failed\n");
+			}
 			mmc_host_disable(host->mmc);
+		}
 
 	}
 	return ret;
-- 
cgit v0.10.2


From a3f406f861456987c9fce8cfa0a00d07b16cdec0 Mon Sep 17 00:00:00 2001
From: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Date: Tue, 22 Sep 2009 16:44:46 -0700
Subject: omap_hsmmc: fix scatter-gather list sanity checking

Do not use host->dma_len when it is uninitialzed.  Finish the request with
an error if the mmc_omap_prepare_data() fails.

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 9599dd1..f588def 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -878,7 +878,7 @@ mmc_omap_start_dma_transfer(struct mmc_omap_host *host, struct mmc_request *req)
 	struct mmc_data *data = req->data;
 
 	/* Sanity check: all the SG entries must be aligned by block size. */
-	for (i = 0; i < host->dma_len; i++) {
+	for (i = 0; i < data->sg_len; i++) {
 		struct scatterlist *sgl;
 
 		sgl = data->sg + i;
@@ -1021,10 +1021,20 @@ static int omap_mmc_disable(struct mmc_host *mmc, int lazy)
 static void omap_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
 {
 	struct mmc_omap_host *host = mmc_priv(mmc);
+	int err;
 
 	WARN_ON(host->mrq != NULL);
 	host->mrq = req;
-	mmc_omap_prepare_data(host, req);
+	err = mmc_omap_prepare_data(host, req);
+	if (err) {
+		req->cmd->error = err;
+		if (req->data)
+			req->data->error = err;
+		host->mrq = NULL;
+		mmc_request_done(mmc, req);
+		return;
+	}
+
 	mmc_omap_start_command(host, req->cmd, req->data);
 }
 
-- 
cgit v0.10.2


From 23d99bb923fc23aeb1086d60eb1c70602b4e2036 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:48 -0700
Subject: omap_hsmmc: make use of new MMC_CAP_NONREMOVABLE host capability

Let the board specify that a card is nonremovable e.g. eMMC

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c
index 30d0286..56f07f2 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.c
+++ b/arch/arm/mach-omap2/mmc-twl4030.c
@@ -415,6 +415,9 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		} else
 			mmc->slots[0].gpio_wp = -EINVAL;
 
+		if (c->nonremovable)
+			mmc->slots[0].nonremovable = 1;
+
 		/* NOTE:  MMC slots should have a Vcc regulator set up.
 		 * This may be from a TWL4030-family chip, another
 		 * controllable regulator, or a fixed supply.
diff --git a/arch/arm/mach-omap2/mmc-twl4030.h b/arch/arm/mach-omap2/mmc-twl4030.h
index 3807c45..75b0c64 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.h
+++ b/arch/arm/mach-omap2/mmc-twl4030.h
@@ -12,6 +12,7 @@ struct twl4030_hsmmc_info {
 	bool	transceiver;	/* MMC-2 option */
 	bool	ext_clock;	/* use external pin for input clock */
 	bool	cover_only;	/* No card detect - just cover switch */
+	bool	nonremovable;	/* Nonremovable e.g. eMMC */
 	int	gpio_cd;	/* or -EINVAL */
 	int	gpio_wp;	/* or -EINVAL */
 	char	*name;		/* or NULL for default */
diff --git a/arch/arm/plat-omap/include/mach/mmc.h b/arch/arm/plat-omap/include/mach/mmc.h
index 2f7cf31..bab486c 100644
--- a/arch/arm/plat-omap/include/mach/mmc.h
+++ b/arch/arm/plat-omap/include/mach/mmc.h
@@ -83,6 +83,9 @@ struct omap_mmc_platform_data {
 		/* use the internal clock */
 		unsigned internal_clock:1;
 
+		/* nonremovable e.g. eMMC */
+		unsigned nonremovable:1;
+
 		int switch_pin;			/* gpio (card detect) */
 		int gpio_wp;			/* gpio (write protect) */
 
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index f588def..a20c383 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1390,6 +1390,9 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	else if (pdata->slots[host->slot_id].wires >= 4)
 		mmc->caps |= MMC_CAP_4_BIT_DATA;
 
+	if (pdata->slots[host->slot_id].nonremovable)
+		mmc->caps |= MMC_CAP_NONREMOVABLE;
+
 	omap_hsmmc_init(host);
 
 	/* Select DMA lines */
-- 
cgit v0.10.2


From dd498effcfa6a196ba097adae3c5aa641115df88 Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:49 -0700
Subject: omap_hsmmc: support for deeper power saving states

Support for multi-level dynamic power saving states in omap_hsmmc
(ENABLED->DISABLED->OFF).  In the "deepest" state (OFF) we switch off the
voltage regulators.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c
index 56f07f2..cb1cbd7 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.c
+++ b/arch/arm/mach-omap2/mmc-twl4030.c
@@ -418,6 +418,9 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		if (c->nonremovable)
 			mmc->slots[0].nonremovable = 1;
 
+		if (c->power_saving)
+			mmc->slots[0].power_saving = 1;
+
 		/* NOTE:  MMC slots should have a Vcc regulator set up.
 		 * This may be from a TWL4030-family chip, another
 		 * controllable regulator, or a fixed supply.
diff --git a/arch/arm/mach-omap2/mmc-twl4030.h b/arch/arm/mach-omap2/mmc-twl4030.h
index 75b0c64..a47e685 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.h
+++ b/arch/arm/mach-omap2/mmc-twl4030.h
@@ -13,6 +13,7 @@ struct twl4030_hsmmc_info {
 	bool	ext_clock;	/* use external pin for input clock */
 	bool	cover_only;	/* No card detect - just cover switch */
 	bool	nonremovable;	/* Nonremovable e.g. eMMC */
+	bool	power_saving;	/* Try to sleep or power off when possible */
 	int	gpio_cd;	/* or -EINVAL */
 	int	gpio_wp;	/* or -EINVAL */
 	char	*name;		/* or NULL for default */
diff --git a/arch/arm/plat-omap/include/mach/mmc.h b/arch/arm/plat-omap/include/mach/mmc.h
index bab486c..82f1e29 100644
--- a/arch/arm/plat-omap/include/mach/mmc.h
+++ b/arch/arm/plat-omap/include/mach/mmc.h
@@ -86,6 +86,9 @@ struct omap_mmc_platform_data {
 		/* nonremovable e.g. eMMC */
 		unsigned nonremovable:1;
 
+		/* Try to sleep or power off when possible */
+		unsigned power_saving:1;
+
 		int switch_pin;			/* gpio (card detect) */
 		int gpio_wp;			/* gpio (write protect) */
 
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index a20c383..016914c 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -113,6 +113,10 @@
 #define OMAP_MMC_MASTER_CLOCK	96000000
 #define DRIVER_NAME		"mmci-omap-hs"
 
+/* Timeouts for entering power saving states on inactivity, msec */
+#define OMAP_MMC_DISABLED_TIMEOUT	100
+#define OMAP_MMC_OFF_TIMEOUT		1000
+
 /*
  * One controller can have multiple slots, like on some omap boards using
  * omap.c controller driver. Luckily this is not currently done on any known
@@ -157,6 +161,7 @@ struct mmc_omap_host {
 	int			dbclk_enabled;
 	int			response_busy;
 	int			context_loss;
+	int			dpm_state;
 
 	struct	omap_mmc_platform_data	*pdata;
 };
@@ -992,29 +997,6 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
 	return 0;
 }
 
-static int omap_mmc_enable(struct mmc_host *mmc)
-{
-	struct mmc_omap_host *host = mmc_priv(mmc);
-	int err;
-
-	err = clk_enable(host->fclk);
-	if (err)
-		return err;
-	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n");
-	omap_mmc_restore_ctx(host);
-	return 0;
-}
-
-static int omap_mmc_disable(struct mmc_host *mmc, int lazy)
-{
-	struct mmc_omap_host *host = mmc_priv(mmc);
-
-	omap_mmc_save_ctx(host);
-	clk_disable(host->fclk);
-	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n");
-	return 0;
-}
-
 /*
  * Request function. for read/write operation
  */
@@ -1068,6 +1050,8 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		host->power_mode = ios->power_mode;
 	}
 
+	/* FIXME: set registers based only on changes to ios */
+
 	con = OMAP_HSMMC_READ(host->base, CON);
 	switch (mmc->ios.bus_width) {
 	case MMC_BUS_WIDTH_8:
@@ -1140,7 +1124,10 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	else
 		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
 
-	mmc_host_lazy_disable(host->mmc);
+	if (host->power_mode == MMC_POWER_OFF)
+		mmc_host_disable(host->mmc);
+	else
+		mmc_host_lazy_disable(host->mmc);
 }
 
 static int omap_hsmmc_get_cd(struct mmc_host *mmc)
@@ -1190,7 +1177,191 @@ static void omap_hsmmc_init(struct mmc_omap_host *host)
 	set_sd_bus_power(host);
 }
 
-static struct mmc_host_ops mmc_omap_ops = {
+/*
+ * Dynamic power saving handling, FSM:
+ *   ENABLED -> DISABLED -> OFF
+ *     ^___________|          |
+ *     |______________________|
+ *
+ * ENABLED:   mmc host is fully functional
+ * DISABLED:  fclk is off
+ * OFF:       fclk is off,voltage regulator is off
+ *
+ * Transition handlers return the timeout for the next state transition
+ * or negative error.
+ */
+
+enum {ENABLED = 0, DISABLED, OFF};
+
+/* Handler for [ENABLED -> DISABLED] transition */
+static int omap_mmc_enabled_to_disabled(struct mmc_omap_host *host)
+{
+	omap_mmc_save_ctx(host);
+	clk_disable(host->fclk);
+	host->dpm_state = DISABLED;
+
+	dev_dbg(mmc_dev(host->mmc), "ENABLED -> DISABLED\n");
+
+	if (host->power_mode == MMC_POWER_OFF)
+		return 0;
+
+	return msecs_to_jiffies(OMAP_MMC_OFF_TIMEOUT);
+}
+
+/* Handler for [DISABLED -> OFF] transition */
+static int omap_mmc_disabled_to_off(struct mmc_omap_host *host)
+{
+	int new_state;
+
+	dev_dbg(mmc_dev(host->mmc), "DISABLED -> OFF\n");
+
+	if (!mmc_try_claim_host(host->mmc))
+		return 0;
+
+	clk_enable(host->fclk);
+
+	omap_mmc_restore_ctx(host);
+
+	if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) ||
+	    mmc_slot(host).card_detect ||
+	    (mmc_slot(host).get_cover_state &&
+	     mmc_slot(host).get_cover_state(host->dev, host->slot_id))) {
+		mmc_power_save_host(host->mmc);
+		new_state = OFF;
+	} else
+		new_state = DISABLED;
+
+	OMAP_HSMMC_WRITE(host->base, ISE, 0);
+	OMAP_HSMMC_WRITE(host->base, IE, 0);
+	OMAP_HSMMC_WRITE(host->base, HCTL,
+		 OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
+
+	clk_disable(host->fclk);
+	clk_disable(host->iclk);
+	clk_disable(host->dbclk);
+
+	host->dpm_state = new_state;
+
+	mmc_release_host(host->mmc);
+
+	return 0;
+}
+
+/* Handler for [DISABLED -> ENABLED] transition */
+static int omap_mmc_disabled_to_enabled(struct mmc_omap_host *host)
+{
+	int err;
+
+	err = clk_enable(host->fclk);
+	if (err < 0)
+		return err;
+
+	omap_mmc_restore_ctx(host);
+
+	host->dpm_state = ENABLED;
+
+	dev_dbg(mmc_dev(host->mmc), "DISABLED -> ENABLED\n");
+
+	return 0;
+}
+
+/* Handler for [OFF -> ENABLED] transition */
+static int omap_mmc_off_to_enabled(struct mmc_omap_host *host)
+{
+	clk_enable(host->fclk);
+	clk_enable(host->iclk);
+
+	if (clk_enable(host->dbclk))
+		dev_dbg(mmc_dev(host->mmc),
+			"Enabling debounce clk failed\n");
+
+	omap_mmc_restore_ctx(host);
+	omap_hsmmc_init(host);
+	mmc_power_restore_host(host->mmc);
+
+	host->dpm_state = ENABLED;
+
+	dev_dbg(mmc_dev(host->mmc), "OFF -> ENABLED\n");
+
+	return 0;
+}
+
+/*
+ * Bring MMC host to ENABLED from any other PM state.
+ */
+static int omap_mmc_enable(struct mmc_host *mmc)
+{
+	struct mmc_omap_host *host = mmc_priv(mmc);
+
+	switch (host->dpm_state) {
+	case DISABLED:
+		return omap_mmc_disabled_to_enabled(host);
+	case OFF:
+		return omap_mmc_off_to_enabled(host);
+	default:
+		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Bring MMC host in PM state (one level deeper).
+ */
+static int omap_mmc_disable(struct mmc_host *mmc, int lazy)
+{
+	struct mmc_omap_host *host = mmc_priv(mmc);
+
+	switch (host->dpm_state) {
+	case ENABLED: {
+		int delay;
+
+		delay = omap_mmc_enabled_to_disabled(host);
+		if (lazy || delay < 0)
+			return delay;
+		return 0;
+	}
+	case DISABLED:
+		return omap_mmc_disabled_to_off(host);
+	default:
+		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
+		return -EINVAL;
+	}
+}
+
+static int omap_mmc_enable_fclk(struct mmc_host *mmc)
+{
+	struct mmc_omap_host *host = mmc_priv(mmc);
+	int err;
+
+	err = clk_enable(host->fclk);
+	if (err)
+		return err;
+	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n");
+	omap_mmc_restore_ctx(host);
+	return 0;
+}
+
+static int omap_mmc_disable_fclk(struct mmc_host *mmc, int lazy)
+{
+	struct mmc_omap_host *host = mmc_priv(mmc);
+
+	omap_mmc_save_ctx(host);
+	clk_disable(host->fclk);
+	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n");
+	return 0;
+}
+
+static const struct mmc_host_ops mmc_omap_ops = {
+	.enable = omap_mmc_enable_fclk,
+	.disable = omap_mmc_disable_fclk,
+	.request = omap_mmc_request,
+	.set_ios = omap_mmc_set_ios,
+	.get_cd = omap_hsmmc_get_cd,
+	.get_ro = omap_hsmmc_get_ro,
+	/* NYET -- enable_sdio_irq */
+};
+
+static const struct mmc_host_ops mmc_omap_ps_ops = {
 	.enable = omap_mmc_enable,
 	.disable = omap_mmc_disable,
 	.request = omap_mmc_request,
@@ -1214,15 +1385,22 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 
 	seq_printf(s, "mmc%d:\n"
 			" enabled:\t%d\n"
+			" dpm_state:\t%d\n"
 			" nesting_cnt:\t%d\n"
 			" ctx_loss:\t%d:%d\n"
 			"\nregs:\n",
-			mmc->index, mmc->enabled ? 1 : 0, mmc->nesting_cnt,
+			mmc->index, mmc->enabled ? 1 : 0,
+			host->dpm_state, mmc->nesting_cnt,
 			host->context_loss, context_loss);
 
+	if (host->suspended || host->dpm_state == OFF) {
+		seq_printf(s, "host suspended, can't read registers\n");
+		return 0;
+	}
+
 	if (clk_enable(host->fclk) != 0) {
 		seq_printf(s, "can't read the regs\n");
-		goto err;
+		return 0;
 	}
 
 	seq_printf(s, "SYSCONFIG:\t0x%08x\n",
@@ -1241,7 +1419,7 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 			OMAP_HSMMC_READ(host->base, CAPA));
 
 	clk_disable(host->fclk);
-err:
+
 	return 0;
 }
 
@@ -1323,7 +1501,11 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, host);
 	INIT_WORK(&host->mmc_carddetect_work, mmc_omap_detect);
 
-	mmc->ops	= &mmc_omap_ops;
+	if (pdata->slots[host->slot_id].power_saving)
+		mmc->ops	= &mmc_omap_ps_ops;
+	else
+		mmc->ops	= &mmc_omap_ops;
+
 	mmc->f_min	= 400000;
 	mmc->f_max	= 52000000;
 
@@ -1346,7 +1528,10 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	omap_mmc_save_ctx(host);
 
 	mmc->caps |= MMC_CAP_DISABLE;
-	mmc_set_disable_delay(mmc, 100);
+	mmc_set_disable_delay(mmc, OMAP_MMC_DISABLED_TIMEOUT);
+	/* we start off in DISABLED state */
+	host->dpm_state = DISABLED;
+
 	if (mmc_host_enable(host->mmc) != 0) {
 		clk_put(host->iclk);
 		clk_put(host->fclk);
-- 
cgit v0.10.2


From 9b7c18e070d59ba0acfdb936fd613dfa1d2a4e7d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:50 -0700
Subject: ARM: OMAP: mmc-twl4030: add regulator sleep / wake function

Add the ability for the driver to put the card power regulators to sleep
and wake them up again.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c
index cb1cbd7..c9c59a2 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.c
+++ b/arch/arm/mach-omap2/mmc-twl4030.c
@@ -340,6 +340,61 @@ static int twl_mmc23_set_power(struct device *dev, int slot, int power_on, int v
 	return ret;
 }
 
+static int twl_mmc1_set_sleep(struct device *dev, int slot, int sleep, int vdd,
+			      int cardsleep)
+{
+	struct twl_mmc_controller *c = &hsmmc[0];
+	int mode = sleep ? REGULATOR_MODE_STANDBY : REGULATOR_MODE_NORMAL;
+
+	return regulator_set_mode(c->vcc, mode);
+}
+
+static int twl_mmc23_set_sleep(struct device *dev, int slot, int sleep, int vdd,
+			       int cardsleep)
+{
+	struct twl_mmc_controller *c = NULL;
+	struct omap_mmc_platform_data *mmc = dev->platform_data;
+	int i, err, mode;
+
+	for (i = 1; i < ARRAY_SIZE(hsmmc); i++) {
+		if (mmc == hsmmc[i].mmc) {
+			c = &hsmmc[i];
+			break;
+		}
+	}
+
+	if (c == NULL)
+		return -ENODEV;
+
+	/*
+	 * If we don't see a Vcc regulator, assume it's a fixed
+	 * voltage always-on regulator.
+	 */
+	if (!c->vcc)
+		return 0;
+
+	mode = sleep ? REGULATOR_MODE_STANDBY : REGULATOR_MODE_NORMAL;
+
+	if (!c->vcc_aux)
+		return regulator_set_mode(c->vcc, mode);
+
+	if (cardsleep) {
+		/* VCC can be turned off if card is asleep */
+		struct regulator *vcc_aux = c->vcc_aux;
+
+		c->vcc_aux = NULL;
+		if (sleep)
+			err = twl_mmc23_set_power(dev, slot, 0, 0);
+		else
+			err = twl_mmc23_set_power(dev, slot, 1, vdd);
+		c->vcc_aux = vcc_aux;
+	} else
+		err = regulator_set_mode(c->vcc, mode);
+	if (err)
+		return err;
+	return regulator_set_mode(c->vcc_aux, mode);
+}
+
 static struct omap_mmc_platform_data *hsmmc_data[OMAP34XX_NR_MMC] __initdata;
 
 void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
@@ -433,6 +488,7 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		case 1:
 			/* on-chip level shifting via PBIAS0/PBIAS1 */
 			mmc->slots[0].set_power = twl_mmc1_set_power;
+			mmc->slots[0].set_sleep = twl_mmc1_set_sleep;
 			break;
 		case 2:
 			if (c->ext_clock)
@@ -443,6 +499,7 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		case 3:
 			/* off-chip level shifting, or none */
 			mmc->slots[0].set_power = twl_mmc23_set_power;
+			mmc->slots[0].set_sleep = twl_mmc23_set_sleep;
 			break;
 		default:
 			pr_err("MMC%d configuration not supported!\n", c->mmc);
diff --git a/arch/arm/plat-omap/include/mach/mmc.h b/arch/arm/plat-omap/include/mach/mmc.h
index 82f1e29..9390297 100644
--- a/arch/arm/plat-omap/include/mach/mmc.h
+++ b/arch/arm/plat-omap/include/mach/mmc.h
@@ -95,6 +95,8 @@ struct omap_mmc_platform_data {
 		int (* set_bus_mode)(struct device *dev, int slot, int bus_mode);
 		int (* set_power)(struct device *dev, int slot, int power_on, int vdd);
 		int (* get_ro)(struct device *dev, int slot);
+		int (*set_sleep)(struct device *dev, int slot, int sleep,
+				 int vdd, int cardsleep);
 
 		/* return MMC cover switch state, can be NULL if not supported.
 		 *
-- 
cgit v0.10.2


From 623821f71bb40f9972630eb1f779817d462ef0ee Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:51 -0700
Subject: omap_hsmmc: put MMC regulator to sleep

When a card is not in use, the voltage regulator can be put to sleep.
This is an alternative to powering the card off, when powering off is not
safe because the card might be replaced without the driver being aware of
it.

That situation happens if:
	- the card is removable i.e. not eMMC
	- and there is no card detect
	- and there is a cover switch but the cover is open

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 016914c..c82ec7f 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -162,6 +162,7 @@ struct mmc_omap_host {
 	int			response_busy;
 	int			context_loss;
 	int			dpm_state;
+	int			vdd;
 
 	struct	omap_mmc_platform_data	*pdata;
 };
@@ -1038,10 +1039,12 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		case MMC_POWER_OFF:
 			mmc_slot(host).set_power(host->dev, host->slot_id,
 						 0, 0);
+			host->vdd = 0;
 			break;
 		case MMC_POWER_UP:
 			mmc_slot(host).set_power(host->dev, host->slot_id,
 						 1, ios->vdd);
+			host->vdd = ios->vdd;
 			break;
 		case MMC_POWER_ON:
 			do_send_init_stream = 1;
@@ -1179,19 +1182,20 @@ static void omap_hsmmc_init(struct mmc_omap_host *host)
 
 /*
  * Dynamic power saving handling, FSM:
- *   ENABLED -> DISABLED -> OFF
+ *   ENABLED -> DISABLED -> OFF / REGSLEEP
  *     ^___________|          |
  *     |______________________|
  *
  * ENABLED:   mmc host is fully functional
  * DISABLED:  fclk is off
  * OFF:       fclk is off,voltage regulator is off
+ * REGSLEEP:  fclk is off,voltage regulator is asleep
  *
  * Transition handlers return the timeout for the next state transition
  * or negative error.
  */
 
-enum {ENABLED = 0, DISABLED, OFF};
+enum {ENABLED = 0, DISABLED, REGSLEEP, OFF};
 
 /* Handler for [ENABLED -> DISABLED] transition */
 static int omap_mmc_enabled_to_disabled(struct mmc_omap_host *host)
@@ -1228,8 +1232,12 @@ static int omap_mmc_disabled_to_off(struct mmc_omap_host *host)
 	     mmc_slot(host).get_cover_state(host->dev, host->slot_id))) {
 		mmc_power_save_host(host->mmc);
 		new_state = OFF;
-	} else
-		new_state = DISABLED;
+	} else {
+		if (mmc_slot(host).set_sleep)
+			mmc_slot(host).set_sleep(host->dev, host->slot_id,
+						 1, 0, 0);
+		new_state = REGSLEEP;
+	}
 
 	OMAP_HSMMC_WRITE(host->base, ISE, 0);
 	OMAP_HSMMC_WRITE(host->base, IE, 0);
@@ -1286,6 +1294,44 @@ static int omap_mmc_off_to_enabled(struct mmc_omap_host *host)
 	return 0;
 }
 
+/* Handler for [REGSLEEP -> ENABLED] transition */
+static int omap_mmc_regsleep_to_enabled(struct mmc_omap_host *host)
+{
+	unsigned long timeout;
+
+	dev_dbg(mmc_dev(host->mmc), "REGSLEEP -> ENABLED\n");
+
+	clk_enable(host->fclk);
+	clk_enable(host->iclk);
+
+	if (clk_enable(host->dbclk))
+		dev_dbg(mmc_dev(host->mmc),
+			"Enabling debounce clk failed\n");
+
+	omap_mmc_restore_ctx(host);
+
+	/*
+	 * We turned off interrupts and bus power.  Interrupts
+	 * are turned on by 'mmc_omap_start_command()' so we
+	 * just need to turn on the bus power here.
+	 */
+	OMAP_HSMMC_WRITE(host->base, HCTL,
+			 OMAP_HSMMC_READ(host->base, HCTL) | SDBP);
+
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, HCTL) & SDBP) != SDBP &&
+	       time_before(jiffies, timeout))
+		;
+
+	if (mmc_slot(host).set_sleep)
+		mmc_slot(host).set_sleep(host->dev, host->slot_id,
+					 0, host->vdd, 0);
+
+	host->dpm_state = ENABLED;
+
+	return 0;
+}
+
 /*
  * Bring MMC host to ENABLED from any other PM state.
  */
@@ -1296,6 +1342,8 @@ static int omap_mmc_enable(struct mmc_host *mmc)
 	switch (host->dpm_state) {
 	case DISABLED:
 		return omap_mmc_disabled_to_enabled(host);
+	case REGSLEEP:
+		return omap_mmc_regsleep_to_enabled(host);
 	case OFF:
 		return omap_mmc_off_to_enabled(host);
 	default:
@@ -1393,7 +1441,8 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 			host->dpm_state, mmc->nesting_cnt,
 			host->context_loss, context_loss);
 
-	if (host->suspended || host->dpm_state == OFF) {
+	if (host->suspended || host->dpm_state == OFF ||
+	    host->dpm_state == REGSLEEP) {
 		seq_printf(s, "host suspended, can't read registers\n");
 		return 0;
 	}
-- 
cgit v0.10.2


From 13189e78caa824f0285b1823519c020591641207 Mon Sep 17 00:00:00 2001
From: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Date: Tue, 22 Sep 2009 16:44:53 -0700
Subject: omap_hsmmc: add mmc card sleep and awake support

After 1 second of inactivity, put card and/or regulator to sleep.  After 8
seconds of inactivity, turn off the power.

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index c82ec7f..b83f7a4 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -27,6 +27,7 @@
 #include <linux/timer.h>
 #include <linux/clk.h>
 #include <linux/mmc/host.h>
+#include <linux/mmc/core.h>
 #include <linux/io.h>
 #include <linux/semaphore.h>
 #include <mach/dma.h>
@@ -115,7 +116,8 @@
 
 /* Timeouts for entering power saving states on inactivity, msec */
 #define OMAP_MMC_DISABLED_TIMEOUT	100
-#define OMAP_MMC_OFF_TIMEOUT		1000
+#define OMAP_MMC_SLEEP_TIMEOUT		1000
+#define OMAP_MMC_OFF_TIMEOUT		8000
 
 /*
  * One controller can have multiple slots, like on some omap boards using
@@ -1182,20 +1184,21 @@ static void omap_hsmmc_init(struct mmc_omap_host *host)
 
 /*
  * Dynamic power saving handling, FSM:
- *   ENABLED -> DISABLED -> OFF / REGSLEEP
- *     ^___________|          |
- *     |______________________|
+ *   ENABLED -> DISABLED -> CARDSLEEP / REGSLEEP -> OFF
+ *     ^___________|          |                      |
+ *     |______________________|______________________|
  *
  * ENABLED:   mmc host is fully functional
  * DISABLED:  fclk is off
- * OFF:       fclk is off,voltage regulator is off
- * REGSLEEP:  fclk is off,voltage regulator is asleep
+ * CARDSLEEP: fclk is off, card is asleep, voltage regulator is asleep
+ * REGSLEEP:  fclk is off, voltage regulator is asleep
+ * OFF:       fclk is off, voltage regulator is off
  *
  * Transition handlers return the timeout for the next state transition
  * or negative error.
  */
 
-enum {ENABLED = 0, DISABLED, REGSLEEP, OFF};
+enum {ENABLED = 0, DISABLED, CARDSLEEP, REGSLEEP, OFF};
 
 /* Handler for [ENABLED -> DISABLED] transition */
 static int omap_mmc_enabled_to_disabled(struct mmc_omap_host *host)
@@ -1209,46 +1212,72 @@ static int omap_mmc_enabled_to_disabled(struct mmc_omap_host *host)
 	if (host->power_mode == MMC_POWER_OFF)
 		return 0;
 
-	return msecs_to_jiffies(OMAP_MMC_OFF_TIMEOUT);
+	return msecs_to_jiffies(OMAP_MMC_SLEEP_TIMEOUT);
 }
 
-/* Handler for [DISABLED -> OFF] transition */
-static int omap_mmc_disabled_to_off(struct mmc_omap_host *host)
+/* Handler for [DISABLED -> REGSLEEP / CARDSLEEP] transition */
+static int omap_mmc_disabled_to_sleep(struct mmc_omap_host *host)
 {
-	int new_state;
-
-	dev_dbg(mmc_dev(host->mmc), "DISABLED -> OFF\n");
+	int err, new_state;
 
 	if (!mmc_try_claim_host(host->mmc))
 		return 0;
 
 	clk_enable(host->fclk);
-
 	omap_mmc_restore_ctx(host);
+	if (mmc_card_can_sleep(host->mmc)) {
+		err = mmc_card_sleep(host->mmc);
+		if (err < 0) {
+			clk_disable(host->fclk);
+			mmc_release_host(host->mmc);
+			return err;
+		}
+		new_state = CARDSLEEP;
+	} else
+		new_state = REGSLEEP;
+	if (mmc_slot(host).set_sleep)
+		mmc_slot(host).set_sleep(host->dev, host->slot_id, 1, 0,
+					 new_state == CARDSLEEP);
+	/* FIXME: turn off bus power and perhaps interrupts too */
+	clk_disable(host->fclk);
+	host->dpm_state = new_state;
+
+	mmc_release_host(host->mmc);
+
+	dev_dbg(mmc_dev(host->mmc), "DISABLED -> %s\n",
+		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
 
 	if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) ||
 	    mmc_slot(host).card_detect ||
 	    (mmc_slot(host).get_cover_state &&
-	     mmc_slot(host).get_cover_state(host->dev, host->slot_id))) {
-		mmc_power_save_host(host->mmc);
-		new_state = OFF;
-	} else {
-		if (mmc_slot(host).set_sleep)
-			mmc_slot(host).set_sleep(host->dev, host->slot_id,
-						 1, 0, 0);
-		new_state = REGSLEEP;
+	     mmc_slot(host).get_cover_state(host->dev, host->slot_id)))
+		return msecs_to_jiffies(OMAP_MMC_OFF_TIMEOUT);
+
+	return 0;
+}
+
+/* Handler for [REGSLEEP / CARDSLEEP -> OFF] transition */
+static int omap_mmc_sleep_to_off(struct mmc_omap_host *host)
+{
+	if (!mmc_try_claim_host(host->mmc))
+		return 0;
+
+	if (!((host->mmc->caps & MMC_CAP_NONREMOVABLE) ||
+	      mmc_slot(host).card_detect ||
+	      (mmc_slot(host).get_cover_state &&
+	       mmc_slot(host).get_cover_state(host->dev, host->slot_id)))) {
+		mmc_release_host(host->mmc);
+		return 0;
 	}
 
-	OMAP_HSMMC_WRITE(host->base, ISE, 0);
-	OMAP_HSMMC_WRITE(host->base, IE, 0);
-	OMAP_HSMMC_WRITE(host->base, HCTL,
-		 OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
+	mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0);
+	host->vdd = 0;
+	host->power_mode = MMC_POWER_OFF;
 
-	clk_disable(host->fclk);
-	clk_disable(host->iclk);
-	clk_disable(host->dbclk);
+	dev_dbg(mmc_dev(host->mmc), "%s -> OFF\n",
+		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
 
-	host->dpm_state = new_state;
+	host->dpm_state = OFF;
 
 	mmc_release_host(host->mmc);
 
@@ -1273,62 +1302,43 @@ static int omap_mmc_disabled_to_enabled(struct mmc_omap_host *host)
 	return 0;
 }
 
-/* Handler for [OFF -> ENABLED] transition */
-static int omap_mmc_off_to_enabled(struct mmc_omap_host *host)
+/* Handler for [SLEEP -> ENABLED] transition */
+static int omap_mmc_sleep_to_enabled(struct mmc_omap_host *host)
 {
-	clk_enable(host->fclk);
-	clk_enable(host->iclk);
-
-	if (clk_enable(host->dbclk))
-		dev_dbg(mmc_dev(host->mmc),
-			"Enabling debounce clk failed\n");
+	if (!mmc_try_claim_host(host->mmc))
+		return 0;
 
+	clk_enable(host->fclk);
 	omap_mmc_restore_ctx(host);
-	omap_hsmmc_init(host);
-	mmc_power_restore_host(host->mmc);
+	if (mmc_slot(host).set_sleep)
+		mmc_slot(host).set_sleep(host->dev, host->slot_id, 0,
+			 host->vdd, host->dpm_state == CARDSLEEP);
+	if (mmc_card_can_sleep(host->mmc))
+		mmc_card_awake(host->mmc);
+
+	dev_dbg(mmc_dev(host->mmc), "%s -> ENABLED\n",
+		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
 
 	host->dpm_state = ENABLED;
 
-	dev_dbg(mmc_dev(host->mmc), "OFF -> ENABLED\n");
+	mmc_release_host(host->mmc);
 
 	return 0;
 }
 
-/* Handler for [REGSLEEP -> ENABLED] transition */
-static int omap_mmc_regsleep_to_enabled(struct mmc_omap_host *host)
+/* Handler for [OFF -> ENABLED] transition */
+static int omap_mmc_off_to_enabled(struct mmc_omap_host *host)
 {
-	unsigned long timeout;
-
-	dev_dbg(mmc_dev(host->mmc), "REGSLEEP -> ENABLED\n");
-
 	clk_enable(host->fclk);
-	clk_enable(host->iclk);
-
-	if (clk_enable(host->dbclk))
-		dev_dbg(mmc_dev(host->mmc),
-			"Enabling debounce clk failed\n");
 
 	omap_mmc_restore_ctx(host);
-
-	/*
-	 * We turned off interrupts and bus power.  Interrupts
-	 * are turned on by 'mmc_omap_start_command()' so we
-	 * just need to turn on the bus power here.
-	 */
-	OMAP_HSMMC_WRITE(host->base, HCTL,
-			 OMAP_HSMMC_READ(host->base, HCTL) | SDBP);
-
-	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
-	while ((OMAP_HSMMC_READ(host->base, HCTL) & SDBP) != SDBP &&
-	       time_before(jiffies, timeout))
-		;
-
-	if (mmc_slot(host).set_sleep)
-		mmc_slot(host).set_sleep(host->dev, host->slot_id,
-					 0, host->vdd, 0);
+	omap_hsmmc_init(host);
+	mmc_power_restore_host(host->mmc);
 
 	host->dpm_state = ENABLED;
 
+	dev_dbg(mmc_dev(host->mmc), "OFF -> ENABLED\n");
+
 	return 0;
 }
 
@@ -1342,8 +1352,9 @@ static int omap_mmc_enable(struct mmc_host *mmc)
 	switch (host->dpm_state) {
 	case DISABLED:
 		return omap_mmc_disabled_to_enabled(host);
+	case CARDSLEEP:
 	case REGSLEEP:
-		return omap_mmc_regsleep_to_enabled(host);
+		return omap_mmc_sleep_to_enabled(host);
 	case OFF:
 		return omap_mmc_off_to_enabled(host);
 	default:
@@ -1369,7 +1380,10 @@ static int omap_mmc_disable(struct mmc_host *mmc, int lazy)
 		return 0;
 	}
 	case DISABLED:
-		return omap_mmc_disabled_to_off(host);
+		return omap_mmc_disabled_to_sleep(host);
+	case CARDSLEEP:
+	case REGSLEEP:
+		return omap_mmc_sleep_to_off(host);
 	default:
 		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
 		return -EINVAL;
@@ -1441,8 +1455,7 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 			host->dpm_state, mmc->nesting_cnt,
 			host->context_loss, context_loss);
 
-	if (host->suspended || host->dpm_state == OFF ||
-	    host->dpm_state == REGSLEEP) {
+	if (host->suspended || host->dpm_state == OFF) {
 		seq_printf(s, "host suspended, can't read registers\n");
 		return 0;
 	}
@@ -1617,7 +1630,8 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
 	mmc->max_seg_size = mmc->max_req_size;
 
-	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED;
+	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
+		     MMC_CAP_WAIT_WHILE_BUSY;
 
 	if (pdata->slots[host->slot_id].wires >= 8)
 		mmc->caps |= MMC_CAP_8_BIT_DATA;
-- 
cgit v0.10.2


From 0a40e64786933fb04be37af0a19aa320bc3414a8 Mon Sep 17 00:00:00 2001
From: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Date: Tue, 22 Sep 2009 16:44:54 -0700
Subject: omap_hsmmc: fix NULL pointer dereference

Do not call 'mmc_omap_xfer_done()' if the request is already done.

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index b83f7a4..9519964 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -677,7 +677,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 
 	if (end_cmd || ((status & CC) && host->cmd))
 		mmc_omap_cmd_done(host, host->cmd);
-	if (end_trans || (status & TC))
+	if ((end_trans || (status & TC)) && host->mrq)
 		mmc_omap_xfer_done(host, data);
 
 	return IRQ_HANDLED;
-- 
cgit v0.10.2


From 191d1f1de158cf4dee3a5595e845a498364c061f Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:55 -0700
Subject: omap_hsmmc: cleanup macro usage

Use macro mmc_slot() in omap_hsmmc.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 9519964..305e5d2 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -368,9 +368,8 @@ int mmc_omap_cover_is_closed(struct mmc_omap_host *host)
 {
 	int r = 1;
 
-	if (host->pdata->slots[host->slot_id].get_cover_state)
-		r = host->pdata->slots[host->slot_id].get_cover_state(host->dev,
-			host->slot_id);
+	if (mmc_slot(host).get_cover_state)
+		r = mmc_slot(host).get_cover_state(host->dev, host->slot_id);
 	return r;
 }
 
@@ -393,9 +392,8 @@ mmc_omap_show_slot_name(struct device *dev, struct device_attribute *attr,
 {
 	struct mmc_host *mmc = container_of(dev, struct mmc_host, class_dev);
 	struct mmc_omap_host *host = mmc_priv(mmc);
-	struct omap_mmc_slot_data slot = host->pdata->slots[host->slot_id];
 
-	return sprintf(buf, "%s\n", slot.name);
+	return sprintf(buf, "%s\n", mmc_slot(host).name);
 }
 
 static DEVICE_ATTR(slot_name, S_IRUGO, mmc_omap_show_slot_name, NULL);
@@ -632,7 +630,8 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 			(status & CMD_CRC)) {
 			if (host->cmd) {
 				if (status & CMD_TIMEOUT) {
-					mmc_omap_reset_controller_fsm(host, SRC);
+					mmc_omap_reset_controller_fsm(host,
+								      SRC);
 					host->cmd->error = -ETIMEDOUT;
 				} else {
 					host->cmd->error = -EILSEQ;
@@ -775,7 +774,7 @@ static void mmc_omap_detect(struct work_struct *work)
 
 	sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
 
-	if (mmc_slot(host).card_detect)
+	if (slot->card_detect)
 		carddetect = slot->card_detect(slot->card_detect_irq);
 	else
 		carddetect = -ENOSYS;
@@ -830,7 +829,7 @@ static void mmc_omap_config_dma_params(struct mmc_omap_host *host,
 			sg_dma_address(sgl), 0, 0);
 	} else {
 		omap_set_dma_src_params(dma_ch, 0, OMAP_DMA_AMODE_CONSTANT,
-					(host->mapbase + OMAP_HSMMC_DATA), 0, 0);
+			(host->mapbase + OMAP_HSMMC_DATA), 0, 0);
 		omap_set_dma_dest_params(dma_ch, 0, OMAP_DMA_AMODE_POST_INC,
 			sg_dma_address(sgl), 0, 0);
 	}
@@ -918,7 +917,7 @@ mmc_omap_start_dma_transfer(struct mmc_omap_host *host, struct mmc_request *req)
 	}
 
 	ret = omap_request_dma(mmc_omap_get_dma_sync_dev(host, data), "MMC/SD",
-			       mmc_omap_dma_cb,host, &dma_ch);
+			       mmc_omap_dma_cb, host, &dma_ch);
 	if (ret != 0) {
 		dev_err(mmc_dev(host->mmc),
 			"%s: omap_request_dma() failed with %d\n",
@@ -1138,21 +1137,19 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 static int omap_hsmmc_get_cd(struct mmc_host *mmc)
 {
 	struct mmc_omap_host *host = mmc_priv(mmc);
-	struct omap_mmc_platform_data *pdata = host->pdata;
 
-	if (!pdata->slots[0].card_detect)
+	if (!mmc_slot(host).card_detect)
 		return -ENOSYS;
-	return pdata->slots[0].card_detect(pdata->slots[0].card_detect_irq);
+	return mmc_slot(host).card_detect(mmc_slot(host).card_detect_irq);
 }
 
 static int omap_hsmmc_get_ro(struct mmc_host *mmc)
 {
 	struct mmc_omap_host *host = mmc_priv(mmc);
-	struct omap_mmc_platform_data *pdata = host->pdata;
 
-	if (!pdata->slots[0].get_ro)
+	if (!mmc_slot(host).get_ro)
 		return -ENOSYS;
-	return pdata->slots[0].get_ro(host->dev, 0);
+	return mmc_slot(host).get_ro(host->dev, 0);
 }
 
 static void omap_hsmmc_init(struct mmc_omap_host *host)
@@ -1563,7 +1560,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, host);
 	INIT_WORK(&host->mmc_carddetect_work, mmc_omap_detect);
 
-	if (pdata->slots[host->slot_id].power_saving)
+	if (mmc_slot(host).power_saving)
 		mmc->ops	= &mmc_omap_ps_ops;
 	else
 		mmc->ops	= &mmc_omap_ops;
@@ -1633,12 +1630,12 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
 		     MMC_CAP_WAIT_WHILE_BUSY;
 
-	if (pdata->slots[host->slot_id].wires >= 8)
+	if (mmc_slot(host).wires >= 8)
 		mmc->caps |= MMC_CAP_8_BIT_DATA;
-	else if (pdata->slots[host->slot_id].wires >= 4)
+	else if (mmc_slot(host).wires >= 4)
 		mmc->caps |= MMC_CAP_4_BIT_DATA;
 
-	if (pdata->slots[host->slot_id].nonremovable)
+	if (mmc_slot(host).nonremovable)
 		mmc->caps |= MMC_CAP_NONREMOVABLE;
 
 	omap_hsmmc_init(host);
@@ -1700,13 +1697,12 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 
 	mmc_add_host(mmc);
 
-	if (host->pdata->slots[host->slot_id].name != NULL) {
+	if (mmc_slot(host).name != NULL) {
 		ret = device_create_file(&mmc->class_dev, &dev_attr_slot_name);
 		if (ret < 0)
 			goto err_slot_name;
 	}
-	if (mmc_slot(host).card_detect_irq &&
-	    host->pdata->slots[host->slot_id].get_cover_state) {
+	if (mmc_slot(host).card_detect_irq && mmc_slot(host).get_cover_state) {
 		ret = device_create_file(&mmc->class_dev,
 					&dev_attr_cover_switch);
 		if (ret < 0)
@@ -1812,7 +1808,7 @@ static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state)
 
 
 			OMAP_HSMMC_WRITE(host->base, HCTL,
-					 OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
+				OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
 			mmc_host_disable(host->mmc);
 			clk_disable(host->iclk);
 			clk_disable(host->dbclk);
-- 
cgit v0.10.2


From c653a6d4d18be5213d0e910cee75ebf089f8ba9d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:56 -0700
Subject: omap_hsmmc: clear interrupt status after init sequence

Clear the interrupt status after sending the initialization sequence, as
specified in the TRM.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 305e5d2..325cf60 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -360,6 +360,10 @@ static void send_init_stream(struct mmc_omap_host *host)
 
 	OMAP_HSMMC_WRITE(host->base, CON,
 		OMAP_HSMMC_READ(host->base, CON) & ~INIT_STREAM);
+
+	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+	OMAP_HSMMC_READ(host->base, STAT);
+
 	enable_irq(host->irq);
 }
 
-- 
cgit v0.10.2


From 23050103c21d4d5314b7c978187e6e4305a00495 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:57 -0700
Subject: omap_hsmmc: cater for weird CMD6 behaviour

Sometimes the controller unexpectedly produces a TC (transfer complete)
interrupt before the CC (command complete) interrupt for command 6
(SWITCH).  This is a problem because the CC interrupt can get mixed up
with the next request.  Add a hack for CMD6.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 325cf60..f9ed5e2 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -481,6 +481,13 @@ mmc_omap_xfer_done(struct mmc_omap_host *host, struct mmc_data *data)
 	if (!data) {
 		struct mmc_request *mrq = host->mrq;
 
+		/* TC before CC from CMD6 - don't know why, but it happens */
+		if (host->cmd && host->cmd->opcode == 6 &&
+		    host->response_busy) {
+			host->response_busy = 0;
+			return;
+		}
+
 		host->mrq = NULL;
 		mmc_request_done(host->mmc, mrq);
 		return;
-- 
cgit v0.10.2


From 4dffd7a251172d78a157ff45ec1207012f44774a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:44:58 -0700
Subject: omap_hsmmc: prevent races with irq handler

If an unexpected interrupt occurs while preparing the next request, an
oops can occur.

For example, a new request is setting up DMA for data transfer so
host->data is not NULL.  An unexpected transfer complete (TC) interrupt
comes along and the interrupt handler sets host->data to NULL.  Oops!

Prevent that by adding a spinlock.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index f9ed5e2..0bfea9c 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -148,6 +148,8 @@ struct mmc_omap_host {
 	struct	work_struct	mmc_carddetect_work;
 	void	__iomem		*base;
 	resource_size_t		mapbase;
+	spinlock_t		irq_lock; /* Prevent races with irq handler */
+	unsigned long		flags;
 	unsigned int		id;
 	unsigned int		dma_len;
 	unsigned int		dma_sg_idx;
@@ -459,6 +461,14 @@ mmc_omap_start_command(struct mmc_omap_host *host, struct mmc_command *cmd,
 	if (host->use_dma)
 		cmdreg |= DMA_EN;
 
+	/*
+	 * In an interrupt context (i.e. STOP command), the spinlock is unlocked
+	 * by the interrupt handler, otherwise (i.e. for a new request) it is
+	 * unlocked here.
+	 */
+	if (!in_interrupt())
+		spin_unlock_irqrestore(&host->irq_lock, host->flags);
+
 	OMAP_HSMMC_WRITE(host->base, ARG, cmd->arg);
 	OMAP_HSMMC_WRITE(host->base, CMD, cmdreg);
 }
@@ -621,11 +631,14 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 	struct mmc_data *data;
 	int end_cmd = 0, end_trans = 0, status;
 
+	spin_lock(&host->irq_lock);
+
 	if (host->mrq == NULL) {
 		OMAP_HSMMC_WRITE(host->base, STAT,
 			OMAP_HSMMC_READ(host->base, STAT));
 		/* Flush posted write */
 		OMAP_HSMMC_READ(host->base, STAT);
+		spin_unlock(&host->irq_lock);
 		return IRQ_HANDLED;
 	}
 
@@ -690,6 +703,8 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 	if ((end_trans || (status & TC)) && host->mrq)
 		mmc_omap_xfer_done(host, data);
 
+	spin_unlock(&host->irq_lock);
+
 	return IRQ_HANDLED;
 }
 
@@ -1018,6 +1033,13 @@ static void omap_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
 	struct mmc_omap_host *host = mmc_priv(mmc);
 	int err;
 
+	/*
+	 * Prevent races with the interrupt handler because of unexpected
+	 * interrupts, but not if we are already in interrupt context i.e.
+	 * retries.
+	 */
+	if (!in_interrupt())
+		spin_lock_irqsave(&host->irq_lock, host->flags);
 	WARN_ON(host->mrq != NULL);
 	host->mrq = req;
 	err = mmc_omap_prepare_data(host, req);
@@ -1026,6 +1048,8 @@ static void omap_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
 		if (req->data)
 			req->data->error = err;
 		host->mrq = NULL;
+		if (!in_interrupt())
+			spin_unlock_irqrestore(&host->irq_lock, host->flags);
 		mmc_request_done(mmc, req);
 		return;
 	}
@@ -1580,6 +1604,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	mmc->f_max	= 52000000;
 
 	sema_init(&host->sem, 1);
+	spin_lock_init(&host->irq_lock);
 
 	host->iclk = clk_get(&pdev->dev, "ick");
 	if (IS_ERR(host->iclk)) {
-- 
cgit v0.10.2


From 70a3341a711f27ae77714ae7dd360a4e7e2d5e7c Mon Sep 17 00:00:00 2001
From: Denis Karpov <ext-denis.2.karpov@nokia.com>
Date: Tue, 22 Sep 2009 16:44:59 -0700
Subject: omap_hsmmc: code refactoring

Functions', structures', variables' names are changed to start with
omap_hsmmc_ prefix.

Signed-off-by: Denis Karpov <ext-denis.2.karpov@nokia.com>
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 0bfea9c..20746e4 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -135,7 +135,7 @@
 #define OMAP_HSMMC_WRITE(base, reg, val) \
 	__raw_writel((val), (base) + OMAP_HSMMC_##reg)
 
-struct mmc_omap_host {
+struct omap_hsmmc_host {
 	struct	device		*dev;
 	struct	mmc_host	*mmc;
 	struct	mmc_request	*mrq;
@@ -174,7 +174,7 @@ struct mmc_omap_host {
 /*
  * Stop clock to the card
  */
-static void omap_mmc_stop_clock(struct mmc_omap_host *host)
+static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
 {
 	OMAP_HSMMC_WRITE(host->base, SYSCTL,
 		OMAP_HSMMC_READ(host->base, SYSCTL) & ~CEN);
@@ -188,7 +188,7 @@ static void omap_mmc_stop_clock(struct mmc_omap_host *host)
  * Restore the MMC host context, if it was lost as result of a
  * power state change.
  */
-static int omap_mmc_restore_ctx(struct mmc_omap_host *host)
+static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
 {
 	struct mmc_ios *ios = &host->mmc->ios;
 	struct omap_mmc_platform_data *pdata = host->pdata;
@@ -316,7 +316,7 @@ out:
 /*
  * Save the MMC host context (store the number of power state changes so far).
  */
-static void omap_mmc_save_ctx(struct mmc_omap_host *host)
+static void omap_hsmmc_context_save(struct omap_hsmmc_host *host)
 {
 	struct omap_mmc_platform_data *pdata = host->pdata;
 	int context_loss;
@@ -331,12 +331,12 @@ static void omap_mmc_save_ctx(struct mmc_omap_host *host)
 
 #else
 
-static int omap_mmc_restore_ctx(struct mmc_omap_host *host)
+static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
 {
 	return 0;
 }
 
-static void omap_mmc_save_ctx(struct mmc_omap_host *host)
+static void omap_hsmmc_context_save(struct omap_hsmmc_host *host)
 {
 }
 
@@ -346,7 +346,7 @@ static void omap_mmc_save_ctx(struct mmc_omap_host *host)
  * Send init stream sequence to card
  * before sending IDLE command
  */
-static void send_init_stream(struct mmc_omap_host *host)
+static void send_init_stream(struct omap_hsmmc_host *host)
 {
 	int reg = 0;
 	unsigned long timeout;
@@ -370,7 +370,7 @@ static void send_init_stream(struct mmc_omap_host *host)
 }
 
 static inline
-int mmc_omap_cover_is_closed(struct mmc_omap_host *host)
+int omap_hsmmc_cover_is_closed(struct omap_hsmmc_host *host)
 {
 	int r = 1;
 
@@ -380,35 +380,35 @@ int mmc_omap_cover_is_closed(struct mmc_omap_host *host)
 }
 
 static ssize_t
-mmc_omap_show_cover_switch(struct device *dev, struct device_attribute *attr,
+omap_hsmmc_show_cover_switch(struct device *dev, struct device_attribute *attr,
 			   char *buf)
 {
 	struct mmc_host *mmc = container_of(dev, struct mmc_host, class_dev);
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
-	return sprintf(buf, "%s\n", mmc_omap_cover_is_closed(host) ? "closed" :
-		       "open");
+	return sprintf(buf, "%s\n",
+			omap_hsmmc_cover_is_closed(host) ? "closed" : "open");
 }
 
-static DEVICE_ATTR(cover_switch, S_IRUGO, mmc_omap_show_cover_switch, NULL);
+static DEVICE_ATTR(cover_switch, S_IRUGO, omap_hsmmc_show_cover_switch, NULL);
 
 static ssize_t
-mmc_omap_show_slot_name(struct device *dev, struct device_attribute *attr,
+omap_hsmmc_show_slot_name(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
 	struct mmc_host *mmc = container_of(dev, struct mmc_host, class_dev);
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
 	return sprintf(buf, "%s\n", mmc_slot(host).name);
 }
 
-static DEVICE_ATTR(slot_name, S_IRUGO, mmc_omap_show_slot_name, NULL);
+static DEVICE_ATTR(slot_name, S_IRUGO, omap_hsmmc_show_slot_name, NULL);
 
 /*
  * Configure the response type and send the cmd.
  */
 static void
-mmc_omap_start_command(struct mmc_omap_host *host, struct mmc_command *cmd,
+omap_hsmmc_start_command(struct omap_hsmmc_host *host, struct mmc_command *cmd,
 	struct mmc_data *data)
 {
 	int cmdreg = 0, resptype = 0, cmdtype = 0;
@@ -474,7 +474,7 @@ mmc_omap_start_command(struct mmc_omap_host *host, struct mmc_command *cmd,
 }
 
 static int
-mmc_omap_get_dma_dir(struct mmc_omap_host *host, struct mmc_data *data)
+omap_hsmmc_get_dma_dir(struct omap_hsmmc_host *host, struct mmc_data *data)
 {
 	if (data->flags & MMC_DATA_WRITE)
 		return DMA_TO_DEVICE;
@@ -486,7 +486,7 @@ mmc_omap_get_dma_dir(struct mmc_omap_host *host, struct mmc_data *data)
  * Notify the transfer complete to MMC core
  */
 static void
-mmc_omap_xfer_done(struct mmc_omap_host *host, struct mmc_data *data)
+omap_hsmmc_xfer_done(struct omap_hsmmc_host *host, struct mmc_data *data)
 {
 	if (!data) {
 		struct mmc_request *mrq = host->mrq;
@@ -507,7 +507,7 @@ mmc_omap_xfer_done(struct mmc_omap_host *host, struct mmc_data *data)
 
 	if (host->use_dma && host->dma_ch != -1)
 		dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->dma_len,
-			mmc_omap_get_dma_dir(host, data));
+			omap_hsmmc_get_dma_dir(host, data));
 
 	if (!data->error)
 		data->bytes_xfered += data->blocks * (data->blksz);
@@ -519,14 +519,14 @@ mmc_omap_xfer_done(struct mmc_omap_host *host, struct mmc_data *data)
 		mmc_request_done(host->mmc, data->mrq);
 		return;
 	}
-	mmc_omap_start_command(host, data->stop, NULL);
+	omap_hsmmc_start_command(host, data->stop, NULL);
 }
 
 /*
  * Notify the core about command completion
  */
 static void
-mmc_omap_cmd_done(struct mmc_omap_host *host, struct mmc_command *cmd)
+omap_hsmmc_cmd_done(struct omap_hsmmc_host *host, struct mmc_command *cmd)
 {
 	host->cmd = NULL;
 
@@ -551,13 +551,13 @@ mmc_omap_cmd_done(struct mmc_omap_host *host, struct mmc_command *cmd)
 /*
  * DMA clean up for command errors
  */
-static void mmc_dma_cleanup(struct mmc_omap_host *host, int errno)
+static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno)
 {
 	host->data->error = errno;
 
 	if (host->use_dma && host->dma_ch != -1) {
 		dma_unmap_sg(mmc_dev(host->mmc), host->data->sg, host->dma_len,
-			mmc_omap_get_dma_dir(host, host->data));
+			omap_hsmmc_get_dma_dir(host, host->data));
 		omap_free_dma(host->dma_ch);
 		host->dma_ch = -1;
 		up(&host->sem);
@@ -569,10 +569,10 @@ static void mmc_dma_cleanup(struct mmc_omap_host *host, int errno)
  * Readable error output
  */
 #ifdef CONFIG_MMC_DEBUG
-static void mmc_omap_report_irq(struct mmc_omap_host *host, u32 status)
+static void omap_hsmmc_report_irq(struct omap_hsmmc_host *host, u32 status)
 {
 	/* --- means reserved bit without definition at documentation */
-	static const char *mmc_omap_status_bits[] = {
+	static const char *omap_hsmmc_status_bits[] = {
 		"CC", "TC", "BGE", "---", "BWR", "BRR", "---", "---", "CIRQ",
 		"OBI", "---", "---", "---", "---", "---", "ERRI", "CTO", "CCRC",
 		"CEB", "CIE", "DTO", "DCRC", "DEB", "---", "ACE", "---",
@@ -585,9 +585,9 @@ static void mmc_omap_report_irq(struct mmc_omap_host *host, u32 status)
 	len = sprintf(buf, "MMC IRQ 0x%x :", status);
 	buf += len;
 
-	for (i = 0; i < ARRAY_SIZE(mmc_omap_status_bits); i++)
+	for (i = 0; i < ARRAY_SIZE(omap_hsmmc_status_bits); i++)
 		if (status & (1 << i)) {
-			len = sprintf(buf, " %s", mmc_omap_status_bits[i]);
+			len = sprintf(buf, " %s", omap_hsmmc_status_bits[i]);
 			buf += len;
 		}
 
@@ -602,8 +602,8 @@ static void mmc_omap_report_irq(struct mmc_omap_host *host, u32 status)
  *  SRC or SRD bit of SYSCTL register
  * Can be called from interrupt context
  */
-static inline void mmc_omap_reset_controller_fsm(struct mmc_omap_host *host,
-		unsigned long bit)
+static inline void omap_hsmmc_reset_controller_fsm(struct omap_hsmmc_host *host,
+						   unsigned long bit)
 {
 	unsigned long i = 0;
 	unsigned long limit = (loops_per_jiffy *
@@ -625,9 +625,9 @@ static inline void mmc_omap_reset_controller_fsm(struct mmc_omap_host *host,
 /*
  * MMC controller IRQ handler
  */
-static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
+static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
 {
-	struct mmc_omap_host *host = dev_id;
+	struct omap_hsmmc_host *host = dev_id;
 	struct mmc_data *data;
 	int end_cmd = 0, end_trans = 0, status;
 
@@ -648,14 +648,14 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 
 	if (status & ERR) {
 #ifdef CONFIG_MMC_DEBUG
-		mmc_omap_report_irq(host, status);
+		omap_hsmmc_report_irq(host, status);
 #endif
 		if ((status & CMD_TIMEOUT) ||
 			(status & CMD_CRC)) {
 			if (host->cmd) {
 				if (status & CMD_TIMEOUT) {
-					mmc_omap_reset_controller_fsm(host,
-								      SRC);
+					omap_hsmmc_reset_controller_fsm(host,
+									SRC);
 					host->cmd->error = -ETIMEDOUT;
 				} else {
 					host->cmd->error = -EILSEQ;
@@ -664,9 +664,10 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 			}
 			if (host->data || host->response_busy) {
 				if (host->data)
-					mmc_dma_cleanup(host, -ETIMEDOUT);
+					omap_hsmmc_dma_cleanup(host,
+								-ETIMEDOUT);
 				host->response_busy = 0;
-				mmc_omap_reset_controller_fsm(host, SRD);
+				omap_hsmmc_reset_controller_fsm(host, SRD);
 			}
 		}
 		if ((status & DATA_TIMEOUT) ||
@@ -676,11 +677,11 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 						-ETIMEDOUT : -EILSEQ;
 
 				if (host->data)
-					mmc_dma_cleanup(host, err);
+					omap_hsmmc_dma_cleanup(host, err);
 				else
 					host->mrq->cmd->error = err;
 				host->response_busy = 0;
-				mmc_omap_reset_controller_fsm(host, SRD);
+				omap_hsmmc_reset_controller_fsm(host, SRD);
 				end_trans = 1;
 			}
 		}
@@ -699,16 +700,16 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 	OMAP_HSMMC_READ(host->base, STAT);
 
 	if (end_cmd || ((status & CC) && host->cmd))
-		mmc_omap_cmd_done(host, host->cmd);
+		omap_hsmmc_cmd_done(host, host->cmd);
 	if ((end_trans || (status & TC)) && host->mrq)
-		mmc_omap_xfer_done(host, data);
+		omap_hsmmc_xfer_done(host, data);
 
 	spin_unlock(&host->irq_lock);
 
 	return IRQ_HANDLED;
 }
 
-static void set_sd_bus_power(struct mmc_omap_host *host)
+static void set_sd_bus_power(struct omap_hsmmc_host *host)
 {
 	unsigned long i;
 
@@ -728,7 +729,7 @@ static void set_sd_bus_power(struct mmc_omap_host *host)
  * The MMC2 transceiver controls are used instead of DAT4..DAT7.
  * Some chips, like eMMC ones, use internal transceivers.
  */
-static int omap_mmc_switch_opcond(struct mmc_omap_host *host, int vdd)
+static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd)
 {
 	u32 reg_val = 0;
 	int ret;
@@ -759,7 +760,7 @@ static int omap_mmc_switch_opcond(struct mmc_omap_host *host, int vdd)
 	/*
 	 * If a MMC dual voltage card is detected, the set_ios fn calls
 	 * this fn with VDD bit set for 1.8V. Upon card removal from the
-	 * slot, omap_mmc_set_ios sets the VDD back to 3V on MMC_POWER_OFF.
+	 * slot, omap_hsmmc_set_ios sets the VDD back to 3V on MMC_POWER_OFF.
 	 *
 	 * Cope with a bit of slop in the range ... per data sheets:
 	 *  - "1.8V" for vdds_mmc1/vdds_mmc1a can be up to 2.45V max,
@@ -788,10 +789,10 @@ err:
 /*
  * Work Item to notify the core about card insertion/removal
  */
-static void mmc_omap_detect(struct work_struct *work)
+static void omap_hsmmc_detect(struct work_struct *work)
 {
-	struct mmc_omap_host *host = container_of(work, struct mmc_omap_host,
-						mmc_carddetect_work);
+	struct omap_hsmmc_host *host =
+		container_of(work, struct omap_hsmmc_host, mmc_carddetect_work);
 	struct omap_mmc_slot_data *slot = &mmc_slot(host);
 	int carddetect;
 
@@ -809,8 +810,9 @@ static void mmc_omap_detect(struct work_struct *work)
 		mmc_detect_change(host->mmc, (HZ * 200) / 1000);
 	} else {
 		mmc_host_enable(host->mmc);
-		mmc_omap_reset_controller_fsm(host, SRD);
+		omap_hsmmc_reset_controller_fsm(host, SRD);
 		mmc_host_lazy_disable(host->mmc);
+
 		mmc_detect_change(host->mmc, (HZ * 50) / 1000);
 	}
 }
@@ -818,9 +820,9 @@ static void mmc_omap_detect(struct work_struct *work)
 /*
  * ISR for handling card insertion and removal
  */
-static irqreturn_t omap_mmc_cd_handler(int irq, void *dev_id)
+static irqreturn_t omap_hsmmc_cd_handler(int irq, void *dev_id)
 {
-	struct mmc_omap_host *host = (struct mmc_omap_host *)dev_id;
+	struct omap_hsmmc_host *host = (struct omap_hsmmc_host *)dev_id;
 
 	if (host->suspended)
 		return IRQ_HANDLED;
@@ -829,7 +831,7 @@ static irqreturn_t omap_mmc_cd_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int mmc_omap_get_dma_sync_dev(struct mmc_omap_host *host,
+static int omap_hsmmc_get_dma_sync_dev(struct omap_hsmmc_host *host,
 				     struct mmc_data *data)
 {
 	int sync_dev;
@@ -841,7 +843,7 @@ static int mmc_omap_get_dma_sync_dev(struct mmc_omap_host *host,
 	return sync_dev;
 }
 
-static void mmc_omap_config_dma_params(struct mmc_omap_host *host,
+static void omap_hsmmc_config_dma_params(struct omap_hsmmc_host *host,
 				       struct mmc_data *data,
 				       struct scatterlist *sgl)
 {
@@ -865,7 +867,7 @@ static void mmc_omap_config_dma_params(struct mmc_omap_host *host,
 
 	omap_set_dma_transfer_params(dma_ch, OMAP_DMA_DATA_TYPE_S32,
 			blksz / 4, nblk, OMAP_DMA_SYNC_FRAME,
-			mmc_omap_get_dma_sync_dev(host, data),
+			omap_hsmmc_get_dma_sync_dev(host, data),
 			!(data->flags & MMC_DATA_WRITE));
 
 	omap_start_dma(dma_ch);
@@ -874,9 +876,9 @@ static void mmc_omap_config_dma_params(struct mmc_omap_host *host,
 /*
  * DMA call back function
  */
-static void mmc_omap_dma_cb(int lch, u16 ch_status, void *data)
+static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *data)
 {
-	struct mmc_omap_host *host = data;
+	struct omap_hsmmc_host *host = data;
 
 	if (ch_status & OMAP2_DMA_MISALIGNED_ERR_IRQ)
 		dev_dbg(mmc_dev(host->mmc), "MISALIGNED_ADRS_ERR\n");
@@ -887,7 +889,7 @@ static void mmc_omap_dma_cb(int lch, u16 ch_status, void *data)
 	host->dma_sg_idx++;
 	if (host->dma_sg_idx < host->dma_len) {
 		/* Fire up the next transfer. */
-		mmc_omap_config_dma_params(host, host->data,
+		omap_hsmmc_config_dma_params(host, host->data,
 					   host->data->sg + host->dma_sg_idx);
 		return;
 	}
@@ -904,8 +906,8 @@ static void mmc_omap_dma_cb(int lch, u16 ch_status, void *data)
 /*
  * Routine to configure and start DMA for the MMC card
  */
-static int
-mmc_omap_start_dma_transfer(struct mmc_omap_host *host, struct mmc_request *req)
+static int omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host,
+					struct mmc_request *req)
 {
 	int dma_ch = 0, ret = 0, err = 1, i;
 	struct mmc_data *data = req->data;
@@ -942,8 +944,8 @@ mmc_omap_start_dma_transfer(struct mmc_omap_host *host, struct mmc_request *req)
 			return err;
 	}
 
-	ret = omap_request_dma(mmc_omap_get_dma_sync_dev(host, data), "MMC/SD",
-			       mmc_omap_dma_cb, host, &dma_ch);
+	ret = omap_request_dma(omap_hsmmc_get_dma_sync_dev(host, data),
+			       "MMC/SD", omap_hsmmc_dma_cb, host, &dma_ch);
 	if (ret != 0) {
 		dev_err(mmc_dev(host->mmc),
 			"%s: omap_request_dma() failed with %d\n",
@@ -952,16 +954,16 @@ mmc_omap_start_dma_transfer(struct mmc_omap_host *host, struct mmc_request *req)
 	}
 
 	host->dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg,
-			data->sg_len, mmc_omap_get_dma_dir(host, data));
+			data->sg_len, omap_hsmmc_get_dma_dir(host, data));
 	host->dma_ch = dma_ch;
 	host->dma_sg_idx = 0;
 
-	mmc_omap_config_dma_params(host, data, data->sg);
+	omap_hsmmc_config_dma_params(host, data, data->sg);
 
 	return 0;
 }
 
-static void set_data_timeout(struct mmc_omap_host *host,
+static void set_data_timeout(struct omap_hsmmc_host *host,
 			     struct mmc_request *req)
 {
 	unsigned int timeout, cycle_ns;
@@ -1001,7 +1003,7 @@ static void set_data_timeout(struct mmc_omap_host *host,
  * Configure block length for MMC/SD cards and initiate the transfer.
  */
 static int
-mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
+omap_hsmmc_prepare_data(struct omap_hsmmc_host *host, struct mmc_request *req)
 {
 	int ret;
 	host->data = req->data;
@@ -1016,7 +1018,7 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
 	set_data_timeout(host, req);
 
 	if (host->use_dma) {
-		ret = mmc_omap_start_dma_transfer(host, req);
+		ret = omap_hsmmc_start_dma_transfer(host, req);
 		if (ret != 0) {
 			dev_dbg(mmc_dev(host->mmc), "MMC start dma failure\n");
 			return ret;
@@ -1028,9 +1030,9 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
 /*
  * Request function. for read/write operation
  */
-static void omap_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
+static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 	int err;
 
 	/*
@@ -1042,7 +1044,7 @@ static void omap_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
 		spin_lock_irqsave(&host->irq_lock, host->flags);
 	WARN_ON(host->mrq != NULL);
 	host->mrq = req;
-	err = mmc_omap_prepare_data(host, req);
+	err = omap_hsmmc_prepare_data(host, req);
 	if (err) {
 		req->cmd->error = err;
 		if (req->data)
@@ -1054,14 +1056,13 @@ static void omap_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
 		return;
 	}
 
-	mmc_omap_start_command(host, req->cmd, req->data);
+	omap_hsmmc_start_command(host, req->cmd, req->data);
 }
 
-
 /* Routine to configure clock values. Exposed API to core */
-static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 	u16 dsor = 0;
 	unsigned long regval;
 	unsigned long timeout;
@@ -1120,8 +1121,8 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 				 * MMC_POWER_UP upon recalculating the voltage.
 				 * vdd 1.8v.
 				 */
-				if (omap_mmc_switch_opcond(host, ios->vdd) != 0)
-					dev_dbg(mmc_dev(host->mmc),
+			if (omap_hsmmc_switch_opcond(host, ios->vdd) != 0)
+				dev_dbg(mmc_dev(host->mmc),
 						"Switch operation failed\n");
 		}
 	}
@@ -1137,7 +1138,7 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		if (dsor > 250)
 			dsor = 250;
 	}
-	omap_mmc_stop_clock(host);
+	omap_hsmmc_stop_clock(host);
 	regval = OMAP_HSMMC_READ(host->base, SYSCTL);
 	regval = regval & ~(CLKD_MASK);
 	regval = regval | (dsor << 6) | (DTO << 16);
@@ -1171,7 +1172,7 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 static int omap_hsmmc_get_cd(struct mmc_host *mmc)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
 	if (!mmc_slot(host).card_detect)
 		return -ENOSYS;
@@ -1180,14 +1181,14 @@ static int omap_hsmmc_get_cd(struct mmc_host *mmc)
 
 static int omap_hsmmc_get_ro(struct mmc_host *mmc)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
 	if (!mmc_slot(host).get_ro)
 		return -ENOSYS;
 	return mmc_slot(host).get_ro(host->dev, 0);
 }
 
-static void omap_hsmmc_init(struct mmc_omap_host *host)
+static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
 {
 	u32 hctl, capa, value;
 
@@ -1233,9 +1234,9 @@ static void omap_hsmmc_init(struct mmc_omap_host *host)
 enum {ENABLED = 0, DISABLED, CARDSLEEP, REGSLEEP, OFF};
 
 /* Handler for [ENABLED -> DISABLED] transition */
-static int omap_mmc_enabled_to_disabled(struct mmc_omap_host *host)
+static int omap_hsmmc_enabled_to_disabled(struct omap_hsmmc_host *host)
 {
-	omap_mmc_save_ctx(host);
+	omap_hsmmc_context_save(host);
 	clk_disable(host->fclk);
 	host->dpm_state = DISABLED;
 
@@ -1248,7 +1249,7 @@ static int omap_mmc_enabled_to_disabled(struct mmc_omap_host *host)
 }
 
 /* Handler for [DISABLED -> REGSLEEP / CARDSLEEP] transition */
-static int omap_mmc_disabled_to_sleep(struct mmc_omap_host *host)
+static int omap_hsmmc_disabled_to_sleep(struct omap_hsmmc_host *host)
 {
 	int err, new_state;
 
@@ -1256,7 +1257,7 @@ static int omap_mmc_disabled_to_sleep(struct mmc_omap_host *host)
 		return 0;
 
 	clk_enable(host->fclk);
-	omap_mmc_restore_ctx(host);
+	omap_hsmmc_context_restore(host);
 	if (mmc_card_can_sleep(host->mmc)) {
 		err = mmc_card_sleep(host->mmc);
 		if (err < 0) {
@@ -1265,8 +1266,9 @@ static int omap_mmc_disabled_to_sleep(struct mmc_omap_host *host)
 			return err;
 		}
 		new_state = CARDSLEEP;
-	} else
+	} else {
 		new_state = REGSLEEP;
+	}
 	if (mmc_slot(host).set_sleep)
 		mmc_slot(host).set_sleep(host->dev, host->slot_id, 1, 0,
 					 new_state == CARDSLEEP);
@@ -1289,7 +1291,7 @@ static int omap_mmc_disabled_to_sleep(struct mmc_omap_host *host)
 }
 
 /* Handler for [REGSLEEP / CARDSLEEP -> OFF] transition */
-static int omap_mmc_sleep_to_off(struct mmc_omap_host *host)
+static int omap_hsmmc_sleep_to_off(struct omap_hsmmc_host *host)
 {
 	if (!mmc_try_claim_host(host->mmc))
 		return 0;
@@ -1317,7 +1319,7 @@ static int omap_mmc_sleep_to_off(struct mmc_omap_host *host)
 }
 
 /* Handler for [DISABLED -> ENABLED] transition */
-static int omap_mmc_disabled_to_enabled(struct mmc_omap_host *host)
+static int omap_hsmmc_disabled_to_enabled(struct omap_hsmmc_host *host)
 {
 	int err;
 
@@ -1325,8 +1327,7 @@ static int omap_mmc_disabled_to_enabled(struct mmc_omap_host *host)
 	if (err < 0)
 		return err;
 
-	omap_mmc_restore_ctx(host);
-
+	omap_hsmmc_context_restore(host);
 	host->dpm_state = ENABLED;
 
 	dev_dbg(mmc_dev(host->mmc), "DISABLED -> ENABLED\n");
@@ -1335,13 +1336,13 @@ static int omap_mmc_disabled_to_enabled(struct mmc_omap_host *host)
 }
 
 /* Handler for [SLEEP -> ENABLED] transition */
-static int omap_mmc_sleep_to_enabled(struct mmc_omap_host *host)
+static int omap_hsmmc_sleep_to_enabled(struct omap_hsmmc_host *host)
 {
 	if (!mmc_try_claim_host(host->mmc))
 		return 0;
 
 	clk_enable(host->fclk);
-	omap_mmc_restore_ctx(host);
+	omap_hsmmc_context_restore(host);
 	if (mmc_slot(host).set_sleep)
 		mmc_slot(host).set_sleep(host->dev, host->slot_id, 0,
 			 host->vdd, host->dpm_state == CARDSLEEP);
@@ -1359,12 +1360,12 @@ static int omap_mmc_sleep_to_enabled(struct mmc_omap_host *host)
 }
 
 /* Handler for [OFF -> ENABLED] transition */
-static int omap_mmc_off_to_enabled(struct mmc_omap_host *host)
+static int omap_hsmmc_off_to_enabled(struct omap_hsmmc_host *host)
 {
 	clk_enable(host->fclk);
 
-	omap_mmc_restore_ctx(host);
-	omap_hsmmc_init(host);
+	omap_hsmmc_context_restore(host);
+	omap_hsmmc_conf_bus_power(host);
 	mmc_power_restore_host(host->mmc);
 
 	host->dpm_state = ENABLED;
@@ -1377,18 +1378,18 @@ static int omap_mmc_off_to_enabled(struct mmc_omap_host *host)
 /*
  * Bring MMC host to ENABLED from any other PM state.
  */
-static int omap_mmc_enable(struct mmc_host *mmc)
+static int omap_hsmmc_enable(struct mmc_host *mmc)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
 	switch (host->dpm_state) {
 	case DISABLED:
-		return omap_mmc_disabled_to_enabled(host);
+		return omap_hsmmc_disabled_to_enabled(host);
 	case CARDSLEEP:
 	case REGSLEEP:
-		return omap_mmc_sleep_to_enabled(host);
+		return omap_hsmmc_sleep_to_enabled(host);
 	case OFF:
-		return omap_mmc_off_to_enabled(host);
+		return omap_hsmmc_off_to_enabled(host);
 	default:
 		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
 		return -EINVAL;
@@ -1398,68 +1399,68 @@ static int omap_mmc_enable(struct mmc_host *mmc)
 /*
  * Bring MMC host in PM state (one level deeper).
  */
-static int omap_mmc_disable(struct mmc_host *mmc, int lazy)
+static int omap_hsmmc_disable(struct mmc_host *mmc, int lazy)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
 	switch (host->dpm_state) {
 	case ENABLED: {
 		int delay;
 
-		delay = omap_mmc_enabled_to_disabled(host);
+		delay = omap_hsmmc_enabled_to_disabled(host);
 		if (lazy || delay < 0)
 			return delay;
 		return 0;
 	}
 	case DISABLED:
-		return omap_mmc_disabled_to_sleep(host);
+		return omap_hsmmc_disabled_to_sleep(host);
 	case CARDSLEEP:
 	case REGSLEEP:
-		return omap_mmc_sleep_to_off(host);
+		return omap_hsmmc_sleep_to_off(host);
 	default:
 		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
 		return -EINVAL;
 	}
 }
 
-static int omap_mmc_enable_fclk(struct mmc_host *mmc)
+static int omap_hsmmc_enable_fclk(struct mmc_host *mmc)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 	int err;
 
 	err = clk_enable(host->fclk);
 	if (err)
 		return err;
 	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n");
-	omap_mmc_restore_ctx(host);
+	omap_hsmmc_context_restore(host);
 	return 0;
 }
 
-static int omap_mmc_disable_fclk(struct mmc_host *mmc, int lazy)
+static int omap_hsmmc_disable_fclk(struct mmc_host *mmc, int lazy)
 {
-	struct mmc_omap_host *host = mmc_priv(mmc);
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
-	omap_mmc_save_ctx(host);
+	omap_hsmmc_context_save(host);
 	clk_disable(host->fclk);
 	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n");
 	return 0;
 }
 
-static const struct mmc_host_ops mmc_omap_ops = {
-	.enable = omap_mmc_enable_fclk,
-	.disable = omap_mmc_disable_fclk,
-	.request = omap_mmc_request,
-	.set_ios = omap_mmc_set_ios,
+static const struct mmc_host_ops omap_hsmmc_ops = {
+	.enable = omap_hsmmc_enable_fclk,
+	.disable = omap_hsmmc_disable_fclk,
+	.request = omap_hsmmc_request,
+	.set_ios = omap_hsmmc_set_ios,
 	.get_cd = omap_hsmmc_get_cd,
 	.get_ro = omap_hsmmc_get_ro,
 	/* NYET -- enable_sdio_irq */
 };
 
-static const struct mmc_host_ops mmc_omap_ps_ops = {
-	.enable = omap_mmc_enable,
-	.disable = omap_mmc_disable,
-	.request = omap_mmc_request,
-	.set_ios = omap_mmc_set_ios,
+static const struct mmc_host_ops omap_hsmmc_ps_ops = {
+	.enable = omap_hsmmc_enable,
+	.disable = omap_hsmmc_disable,
+	.request = omap_hsmmc_request,
+	.set_ios = omap_hsmmc_set_ios,
 	.get_cd = omap_hsmmc_get_cd,
 	.get_ro = omap_hsmmc_get_ro,
 	/* NYET -- enable_sdio_irq */
@@ -1467,15 +1468,14 @@ static const struct mmc_host_ops mmc_omap_ps_ops = {
 
 #ifdef CONFIG_DEBUG_FS
 
-static int mmc_regs_show(struct seq_file *s, void *data)
+static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
 {
 	struct mmc_host *mmc = s->private;
-	struct mmc_omap_host *host = mmc_priv(mmc);
-	struct omap_mmc_platform_data *pdata = host->pdata;
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
 	int context_loss = 0;
 
-	if (pdata->get_context_loss_count)
-		context_loss = pdata->get_context_loss_count(host->dev);
+	if (host->pdata->get_context_loss_count)
+		context_loss = host->pdata->get_context_loss_count(host->dev);
 
 	seq_printf(s, "mmc%d:\n"
 			" enabled:\t%d\n"
@@ -1517,19 +1517,19 @@ static int mmc_regs_show(struct seq_file *s, void *data)
 	return 0;
 }
 
-static int mmc_regs_open(struct inode *inode, struct file *file)
+static int omap_hsmmc_regs_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, mmc_regs_show, inode->i_private);
+	return single_open(file, omap_hsmmc_regs_show, inode->i_private);
 }
 
 static const struct file_operations mmc_regs_fops = {
-	.open           = mmc_regs_open,
+	.open           = omap_hsmmc_regs_open,
 	.read           = seq_read,
 	.llseek         = seq_lseek,
 	.release        = single_release,
 };
 
-static void omap_mmc_debugfs(struct mmc_host *mmc)
+static void omap_hsmmc_debugfs(struct mmc_host *mmc)
 {
 	if (mmc->debugfs_root)
 		debugfs_create_file("regs", S_IRUSR, mmc->debugfs_root,
@@ -1538,17 +1538,17 @@ static void omap_mmc_debugfs(struct mmc_host *mmc)
 
 #else
 
-static void omap_mmc_debugfs(struct mmc_host *mmc)
+static void omap_hsmmc_debugfs(struct mmc_host *mmc)
 {
 }
 
 #endif
 
-static int __init omap_mmc_probe(struct platform_device *pdev)
+static int __init omap_hsmmc_probe(struct platform_device *pdev)
 {
 	struct omap_mmc_platform_data *pdata = pdev->dev.platform_data;
 	struct mmc_host *mmc;
-	struct mmc_omap_host *host = NULL;
+	struct omap_hsmmc_host *host = NULL;
 	struct resource *res;
 	int ret = 0, irq;
 
@@ -1572,7 +1572,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	if (res == NULL)
 		return -EBUSY;
 
-	mmc = mmc_alloc_host(sizeof(struct mmc_omap_host), &pdev->dev);
+	mmc = mmc_alloc_host(sizeof(struct omap_hsmmc_host), &pdev->dev);
 	if (!mmc) {
 		ret = -ENOMEM;
 		goto err;
@@ -1593,12 +1593,12 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	host->power_mode = -1;
 
 	platform_set_drvdata(pdev, host);
-	INIT_WORK(&host->mmc_carddetect_work, mmc_omap_detect);
+	INIT_WORK(&host->mmc_carddetect_work, omap_hsmmc_detect);
 
 	if (mmc_slot(host).power_saving)
-		mmc->ops	= &mmc_omap_ps_ops;
+		mmc->ops	= &omap_hsmmc_ps_ops;
 	else
-		mmc->ops	= &mmc_omap_ops;
+		mmc->ops	= &omap_hsmmc_ops;
 
 	mmc->f_min	= 400000;
 	mmc->f_max	= 52000000;
@@ -1620,7 +1620,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 		goto err1;
 	}
 
-	omap_mmc_save_ctx(host);
+	omap_hsmmc_context_save(host);
 
 	mmc->caps |= MMC_CAP_DISABLE;
 	mmc_set_disable_delay(mmc, OMAP_MMC_DISABLED_TIMEOUT);
@@ -1674,7 +1674,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	if (mmc_slot(host).nonremovable)
 		mmc->caps |= MMC_CAP_NONREMOVABLE;
 
-	omap_hsmmc_init(host);
+	omap_hsmmc_conf_bus_power(host);
 
 	/* Select DMA lines */
 	switch (host->id) {
@@ -1696,7 +1696,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	}
 
 	/* Request IRQ for MMC operations */
-	ret = request_irq(host->irq, mmc_omap_irq, IRQF_DISABLED,
+	ret = request_irq(host->irq, omap_hsmmc_irq, IRQF_DISABLED,
 			mmc_hostname(mmc), host);
 	if (ret) {
 		dev_dbg(mmc_dev(host->mmc), "Unable to grab HSMMC IRQ\n");
@@ -1706,7 +1706,8 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	/* initialize power supplies, gpios, etc */
 	if (pdata->init != NULL) {
 		if (pdata->init(&pdev->dev) != 0) {
-			dev_dbg(mmc_dev(host->mmc), "late init error\n");
+			dev_dbg(mmc_dev(host->mmc),
+				"Unable to configure MMC IRQs\n");
 			goto err_irq_cd_init;
 		}
 	}
@@ -1715,7 +1716,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	/* Request IRQ for card detect */
 	if ((mmc_slot(host).card_detect_irq)) {
 		ret = request_irq(mmc_slot(host).card_detect_irq,
-				  omap_mmc_cd_handler,
+				  omap_hsmmc_cd_handler,
 				  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
 					  | IRQF_DISABLED,
 				  mmc_hostname(mmc), host);
@@ -1745,7 +1746,7 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 			goto err_cover_switch;
 	}
 
-	omap_mmc_debugfs(mmc);
+	omap_hsmmc_debugfs(mmc);
 
 	return 0;
 
@@ -1777,9 +1778,9 @@ err:
 	return ret;
 }
 
-static int omap_mmc_remove(struct platform_device *pdev)
+static int omap_hsmmc_remove(struct platform_device *pdev)
 {
-	struct mmc_omap_host *host = platform_get_drvdata(pdev);
+	struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
 	struct resource *res;
 
 	if (host) {
@@ -1814,10 +1815,10 @@ static int omap_mmc_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state)
+static int omap_hsmmc_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	int ret = 0;
-	struct mmc_omap_host *host = platform_get_drvdata(pdev);
+	struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
 
 	if (host && host->suspended)
 		return 0;
@@ -1865,10 +1866,10 @@ static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state)
 }
 
 /* Routine to resume the MMC device */
-static int omap_mmc_resume(struct platform_device *pdev)
+static int omap_hsmmc_resume(struct platform_device *pdev)
 {
 	int ret = 0;
-	struct mmc_omap_host *host = platform_get_drvdata(pdev);
+	struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
 
 	if (host && !host->suspended)
 		return 0;
@@ -1887,7 +1888,7 @@ static int omap_mmc_resume(struct platform_device *pdev)
 			goto clk_en_err;
 		}
 
-		omap_hsmmc_init(host);
+		omap_hsmmc_conf_bus_power(host);
 
 		if (host->pdata->resume) {
 			ret = host->pdata->resume(&pdev->dev, host->slot_id);
@@ -1900,6 +1901,7 @@ static int omap_mmc_resume(struct platform_device *pdev)
 		ret = mmc_resume_host(host->mmc);
 		if (ret == 0)
 			host->suspended = 0;
+
 		mmc_host_lazy_disable(host->mmc);
 	}
 
@@ -1912,34 +1914,34 @@ clk_en_err:
 }
 
 #else
-#define omap_mmc_suspend	NULL
-#define omap_mmc_resume		NULL
+#define omap_hsmmc_suspend	NULL
+#define omap_hsmmc_resume		NULL
 #endif
 
-static struct platform_driver omap_mmc_driver = {
-	.remove		= omap_mmc_remove,
-	.suspend	= omap_mmc_suspend,
-	.resume		= omap_mmc_resume,
+static struct platform_driver omap_hsmmc_driver = {
+	.remove		= omap_hsmmc_remove,
+	.suspend	= omap_hsmmc_suspend,
+	.resume		= omap_hsmmc_resume,
 	.driver		= {
 		.name = DRIVER_NAME,
 		.owner = THIS_MODULE,
 	},
 };
 
-static int __init omap_mmc_init(void)
+static int __init omap_hsmmc_init(void)
 {
 	/* Register the MMC driver */
-	return platform_driver_probe(&omap_mmc_driver, omap_mmc_probe);
+	return platform_driver_register(&omap_hsmmc_driver);
 }
 
-static void __exit omap_mmc_cleanup(void)
+static void __exit omap_hsmmc_cleanup(void)
 {
 	/* Unregister MMC driver */
-	platform_driver_unregister(&omap_mmc_driver);
+	platform_driver_unregister(&omap_hsmmc_driver);
 }
 
-module_init(omap_mmc_init);
-module_exit(omap_mmc_cleanup);
+module_init(omap_hsmmc_init);
+module_exit(omap_hsmmc_cleanup);
 
 MODULE_DESCRIPTION("OMAP High Speed Multimedia Card driver");
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From b62f622812c5aaacad217fd8f4445562b917df6d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:45:01 -0700
Subject: omap_hsmmc: protect the card when the cover is open

Depending on the manufacturer, there is a small possibility that removing
a card while it is being written to, can render the card permanently
unusable.  To prevent that, the card is made inaccessible when the cover
is open.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 20746e4..a20fafe 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -167,6 +167,8 @@ struct omap_hsmmc_host {
 	int			context_loss;
 	int			dpm_state;
 	int			vdd;
+	int			protect_card;
+	int			reqs_blocked;
 
 	struct	omap_mmc_platform_data	*pdata;
 };
@@ -351,6 +353,9 @@ static void send_init_stream(struct omap_hsmmc_host *host)
 	int reg = 0;
 	unsigned long timeout;
 
+	if (host->protect_card)
+		return;
+
 	disable_irq(host->irq);
 	OMAP_HSMMC_WRITE(host->base, CON,
 		OMAP_HSMMC_READ(host->base, CON) | INIT_STREAM);
@@ -786,6 +791,30 @@ err:
 	return ret;
 }
 
+/* Protect the card while the cover is open */
+static void omap_hsmmc_protect_card(struct omap_hsmmc_host *host)
+{
+	if (!mmc_slot(host).get_cover_state)
+		return;
+
+	host->reqs_blocked = 0;
+	if (mmc_slot(host).get_cover_state(host->dev, host->slot_id)) {
+		if (host->protect_card) {
+			printk(KERN_INFO "%s: cover is closed, "
+					 "card is now accessible\n",
+					 mmc_hostname(host->mmc));
+			host->protect_card = 0;
+		}
+	} else {
+		if (!host->protect_card) {
+			printk(KERN_INFO "%s: cover is open, "
+					 "card is now inaccessible\n",
+					 mmc_hostname(host->mmc));
+			host->protect_card = 1;
+		}
+	}
+}
+
 /*
  * Work Item to notify the core about card insertion/removal
  */
@@ -803,8 +832,10 @@ static void omap_hsmmc_detect(struct work_struct *work)
 
 	if (slot->card_detect)
 		carddetect = slot->card_detect(slot->card_detect_irq);
-	else
+	else {
+		omap_hsmmc_protect_card(host);
 		carddetect = -ENOSYS;
+	}
 
 	if (carddetect) {
 		mmc_detect_change(host->mmc, (HZ * 200) / 1000);
@@ -1040,8 +1071,32 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
 	 * interrupts, but not if we are already in interrupt context i.e.
 	 * retries.
 	 */
-	if (!in_interrupt())
+	if (!in_interrupt()) {
 		spin_lock_irqsave(&host->irq_lock, host->flags);
+		/*
+		 * Protect the card from I/O if there is a possibility
+		 * it can be removed.
+		 */
+		if (host->protect_card) {
+			if (host->reqs_blocked < 3) {
+				/*
+				 * Ensure the controller is left in a consistent
+				 * state by resetting the command and data state
+				 * machines.
+				 */
+				omap_hsmmc_reset_controller_fsm(host, SRD);
+				omap_hsmmc_reset_controller_fsm(host, SRC);
+				host->reqs_blocked += 1;
+			}
+			req->cmd->error = -EBADF;
+			if (req->data)
+				req->data->error = -EBADF;
+			spin_unlock_irqrestore(&host->irq_lock, host->flags);
+			mmc_request_done(mmc, req);
+			return;
+		} else if (host->reqs_blocked)
+			host->reqs_blocked = 0;
+	}
 	WARN_ON(host->mrq != NULL);
 	host->mrq = req;
 	err = omap_hsmmc_prepare_data(host, req);
@@ -1732,6 +1787,8 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 
 	mmc_host_lazy_disable(host->mmc);
 
+	omap_hsmmc_protect_card(host);
+
 	mmc_add_host(mmc);
 
 	if (mmc_slot(host).name != NULL) {
@@ -1897,6 +1954,8 @@ static int omap_hsmmc_resume(struct platform_device *pdev)
 					"Unmask interrupt failed\n");
 		}
 
+		omap_hsmmc_protect_card(host);
+
 		/* Notify the core to resume the host */
 		ret = mmc_resume_host(host->mmc);
 		if (ret == 0)
-- 
cgit v0.10.2


From 2bec08937e3cdf6828d29421c7f870dca84f3b02 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:45:02 -0700
Subject: omap_hsmmc: ensure all clock enables and disables are paired

[madhu.cr@ti.com: fix for the db clock failure message]
Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Madhusudhan Chikkature <madhu.cr@ti.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index a20fafe..14c58ca 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -162,7 +162,7 @@ struct omap_hsmmc_host {
 	int			use_dma, dma_ch;
 	int			dma_line_tx, dma_line_rx;
 	int			slot_id;
-	int			dbclk_enabled;
+	int			got_dbclk;
 	int			response_busy;
 	int			context_loss;
 	int			dpm_state;
@@ -742,22 +742,24 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd)
 	/* Disable the clocks */
 	clk_disable(host->fclk);
 	clk_disable(host->iclk);
-	clk_disable(host->dbclk);
+	if (host->got_dbclk)
+		clk_disable(host->dbclk);
 
 	/* Turn the power off */
 	ret = mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0);
-	if (ret != 0)
-		goto err;
 
 	/* Turn the power ON with given VDD 1.8 or 3.0v */
-	ret = mmc_slot(host).set_power(host->dev, host->slot_id, 1, vdd);
+	if (!ret)
+		ret = mmc_slot(host).set_power(host->dev, host->slot_id, 1,
+					       vdd);
+	clk_enable(host->iclk);
+	clk_enable(host->fclk);
+	if (host->got_dbclk)
+		clk_enable(host->dbclk);
+
 	if (ret != 0)
 		goto err;
 
-	clk_enable(host->fclk);
-	clk_enable(host->iclk);
-	clk_enable(host->dbclk);
-
 	OMAP_HSMMC_WRITE(host->base, HCTL,
 		OMAP_HSMMC_READ(host->base, HCTL) & SDVSCLR);
 	reg_val = OMAP_HSMMC_READ(host->base, HCTL);
@@ -1695,18 +1697,22 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 		goto err1;
 	}
 
-	host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck");
-	/*
-	 * MMC can still work without debounce clock.
-	 */
-	if (IS_ERR(host->dbclk))
-		dev_warn(mmc_dev(host->mmc), "Failed to get debounce clock\n");
-	else
-		if (clk_enable(host->dbclk) != 0)
-			dev_dbg(mmc_dev(host->mmc), "Enabling debounce"
-							" clk failed\n");
+	if (cpu_is_omap2430()) {
+		host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck");
+		/*
+		 * MMC can still work without debounce clock.
+		 */
+		if (IS_ERR(host->dbclk))
+			dev_warn(mmc_dev(host->mmc),
+				"Failed to get debounce clock\n");
 		else
-			host->dbclk_enabled = 1;
+			host->got_dbclk = 1;
+
+		if (host->got_dbclk)
+			if (clk_enable(host->dbclk) != 0)
+				dev_dbg(mmc_dev(host->mmc), "Enabling debounce"
+							" clk failed\n");
+	}
 
 	/* Since we do only SG emulation, we can have as many segs
 	 * as we want. */
@@ -1820,7 +1826,7 @@ err_irq:
 	clk_disable(host->iclk);
 	clk_put(host->fclk);
 	clk_put(host->iclk);
-	if (host->dbclk_enabled) {
+	if (host->got_dbclk) {
 		clk_disable(host->dbclk);
 		clk_put(host->dbclk);
 	}
@@ -1854,7 +1860,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
 		clk_disable(host->iclk);
 		clk_put(host->fclk);
 		clk_put(host->iclk);
-		if (host->dbclk_enabled) {
+		if (host->got_dbclk) {
 			clk_disable(host->dbclk);
 			clk_put(host->dbclk);
 		}
@@ -1905,7 +1911,8 @@ static int omap_hsmmc_suspend(struct platform_device *pdev, pm_message_t state)
 				OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
 			mmc_host_disable(host->mmc);
 			clk_disable(host->iclk);
-			clk_disable(host->dbclk);
+			if (host->got_dbclk)
+				clk_disable(host->dbclk);
 		} else {
 			host->suspended = 0;
 			if (host->pdata->resume) {
@@ -1936,15 +1943,14 @@ static int omap_hsmmc_resume(struct platform_device *pdev)
 		if (ret)
 			goto clk_en_err;
 
-		if (clk_enable(host->dbclk) != 0)
-			dev_dbg(mmc_dev(host->mmc),
-					"Enabling debounce clk failed\n");
-
 		if (mmc_host_enable(host->mmc) != 0) {
 			clk_disable(host->iclk);
 			goto clk_en_err;
 		}
 
+		if (host->got_dbclk)
+			clk_enable(host->dbclk);
+
 		omap_hsmmc_conf_bus_power(host);
 
 		if (host->pdata->resume) {
-- 
cgit v0.10.2


From e2bf08d643a244ccb9d9b70aff655340a0d98626 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:45:03 -0700
Subject: omap_hsmmc: set a large data timeout for commands with busy signal

Commands like SWITCH (CMD6) send a response and then signal busy while the
operation is completed.  These commands are expected to always succeed
(otherwise the response would have indicated an error).

Set an arbitrarily large data timeout value (100ms) for these commands to
ensure that premature timeouts do not occur.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 14c58ca..a4dd43f 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -997,7 +997,8 @@ static int omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host,
 }
 
 static void set_data_timeout(struct omap_hsmmc_host *host,
-			     struct mmc_request *req)
+			     unsigned int timeout_ns,
+			     unsigned int timeout_clks)
 {
 	unsigned int timeout, cycle_ns;
 	uint32_t reg, clkd, dto = 0;
@@ -1008,8 +1009,8 @@ static void set_data_timeout(struct omap_hsmmc_host *host,
 		clkd = 1;
 
 	cycle_ns = 1000000000 / (clk_get_rate(host->fclk) / clkd);
-	timeout = req->data->timeout_ns / cycle_ns;
-	timeout += req->data->timeout_clks;
+	timeout = timeout_ns / cycle_ns;
+	timeout += timeout_clks;
 	if (timeout) {
 		while ((timeout & 0x80000000) == 0) {
 			dto += 1;
@@ -1043,12 +1044,18 @@ omap_hsmmc_prepare_data(struct omap_hsmmc_host *host, struct mmc_request *req)
 
 	if (req->data == NULL) {
 		OMAP_HSMMC_WRITE(host->base, BLK, 0);
+		/*
+		 * Set an arbitrary 100ms data timeout for commands with
+		 * busy signal.
+		 */
+		if (req->cmd->flags & MMC_RSP_BUSY)
+			set_data_timeout(host, 100000000U, 0);
 		return 0;
 	}
 
 	OMAP_HSMMC_WRITE(host->base, BLK, (req->data->blksz)
 					| (req->data->blocks << 16));
-	set_data_timeout(host, req);
+	set_data_timeout(host, req->data->timeout_ns, req->data->timeout_clks);
 
 	if (host->use_dma) {
 		ret = omap_hsmmc_start_dma_transfer(host, req);
-- 
cgit v0.10.2


From 5e763d2968148efafc352748b0ed598b1a695dad Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 22 Sep 2009 16:45:05 -0700
Subject: ARM: OMAP: RX51: set MMC capabilities and power-saving flag

Specify MMC capabilities and set the power-saving flag for RX51.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Cc: Denis Karpov <ext-denis.2.karpov@nokia.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Philip Langdale <philipl@overt.org>
Cc: "Madhusudhan" <madhu.cr@ti.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index e70baa7..e6e8290 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -19,6 +19,7 @@
 #include <linux/delay.h>
 #include <linux/regulator/machine.h>
 #include <linux/gpio.h>
+#include <linux/mmc/host.h>
 
 #include <mach/mcspi.h>
 #include <mach/mux.h>
@@ -102,6 +103,7 @@ static struct twl4030_hsmmc_info mmc[] = {
 		.cover_only	= true,
 		.gpio_cd	= 160,
 		.gpio_wp	= -EINVAL,
+		.power_saving	= true,
 	},
 	{
 		.name		= "internal",
@@ -109,6 +111,8 @@ static struct twl4030_hsmmc_info mmc[] = {
 		.wires		= 8,
 		.gpio_cd	= -EINVAL,
 		.gpio_wp	= -EINVAL,
+		.nonremovable	= true,
+		.power_saving	= true,
 	},
 	{}	/* Terminator */
 };
-- 
cgit v0.10.2


From 653f41b52dfc63fecf4a2333f13be28b159a918c Mon Sep 17 00:00:00 2001
From: Madhusudhan Chikkature <madhu.cr@ti.com>
Date: Tue, 22 Sep 2009 16:45:06 -0700
Subject: MAINTAINERS: update for TI OMAP hsmmc driver

Update maintainers entry for TI OMAP HS MMC support.

Signed-off-by: Madhusudhan Chikkature <madhu.cr@ti.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 017cf69..d8973ca 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3768,7 +3768,13 @@ OMAP MMC SUPPORT
 M:	Jarkko Lavinen <jarkko.lavinen@nokia.com>
 L:	linux-omap@vger.kernel.org
 S:	Maintained
-F:	drivers/mmc/host/*omap*
+F:	drivers/mmc/host/omap.c
+
+OMAP HS MMC SUPPORT
+M:	Madhusudhan Chikkature <madhu.cr@ti.com>
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	drivers/mmc/host/omap_hsmmc.c
 
 OMAP RANDOM NUMBER GENERATOR SUPPORT
 M:	Deepak Saxena <dsaxena@plexity.net>
-- 
cgit v0.10.2


From 006ebd5de13854d6250eecc76866bbfad1ff7daf Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Tue, 22 Sep 2009 16:45:07 -0700
Subject: sdio: add CD disable support

Add support to disconnect the pull-up resistor on CD/DAT[3] (pin 1)
of the card. This may be desired on certain setups of boards,
controllers and embedded sdio devices which do not need the card's
pull-up. As a result, card detection is disabled and power is saved.

[akpm@linux-foundation.org: simplify sdio_disable_cd() a bit]
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Acked-by: Matt Fleming <matt@console-pimps.org>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: "Roberto A. Foglietta" <roberto.foglietta@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 6f221dc..2d24a12 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -165,6 +165,29 @@ static int sdio_enable_wide(struct mmc_card *card)
 }
 
 /*
+ * If desired, disconnect the pull-up resistor on CD/DAT[3] (pin 1)
+ * of the card. This may be required on certain setups of boards,
+ * controllers and embedded sdio device which do not need the card's
+ * pull-up. As a result, card detection is disabled and power is saved.
+ */
+static int sdio_disable_cd(struct mmc_card *card)
+{
+	int ret;
+	u8 ctrl;
+
+	if (!card->cccr.disable_cd)
+		return 0;
+
+	ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_IF, 0, &ctrl);
+	if (ret)
+		return ret;
+
+	ctrl |= SDIO_BUS_CD_DISABLE;
+
+	return mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL);
+}
+
+/*
  * Test if the card supports high-speed mode and, if so, switch to it.
  */
 static int sdio_enable_hs(struct mmc_card *card)
@@ -385,6 +408,13 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
 		goto remove;
 
 	/*
+	 * If needed, disconnect card detection pull-up resistor.
+	 */
+	err = sdio_disable_cd(card);
+	if (err)
+		goto remove;
+
+	/*
 	 * Initialize (but don't add) all present functions.
 	 */
 	for (i = 0;i < funcs;i++) {
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 58f5917..00db39c 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -64,7 +64,8 @@ struct sdio_cccr {
 				low_speed:1,
 				wide_bus:1,
 				high_power:1,
-				high_speed:1;
+				high_speed:1,
+				disable_cd:1;
 };
 
 struct sdio_cis {
-- 
cgit v0.10.2


From e9510176ff728135383f0cdfc9c90cfe57f9e162 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:45:08 -0700
Subject: sdhci: be more strict with get_min_clock() usage

get_min_clock() makes sense only with NONSTANDARD_CLOCK quirk and when
set_clock() callback is specified.

The patch should cause no functional changes, it just makes the code
self-documented and avoids any possible misuse of get_min_clock().

Suggested-by: Pierre Ossman <pierre@ossman.eu>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Ian Molton <ian@mnementh.co.uk>
Cc: Matt Fleming <matt@console-pimps.org>
Cc: Philip Langdale <philipl@overt.org>
Cc: Pierre Ossman <pierre@ossman.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index fc96f8c..7f7f45b 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1772,7 +1772,8 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * Set host parameters.
 	 */
 	mmc->ops = &sdhci_ops;
-	if (host->ops->get_min_clock)
+	if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK &&
+			host->ops->set_clock && host->ops->get_min_clock)
 		mmc->f_min = host->ops->get_min_clock(host);
 	else
 		mmc->f_min = host->max_clk / 256;
-- 
cgit v0.10.2


From 4245c0256da0784b1f96d01ff263a71a4ca3894e Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 22 Sep 2009 16:45:09 -0700
Subject: sdio: fix read buffer overflow

Avoid buffer underrun when parsing an invalid CISTPL_VERS_1.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
index 963f293..6636354 100644
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -40,7 +40,7 @@ static int cistpl_vers_1(struct mmc_card *card, struct sdio_func *func,
 			nr_strings++;
 	}
 
-	if (buf[i-1] != '\0') {
+	if (nr_strings < 4) {
 		printk(KERN_WARNING "SDIO: ignoring broken CISTPL_VERS_1\n");
 		return 0;
 	}
-- 
cgit v0.10.2


From 1e5df7525d0705dfbfded0c10a7b87b0a754a35d Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:45:10 -0700
Subject: sdhci-of: fix SD clock calculation

Linear divisor's values in a register start at 0 (zero means "divide by
1").  Before this patch the code didn't account that fact, so SD cards
were running underclocked.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Ben Dooks <ben@fluff.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 1e8aa590..2b07569 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -136,6 +136,7 @@ static void esdhc_set_clock(struct sdhci_host *host, unsigned int clock)
 	}
 
 	pre_div >>= 1;
+	div--;
 
 	setbits32(host->ioaddr + ESDHC_SYSTEM_CONTROL, ESDHC_CLOCK_IPGEN |
 		  ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN |
-- 
cgit v0.10.2


From c08592698534f390afe726c38301aa8f1620c361 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:45:11 -0700
Subject: sdhci-of: avoid writing reserved bits into host control register

SDHCI core tries to write HISPD bit into the host control register, but
the eSDHC controllers don't have that bit, and that causes all sorts of
misbehaviour when using 4-bit mode capable SD cards.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Ben Dooks <ben@fluff.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 2b07569..3439fc1 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -48,6 +48,8 @@ struct sdhci_of_host {
 #define ESDHC_CLOCK_HCKEN	0x00000002
 #define ESDHC_CLOCK_IPGEN	0x00000001
 
+#define ESDHC_HOST_CONTROL_RES	0x05
+
 static u32 esdhc_readl(struct sdhci_host *host, int reg)
 {
 	return in_be32(host->ioaddr + reg);
@@ -109,6 +111,10 @@ static void esdhc_writeb(struct sdhci_host *host, u8 val, int reg)
 	int base = reg & ~0x3;
 	int shift = (reg & 0x3) * 8;
 
+	/* Prevent SDHCI core from writing reserved bits (e.g. HISPD). */
+	if (reg == SDHCI_HOST_CONTROL)
+		val &= ~ESDHC_HOST_CONTROL_RES;
+
 	clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift);
 }
 
-- 
cgit v0.10.2


From 81b39802468fe4bf5c6b038837319b608acfdd3e Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:45:13 -0700
Subject: sdhci-of: fix high-speed cards recognition

eSDHC fails to recognize some SDHS cards, throwing timeout errors:

  mmc0: error -110 whilst initialising SD card

That's because we calculate timeout value in a wrong way: on eSDHC hosts
the timeout clock is derivied from the SD clock, which is set dynamically.

As David Vrabel suggested, deriving timeout clock from SD clock is a
common scheme, so let's implement DATA_TIMEOUT_USES_SDCLK quirk and use it
for eSDHC hosts.

Also, from now on we don't need esdhc_get_timeout_clock() callback, so
remove it.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Ben Dooks <ben@fluff.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 3439fc1..56041dc 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -172,19 +172,13 @@ static unsigned int esdhc_get_min_clock(struct sdhci_host *host)
 	return of_host->clock / 256 / 16;
 }
 
-static unsigned int esdhc_get_timeout_clock(struct sdhci_host *host)
-{
-	struct sdhci_of_host *of_host = sdhci_priv(host);
-
-	return of_host->clock / 1000;
-}
-
 static struct sdhci_of_data sdhci_esdhc = {
 	.quirks = SDHCI_QUIRK_FORCE_BLK_SZ_2048 |
 		  SDHCI_QUIRK_BROKEN_CARD_DETECTION |
 		  SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
 		  SDHCI_QUIRK_NO_BUSY_IRQ |
 		  SDHCI_QUIRK_NONSTANDARD_CLOCK |
+		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
 		  SDHCI_QUIRK_PIO_NEEDS_DELAY |
 		  SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET |
 		  SDHCI_QUIRK_NO_CARD_NO_RESET,
@@ -199,7 +193,6 @@ static struct sdhci_of_data sdhci_esdhc = {
 		.enable_dma = esdhc_enable_dma,
 		.get_max_clock = esdhc_get_max_clock,
 		.get_min_clock = esdhc_get_min_clock,
-		.get_timeout_clock = esdhc_get_timeout_clock,
 	},
 };
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 7f7f45b..38a7874 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -591,6 +591,9 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data)
 	target_timeout = data->timeout_ns / 1000 +
 		data->timeout_clks / host->clock;
 
+	if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)
+		host->timeout_clk = host->clock / 1000;
+
 	/*
 	 * Figure out needed cycles.
 	 * We do this in steps in order to fit inside a 32 bit int.
@@ -1757,13 +1760,15 @@ int sdhci_add_host(struct sdhci_host *host)
 	host->timeout_clk =
 		(caps & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT;
 	if (host->timeout_clk == 0) {
-		if (!host->ops->get_timeout_clock) {
+		if (host->ops->get_timeout_clock) {
+			host->timeout_clk = host->ops->get_timeout_clock(host);
+		} else if (!(host->quirks &
+				SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) {
 			printk(KERN_ERR
 			       "%s: Hardware doesn't specify timeout clock "
 			       "frequency.\n", mmc_hostname(mmc));
 			return -ENODEV;
 		}
-		host->timeout_clk = host->ops->get_timeout_clock(host);
 	}
 	if (caps & SDHCI_TIMEOUT_CLK_UNIT)
 		host->timeout_clk *= 1000;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index c77e9ff..afda7f1 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -232,6 +232,8 @@ struct sdhci_host {
 #define SDHCI_QUIRK_FORCE_1_BIT_DATA			(1<<22)
 /* Controller needs 10ms delay between applying power and clock */
 #define SDHCI_QUIRK_DELAY_AFTER_POWER			(1<<23)
+/* Controller uses SDCLK instead of TMCLK for data timeouts */
+#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK		(1<<24)
 
 	int			irq;		/* Device IRQ */
 	void __iomem *		ioaddr;		/* Mapped address */
-- 
cgit v0.10.2


From 50dfe70fe9e216cf356830194630f9a39e498d76 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:45:14 -0700
Subject: powerpc: introduce and document sdhci,wp-inverted property for eSDHC

eSDHC block in MPC837x SOCs reports inverted write-protect state, soon
sdhci-of driver will look for sdhci,wp-inverted properties to decide
whether apply a specific quirk.

So, document the property and add it to device tree source files.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Ben Dooks <ben@fluff.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/powerpc/dts-bindings/fsl/esdhc.txt b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
index 3ed3797..8a00407 100644
--- a/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
@@ -10,6 +10,8 @@ Required properties:
   - interrupts : should contain eSDHC interrupt.
   - interrupt-parent : interrupt source phandle.
   - clock-frequency : specifies eSDHC base clock frequency.
+  - sdhci,wp-inverted : (optional) specifies that eSDHC controller
+    reports inverted write-protect state;
   - sdhci,1-bit-only : (optional) specifies that a controller can
     only handle 1-bit data transfers.
 
diff --git a/arch/powerpc/boot/dts/mpc8377_mds.dts b/arch/powerpc/boot/dts/mpc8377_mds.dts
index f32c281..855782c 100644
--- a/arch/powerpc/boot/dts/mpc8377_mds.dts
+++ b/arch/powerpc/boot/dts/mpc8377_mds.dts
@@ -159,6 +159,7 @@
 				reg = <0x2e000 0x1000>;
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
 				/* Filled in by U-Boot */
 				clock-frequency = <0>;
 			};
diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts
index 28e022a..9e2264b 100644
--- a/arch/powerpc/boot/dts/mpc8377_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -173,6 +173,7 @@
 				reg = <0x2e000 0x1000>;
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
 				/* Filled in by U-Boot */
 				clock-frequency = <111111111>;
 			};
diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts
index 3febc4e..9a60369 100644
--- a/arch/powerpc/boot/dts/mpc8377_wlan.dts
+++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts
@@ -150,6 +150,7 @@
 				reg = <0x2e000 0x1000>;
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
 				clock-frequency = <133333333>;
 			};
 		};
diff --git a/arch/powerpc/boot/dts/mpc8378_mds.dts b/arch/powerpc/boot/dts/mpc8378_mds.dts
index f720ab9..f70cf60 100644
--- a/arch/powerpc/boot/dts/mpc8378_mds.dts
+++ b/arch/powerpc/boot/dts/mpc8378_mds.dts
@@ -159,6 +159,7 @@
 				reg = <0x2e000 0x1000>;
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
 				/* Filled in by U-Boot */
 				clock-frequency = <0>;
 			};
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts
index a11ead8..4e6a1a4 100644
--- a/arch/powerpc/boot/dts/mpc8378_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -173,6 +173,7 @@
 				reg = <0x2e000 0x1000>;
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
 				/* Filled in by U-Boot */
 				clock-frequency = <111111111>;
 			};
diff --git a/arch/powerpc/boot/dts/mpc8379_mds.dts b/arch/powerpc/boot/dts/mpc8379_mds.dts
index 4fa221f..645ec51 100644
--- a/arch/powerpc/boot/dts/mpc8379_mds.dts
+++ b/arch/powerpc/boot/dts/mpc8379_mds.dts
@@ -157,6 +157,7 @@
 				reg = <0x2e000 0x1000>;
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
 				/* Filled in by U-Boot */
 				clock-frequency = <0>;
 			};
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts
index e35dfba..72336d5 100644
--- a/arch/powerpc/boot/dts/mpc8379_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -171,6 +171,7 @@
 				reg = <0x2e000 0x1000>;
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
 				/* Filled in by U-Boot */
 				clock-frequency = <111111111>;
 			};
-- 
cgit v0.10.2


From 8226a219254bbcd20492df185f191a11a7a81dcd Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:45:15 -0700
Subject: sdhci-of: don't hard-code inverted write-protect quirk

MPC85xx SOCs have normal write-protect state reporting, so we shouldn't
hard-code the quirk.

Instead, look for "sdhci,wp-inverted" property, plus check for
mpc837x_{rdb,mds} machines since older device trees don't specify the new
property.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Ben Dooks <ben@fluff.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 56041dc..c8dab07 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -21,6 +21,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/mmc/host.h>
+#include <asm/machdep.h>
 #include "sdhci.h"
 
 struct sdhci_of_data {
@@ -175,7 +176,6 @@ static unsigned int esdhc_get_min_clock(struct sdhci_host *host)
 static struct sdhci_of_data sdhci_esdhc = {
 	.quirks = SDHCI_QUIRK_FORCE_BLK_SZ_2048 |
 		  SDHCI_QUIRK_BROKEN_CARD_DETECTION |
-		  SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
 		  SDHCI_QUIRK_NO_BUSY_IRQ |
 		  SDHCI_QUIRK_NONSTANDARD_CLOCK |
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
@@ -219,6 +219,15 @@ static int sdhci_of_resume(struct of_device *ofdev)
 
 #endif
 
+static bool __devinit sdhci_of_wp_inverted(struct device_node *np)
+{
+	if (of_get_property(np, "sdhci,wp-inverted", NULL))
+		return true;
+
+	/* Old device trees don't have the wp-inverted property. */
+	return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds);
+}
+
 static int __devinit sdhci_of_probe(struct of_device *ofdev,
 				 const struct of_device_id *match)
 {
@@ -261,6 +270,9 @@ static int __devinit sdhci_of_probe(struct of_device *ofdev,
 	if (of_get_property(np, "sdhci,1-bit-only", NULL))
 		host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
 
+	if (sdhci_of_wp_inverted(np))
+		host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
+
 	clk = of_get_property(np, "clock-frequency", &size);
 	if (clk && size == sizeof(*clk) && *clk)
 		of_host->clock = *clk;
-- 
cgit v0.10.2


From ad1e597d4199ffcdee04b9fb402e45c5be6a5052 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:45:16 -0700
Subject: sdhci-of: cleanup eSDHC's set_clock() a little bit

- Get rid of incomprehensible "if { for { if } }" construction for the
  exponential divisor calculation. The first if statement isn't correct
  at all, since it should check for "host->max_clk / pre_div / 16 >
  clock". The error doesn't cause any bugs because the check in the for
  loop does the right thing, and so the outer check becomes useless;

- For the linear divisor do the same: a single while statement is more
  readable than for + if construction;

- Add dev_dbg() that prints desired and actual clock frequency.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Ben Dooks <ben@fluff.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index c8dab07..01ab916 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -121,8 +121,8 @@ static void esdhc_writeb(struct sdhci_host *host, u8 val, int reg)
 
 static void esdhc_set_clock(struct sdhci_host *host, unsigned int clock)
 {
-	int div;
 	int pre_div = 2;
+	int div = 1;
 
 	clrbits32(host->ioaddr + ESDHC_SYSTEM_CONTROL, ESDHC_CLOCK_IPGEN |
 		  ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN | ESDHC_CLOCK_MASK);
@@ -130,17 +130,14 @@ static void esdhc_set_clock(struct sdhci_host *host, unsigned int clock)
 	if (clock == 0)
 		goto out;
 
-	if (host->max_clk / 16 > clock) {
-		for (; pre_div < 256; pre_div *= 2) {
-			if (host->max_clk / pre_div < clock * 16)
-				break;
-		}
-	}
+	while (host->max_clk / pre_div / 16 > clock && pre_div < 256)
+		pre_div *= 2;
 
-	for (div = 1; div <= 16; div++) {
-		if (host->max_clk / (div * pre_div) <= clock)
-			break;
-	}
+	while (host->max_clk / pre_div / div > clock && div < 16)
+		div++;
+
+	dev_dbg(mmc_dev(host->mmc), "desired SD clock: %d, actual: %d\n",
+		clock, host->max_clk / pre_div / div);
 
 	pre_div >>= 1;
 	div--;
-- 
cgit v0.10.2


From 7c979ec7135d96bbff34790bf4b85a8508ede7fc Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Tue, 22 Sep 2009 16:45:18 -0700
Subject: sdio: add MMC_QUIRK_LENIENT_FN0

Normally writes to SDIO function 0 outside the vendor specific CCCR
registers are prohibited.

To support embedded devices that require writes to SDIO function 0 outside
this range (e.g.  TI WL127x embedded sdio wifi device),
MMC_QUIRK_LENIENT_FN0 is introduced.

A card quirks field is added to `struct mmc_card' to support non-standard
devices (e.g.  embedded sdio devices).

[akpm@linux-foundation.org: code in C, not cpp!]
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c
index f61fc2d..f9aa8a7 100644
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c
@@ -624,7 +624,7 @@ void sdio_f0_writeb(struct sdio_func *func, unsigned char b, unsigned int addr,
 
 	BUG_ON(!func);
 
-	if (addr < 0xF0 || addr > 0xFF) {
+	if ((addr < 0xF0 || addr > 0xFF) && (!mmc_card_lenient_fn0(func->card))) {
 		if (err_ret)
 			*err_ret = -EINVAL;
 		return;
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 00db39c..2ee22e8 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -97,6 +97,8 @@ struct mmc_card {
 #define MMC_STATE_READONLY	(1<<1)		/* card is read-only */
 #define MMC_STATE_HIGHSPEED	(1<<2)		/* card is in high speed mode */
 #define MMC_STATE_BLOCKADDR	(1<<3)		/* card uses block-addressing */
+	unsigned int		quirks; 	/* card quirks */
+#define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 
 	u32			raw_cid[4];	/* raw card CID */
 	u32			raw_csd[4];	/* raw card CSD */
@@ -132,6 +134,11 @@ struct mmc_card {
 #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 
+static inline int mmc_card_lenient_fn0(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_LENIENT_FN0;
+}
+
 #define mmc_card_name(c)	((c)->cid.prod_name)
 #define mmc_card_id(c)		(dev_name(&(c)->dev))
 
-- 
cgit v0.10.2


From 04d699c3643fbf75dd72c03a4eacec87149c4aca Mon Sep 17 00:00:00 2001
From: Rob Emanuele <rob@emanuele.us>
Date: Tue, 22 Sep 2009 16:45:19 -0700
Subject: atmel-mci: unified Atmel MCI drivers (AVR32 & AT91)

Unification of the atmel-mci driver to support the AT91 processors MCI
interface.  The atmel-mci driver currently supports the AVR32 and this
patch adds AT91 support.

Add read/write proof selection switch dependent on chip availability of
this feature.

To use this new driver on a at91 the platform driver for your board needs
to be updated.

[nicolas.ferre@atmel.com indent, Kconfig comment and one printk modification]
Signed-off-by: Rob Emanuele <rob@emanuele.us>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Andrew Victor <linux@maxim.org.za>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 34fdfa9..6f20eb9 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -160,6 +160,12 @@ config MMC_AU1X
 
 	  If unsure, say N.
 
+choice
+	prompt "Atmel SD/MMC Driver"
+	default MMC_ATMELMCI if AVR32
+	help
+	  Choose which driver to use for the Atmel MCI Silicon
+
 config MMC_AT91
 	tristate "AT91 SD/MMC Card Interface support"
 	depends on ARCH_AT91
@@ -170,17 +176,19 @@ config MMC_AT91
 
 config MMC_ATMELMCI
 	tristate "Atmel Multimedia Card Interface support"
-	depends on AVR32
+	depends on AVR32 || ARCH_AT91
 	help
 	  This selects the Atmel Multimedia Card Interface driver. If
-	  you have an AT32 (AVR32) platform with a Multimedia Card
-	  slot, say Y or M here.
+	  you have an AT32 (AVR32) or AT91 platform with a Multimedia
+	  Card slot, say Y or M here.
 
 	  If unsure, say N.
 
+endchoice
+
 config MMC_ATMELMCI_DMA
 	bool "Atmel MCI DMA support (EXPERIMENTAL)"
-	depends on MMC_ATMELMCI && DMA_ENGINE && EXPERIMENTAL
+	depends on MMC_ATMELMCI && AVR32 && DMA_ENGINE && EXPERIMENTAL
 	help
 	  Say Y here to have the Atmel MCI driver use a DMA engine to
 	  do data transfers and thus increase the throughput and
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 7b603e4..065fa81 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -30,6 +30,7 @@
 #include <asm/io.h>
 #include <asm/unaligned.h>
 
+#include <mach/cpu.h>
 #include <mach/board.h>
 
 #include "atmel-mci-regs.h"
@@ -210,6 +211,18 @@ struct atmel_mci_slot {
 	set_bit(event, &host->pending_events)
 
 /*
+ * Enable or disable features/registers based on
+ * whether the processor supports them
+ */
+static bool mci_has_rwproof(void)
+{
+	if (cpu_is_at91sam9261() || cpu_is_at91rm9200())
+		return false;
+	else
+		return true;
+}
+
+/*
  * The debugfs stuff below is mostly optimized away when
  * CONFIG_DEBUG_FS is not set.
  */
@@ -276,8 +289,13 @@ static void atmci_show_status_reg(struct seq_file *s,
 		[3]	= "BLKE",
 		[4]	= "DTIP",
 		[5]	= "NOTBUSY",
+		[6]	= "ENDRX",
+		[7]	= "ENDTX",
 		[8]	= "SDIOIRQA",
 		[9]	= "SDIOIRQB",
+		[12]	= "SDIOWAIT",
+		[14]	= "RXBUFF",
+		[15]	= "TXBUFE",
 		[16]	= "RINDE",
 		[17]	= "RDIRE",
 		[18]	= "RCRCE",
@@ -285,6 +303,11 @@ static void atmci_show_status_reg(struct seq_file *s,
 		[20]	= "RTOE",
 		[21]	= "DCRCE",
 		[22]	= "DTOE",
+		[23]	= "CSTOE",
+		[24]	= "BLKOVRE",
+		[25]	= "DMADONE",
+		[26]	= "FIFOEMPTY",
+		[27]	= "XFRDONE",
 		[30]	= "OVRE",
 		[31]	= "UNRE",
 	};
@@ -849,13 +872,15 @@ static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			clkdiv = 255;
 		}
 
+		host->mode_reg = MCI_MR_CLKDIV(clkdiv);
+
 		/*
 		 * WRPROOF and RDPROOF prevent overruns/underruns by
 		 * stopping the clock when the FIFO is full/empty.
 		 * This state is not expected to last for long.
 		 */
-		host->mode_reg = MCI_MR_CLKDIV(clkdiv) | MCI_MR_WRPROOF
-					| MCI_MR_RDPROOF;
+		if (mci_has_rwproof())
+			host->mode_reg |= (MCI_MR_WRPROOF | MCI_MR_RDPROOF);
 
 		if (list_empty(&host->queue))
 			mci_writel(host, MR, host->mode_reg);
@@ -1648,8 +1673,10 @@ static int __init atmci_probe(struct platform_device *pdev)
 			nr_slots++;
 	}
 
-	if (!nr_slots)
+	if (!nr_slots) {
+		dev_err(&pdev->dev, "init failed: no slot defined\n");
 		goto err_init_slot;
+	}
 
 	dev_info(&pdev->dev,
 			"Atmel MCI controller at 0x%08lx irq %d, %u slots\n",
-- 
cgit v0.10.2


From 864f38ebdcb1f7dc4138b7ccb801f16f1696eb8e Mon Sep 17 00:00:00 2001
From: Rob Emanuele <rob@emanuele.us>
Date: Tue, 22 Sep 2009 16:45:22 -0700
Subject: AT91: atmel-mci: Platform configuration to the the atmel-mci driver

Created a modified version of the at91sam9g20 evaluation kit platform
(board-sam9g20ek-2slot-mmc.c) and device support to make use of the
updated atmel-mci driver.

As the use of two slots modify GPIO pin allocation, we create another
board file.

This requires getting the most updated arch/arm/tools/mach-types from
http://www.arm.linux.org.uk/developer/machines/download.php to have the machine
type for the at91sam9g20ek-2slot-mmc board.

[nicolas.ferre@atmel.com: printk, slot_count modification in at91sam9260_devices.c file]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Rob Emanuele <rob@emanuele.us>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Andrew Victor <linux@maxim.org.za>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index a24d824..e35d54d 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -289,6 +289,13 @@ config MACH_NEOCORE926
 	help
 	  Select this if you are using the Adeneo Neocore 926 board.
 
+config MACH_AT91SAM9G20EK_2MMC
+	bool "Atmel AT91SAM9G20-EK Evaluation Kit modified for 2 MMC Slots"
+	depends on ARCH_AT91SAM9G20
+	help
+	  Select this if you are using an Atmel AT91SAM9G20-EK Evaluation Kit
+	  Rev A or B modified for 2 MMC Slots.
+
 endif
 
 # ----------------------------------------------------------
diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile
index a6ed015..ada440a 100644
--- a/arch/arm/mach-at91/Makefile
+++ b/arch/arm/mach-at91/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_MACH_AT91SAM9RLEK)	+= board-sam9rlek.o
 
 # AT91SAM9G20 board-specific support
 obj-$(CONFIG_MACH_AT91SAM9G20EK) += board-sam9g20ek.o
+obj-$(CONFIG_MACH_AT91SAM9G20EK_2MMC) += board-sam9g20ek-2slot-mmc.o
 obj-$(CONFIG_MACH_CPU9G20)	+= board-cpu9krea.o
 
 # AT91SAM9G45 board-specific support
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index ee4ea0e7..07eb7b0 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -278,6 +278,102 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data)
 void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) {}
 #endif
 
+/* --------------------------------------------------------------------
+ *  MMC / SD Slot for Atmel MCI Driver
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE)
+static u64 mmc_dmamask = DMA_BIT_MASK(32);
+static struct mci_platform_data mmc_data;
+
+static struct resource mmc_resources[] = {
+	[0] = {
+		.start	= AT91SAM9260_BASE_MCI,
+		.end	= AT91SAM9260_BASE_MCI + SZ_16K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= AT91SAM9260_ID_MCI,
+		.end	= AT91SAM9260_ID_MCI,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device at91sam9260_mmc_device = {
+	.name		= "atmel_mci",
+	.id		= -1,
+	.dev		= {
+				.dma_mask		= &mmc_dmamask,
+				.coherent_dma_mask	= DMA_BIT_MASK(32),
+				.platform_data		= &mmc_data,
+	},
+	.resource	= mmc_resources,
+	.num_resources	= ARRAY_SIZE(mmc_resources),
+};
+
+void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *data)
+{
+	unsigned int i;
+	unsigned int slot_count = 0;
+
+	if (!data)
+		return;
+
+	for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+		if (data->slot[i].bus_width) {
+			/* input/irq */
+			if (data->slot[i].detect_pin) {
+				at91_set_gpio_input(data->slot[i].detect_pin, 1);
+				at91_set_deglitch(data->slot[i].detect_pin, 1);
+			}
+			if (data->slot[i].wp_pin)
+				at91_set_gpio_input(data->slot[i].wp_pin, 1);
+
+			switch (i) {
+			case 0:
+				/* CMD */
+				at91_set_A_periph(AT91_PIN_PA7, 1);
+				/* DAT0, maybe DAT1..DAT3 */
+				at91_set_A_periph(AT91_PIN_PA6, 1);
+				if (data->slot[i].bus_width == 4) {
+					at91_set_A_periph(AT91_PIN_PA9, 1);
+					at91_set_A_periph(AT91_PIN_PA10, 1);
+					at91_set_A_periph(AT91_PIN_PA11, 1);
+				}
+				slot_count++;
+				break;
+			case 1:
+				/* CMD */
+				at91_set_B_periph(AT91_PIN_PA1, 1);
+				/* DAT0, maybe DAT1..DAT3 */
+				at91_set_B_periph(AT91_PIN_PA0, 1);
+				if (data->slot[i].bus_width == 4) {
+					at91_set_B_periph(AT91_PIN_PA5, 1);
+					at91_set_B_periph(AT91_PIN_PA4, 1);
+					at91_set_B_periph(AT91_PIN_PA3, 1);
+				}
+				slot_count++;
+				break;
+			default:
+				printk(KERN_ERR
+					"AT91: SD/MMC slot %d not available\n", i);
+				break;
+			}
+		}
+	}
+
+	if (slot_count) {
+		/* CLK */
+		at91_set_A_periph(AT91_PIN_PA8, 0);
+
+		mmc_data = *data;
+		platform_device_register(&at91sam9260_mmc_device);
+	}
+}
+#else
+void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *data) {}
+#endif
+
 
 /* --------------------------------------------------------------------
  *  NAND / SmartMedia
diff --git a/arch/arm/mach-at91/board-sam9g20ek-2slot-mmc.c b/arch/arm/mach-at91/board-sam9g20ek-2slot-mmc.c
new file mode 100644
index 0000000..a28e53f
--- /dev/null
+++ b/arch/arm/mach-at91/board-sam9g20ek-2slot-mmc.c
@@ -0,0 +1,277 @@
+/*
+ *  Copyright (C) 2005 SAN People
+ *  Copyright (C) 2008 Atmel
+ *  Copyright (C) 2009 Rob Emanuele
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/at73c213.h>
+#include <linux/clk.h>
+
+#include <mach/hardware.h>
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+
+#include <mach/board.h>
+#include <mach/gpio.h>
+#include <mach/at91sam9_smc.h>
+
+#include "sam9_smc.h"
+#include "generic.h"
+
+
+static void __init ek_map_io(void)
+{
+	/* Initialize processor: 18.432 MHz crystal */
+	at91sam9260_initialize(18432000);
+
+	/* DGBU on ttyS0. (Rx & Tx only) */
+	at91_register_uart(0, 0, 0);
+
+	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
+	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS
+			   | ATMEL_UART_DTR | ATMEL_UART_DSR | ATMEL_UART_DCD
+			   | ATMEL_UART_RI);
+
+	/* USART1 on ttyS2. (Rx, Tx, RTS, CTS) */
+	at91_register_uart(AT91SAM9260_ID_US1, 2, ATMEL_UART_CTS | ATMEL_UART_RTS);
+
+	/* set serial console to ttyS0 (ie, DBGU) */
+	at91_set_serial_console(0);
+}
+
+static void __init ek_init_irq(void)
+{
+	at91sam9260_init_interrupts(NULL);
+}
+
+
+/*
+ * USB Host port
+ */
+static struct at91_usbh_data __initdata ek_usbh_data = {
+	.ports		= 2,
+};
+
+/*
+ * USB Device port
+ */
+static struct at91_udc_data __initdata ek_udc_data = {
+	.vbus_pin	= AT91_PIN_PC5,
+	.pullup_pin	= 0,		/* pull-up driven by UDC */
+};
+
+
+/*
+ * SPI devices.
+ */
+static struct spi_board_info ek_spi_devices[] = {
+#if !defined(CONFIG_MMC_ATMELMCI)
+	{	/* DataFlash chip */
+		.modalias	= "mtd_dataflash",
+		.chip_select	= 1,
+		.max_speed_hz	= 15 * 1000 * 1000,
+		.bus_num	= 0,
+	},
+#if defined(CONFIG_MTD_AT91_DATAFLASH_CARD)
+	{	/* DataFlash card */
+		.modalias	= "mtd_dataflash",
+		.chip_select	= 0,
+		.max_speed_hz	= 15 * 1000 * 1000,
+		.bus_num	= 0,
+	},
+#endif
+#endif
+};
+
+
+/*
+ * MACB Ethernet device
+ */
+static struct at91_eth_data __initdata ek_macb_data = {
+	.phy_irq_pin	= AT91_PIN_PC12,
+	.is_rmii	= 1,
+};
+
+
+/*
+ * NAND flash
+ */
+static struct mtd_partition __initdata ek_nand_partition[] = {
+	{
+		.name   = "Bootstrap",
+		.offset = 0,
+		.size   = 4 * SZ_1M,
+	},
+	{
+		.name	= "Partition 1",
+		.offset	= MTDPART_OFS_NXTBLK,
+		.size	= 60 * SZ_1M,
+	},
+	{
+		.name	= "Partition 2",
+		.offset	= MTDPART_OFS_NXTBLK,
+		.size	= MTDPART_SIZ_FULL,
+	},
+};
+
+static struct mtd_partition * __init nand_partitions(int size, int *num_partitions)
+{
+	*num_partitions = ARRAY_SIZE(ek_nand_partition);
+	return ek_nand_partition;
+}
+
+/* det_pin is not connected */
+static struct atmel_nand_data __initdata ek_nand_data = {
+	.ale		= 21,
+	.cle		= 22,
+	.rdy_pin	= AT91_PIN_PC13,
+	.enable_pin	= AT91_PIN_PC14,
+	.partition_info	= nand_partitions,
+#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16)
+	.bus_width_16	= 1,
+#else
+	.bus_width_16	= 0,
+#endif
+};
+
+static struct sam9_smc_config __initdata ek_nand_smc_config = {
+	.ncs_read_setup		= 0,
+	.nrd_setup		= 2,
+	.ncs_write_setup	= 0,
+	.nwe_setup		= 2,
+
+	.ncs_read_pulse		= 4,
+	.nrd_pulse		= 4,
+	.ncs_write_pulse	= 4,
+	.nwe_pulse		= 4,
+
+	.read_cycle		= 7,
+	.write_cycle		= 7,
+
+	.mode			= AT91_SMC_READMODE | AT91_SMC_WRITEMODE | AT91_SMC_EXNWMODE_DISABLE,
+	.tdf_cycles		= 3,
+};
+
+static void __init ek_add_device_nand(void)
+{
+	/* setup bus-width (8 or 16) */
+	if (ek_nand_data.bus_width_16)
+		ek_nand_smc_config.mode |= AT91_SMC_DBW_16;
+	else
+		ek_nand_smc_config.mode |= AT91_SMC_DBW_8;
+
+	/* configure chip-select 3 (NAND) */
+	sam9_smc_configure(3, &ek_nand_smc_config);
+
+	at91_add_device_nand(&ek_nand_data);
+}
+
+
+/*
+ * MCI (SD/MMC)
+ * det_pin and wp_pin are not connected
+ */
+#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE)
+static struct mci_platform_data __initdata ek_mmc_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+	.slot[1] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+
+};
+#else
+static struct amci_platform_data __initdata ek_mmc_data = {
+};
+#endif
+
+/*
+ * LEDs
+ */
+static struct gpio_led ek_leds[] = {
+	{	/* "bottom" led, green, userled1 to be defined */
+		.name			= "ds5",
+		.gpio			= AT91_PIN_PB12,
+		.active_low		= 1,
+		.default_trigger	= "none",
+	},
+	{	/* "power" led, yellow */
+		.name			= "ds1",
+		.gpio			= AT91_PIN_PB13,
+		.default_trigger	= "heartbeat",
+	}
+};
+
+static struct i2c_board_info __initdata ek_i2c_devices[] = {
+	{
+		I2C_BOARD_INFO("24c512", 0x50),
+	},
+};
+
+
+static void __init ek_board_init(void)
+{
+	/* Serial */
+	at91_add_device_serial();
+	/* USB Host */
+	at91_add_device_usbh(&ek_usbh_data);
+	/* USB Device */
+	at91_add_device_udc(&ek_udc_data);
+	/* SPI */
+	at91_add_device_spi(ek_spi_devices, ARRAY_SIZE(ek_spi_devices));
+	/* NAND */
+	ek_add_device_nand();
+	/* Ethernet */
+	at91_add_device_eth(&ek_macb_data);
+	/* MMC */
+	at91_add_device_mci(0, &ek_mmc_data);
+	/* I2C */
+	at91_add_device_i2c(ek_i2c_devices, ARRAY_SIZE(ek_i2c_devices));
+	/* LEDs */
+	at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds));
+	/* PCK0 provides MCLK to the WM8731 */
+	at91_set_B_periph(AT91_PIN_PC1, 0);
+	/* SSC (for WM8731) */
+	at91_add_device_ssc(AT91SAM9260_ID_SSC, ATMEL_SSC_TX);
+}
+
+MACHINE_START(AT91SAM9G20EK_2MMC, "Atmel AT91SAM9G20-EK 2 MMC Slot Mod")
+	/* Maintainer: Rob Emanuele */
+	.phys_io	= AT91_BASE_SYS,
+	.io_pg_offst	= (AT91_VA_BASE_SYS >> 18) & 0xfffc,
+	.boot_params	= AT91_SDRAM_BASE + 0x100,
+	.timer		= &at91sam926x_timer,
+	.map_io		= ek_map_io,
+	.init_irq	= ek_init_irq,
+	.init_machine	= ek_board_init,
+MACHINE_END
diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h
index 13f27a4..583f38a 100644
--- a/arch/arm/mach-at91/include/mach/board.h
+++ b/arch/arm/mach-at91/include/mach/board.h
@@ -37,6 +37,7 @@
 #include <linux/leds.h>
 #include <linux/spi/spi.h>
 #include <linux/usb/atmel_usba_udc.h>
+#include <linux/atmel-mci.h>
 #include <sound/atmel-ac97c.h>
 
  /* USB Device */
@@ -64,6 +65,7 @@ struct at91_cf_data {
 extern void __init at91_add_device_cf(struct at91_cf_data *data);
 
  /* MMC / SD */
+  /* at91_mci platform config */
 struct at91_mmc_data {
 	u8		det_pin;	/* card detect IRQ */
 	unsigned	slot_b:1;	/* uses Slot B */
@@ -73,6 +75,9 @@ struct at91_mmc_data {
 };
 extern void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data);
 
+  /* atmel-mci platform config */
+extern void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *data);
+
  /* Ethernet (EMAC & MACB) */
 struct at91_eth_data {
 	u32		phy_mask;
-- 
cgit v0.10.2


From c99436fb7505ca2427780d7ae49ebb427bb6f374 Mon Sep 17 00:00:00 2001
From: Chris Ball <cjb@laptop.org>
Date: Tue, 22 Sep 2009 16:45:22 -0700
Subject: sdhci: add no-card-no-reset quirk for Ricoh R5C822/Sony Z11

Card insertion detection is broken without this quirk on a Sony Vaio
Z11, as discussed on linux-mmc here: http://marc.info/?t=125017355000008

Signed-off-by: Chris Ball <cjb@laptop.org>
Tested-by: Norbert Preining <preining@logic.at>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 2f15cc1..63b9d2a 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -83,7 +83,8 @@ static int ricoh_probe(struct sdhci_pci_chip *chip)
 	if (chip->pdev->subsystem_vendor == PCI_VENDOR_ID_IBM)
 		chip->quirks |= SDHCI_QUIRK_CLOCK_BEFORE_RESET;
 
-	if (chip->pdev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG)
+	if (chip->pdev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG ||
+	    chip->pdev->subsystem_vendor == PCI_VENDOR_ID_SONY)
 		chip->quirks |= SDHCI_QUIRK_NO_CARD_NO_RESET;
 
 	return 0;
-- 
cgit v0.10.2


From 82cf818d54f0a415a031eabd0949a81946445198 Mon Sep 17 00:00:00 2001
From: kishore kadiyala <kishore.kadiyala@ti.com>
Date: Tue, 22 Sep 2009 16:45:25 -0700
Subject: omap4: mmc driver support on OMAP4

Add basic support for all 5 MMC controllers on OMAP4.

This patch doesn't include mmc-regulator support

Signed-off-by: Kishore Kadiyala <kishore.kadiyala@ti.com>
Cc: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Acked-by: Madhusudhan Chikkature <madhu.cr@ti.com>
Cc: Russell King <linux@arm.linux.org.uk>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Cc: Sakari Ailus <sakari.ailus@maxwell.research.nokia.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index a2e9156..92009a4 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -397,7 +397,7 @@ static inline void omap_init_sha1_md5(void) { }
 
 /*-------------------------------------------------------------------------*/
 
-#ifdef CONFIG_ARCH_OMAP3
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
 
 #define MMCHS_SYSCONFIG			0x0010
 #define MMCHS_SYSCONFIG_SWRESET		(1 << 1)
@@ -424,8 +424,8 @@ static struct platform_device dummy_pdev = {
  **/
 static void __init omap_hsmmc_reset(void)
 {
-	u32 i, nr_controllers = cpu_is_omap34xx() ? OMAP34XX_NR_MMC :
-		OMAP24XX_NR_MMC;
+	u32 i, nr_controllers = cpu_is_omap44xx() ? OMAP44XX_NR_MMC :
+		(cpu_is_omap34xx() ? OMAP34XX_NR_MMC : OMAP24XX_NR_MMC);
 
 	for (i = 0; i < nr_controllers; i++) {
 		u32 v, base = 0;
@@ -442,8 +442,21 @@ static void __init omap_hsmmc_reset(void)
 		case 2:
 			base = OMAP3_MMC3_BASE;
 			break;
+		case 3:
+			if (!cpu_is_omap44xx())
+				return;
+			base = OMAP4_MMC4_BASE;
+			break;
+		case 4:
+			if (!cpu_is_omap44xx())
+				return;
+			base = OMAP4_MMC5_BASE;
+			break;
 		}
 
+		if (cpu_is_omap44xx())
+			base += OMAP4_MMC_REG_OFFSET;
+
 		dummy_pdev.id = i;
 		dev_set_name(&dummy_pdev.dev, "mmci-omap-hs.%d", i);
 		iclk = clk_get(dev, "ick");
@@ -581,11 +594,23 @@ void __init omap2_init_mmc(struct omap_mmc_platform_data **mmc_data,
 			irq = INT_24XX_MMC2_IRQ;
 			break;
 		case 2:
-			if (!cpu_is_omap34xx())
+			if (!cpu_is_omap44xx() && !cpu_is_omap34xx())
 				return;
 			base = OMAP3_MMC3_BASE;
 			irq = INT_34XX_MMC3_IRQ;
 			break;
+		case 3:
+			if (!cpu_is_omap44xx())
+				return;
+			base = OMAP4_MMC4_BASE + OMAP4_MMC_REG_OFFSET;
+			irq = INT_44XX_MMC4_IRQ;
+			break;
+		case 4:
+			if (!cpu_is_omap44xx())
+				return;
+			base = OMAP4_MMC5_BASE + OMAP4_MMC_REG_OFFSET;
+			irq = INT_44XX_MMC5_IRQ;
+			break;
 		default:
 			continue;
 		}
@@ -593,8 +618,15 @@ void __init omap2_init_mmc(struct omap_mmc_platform_data **mmc_data,
 		if (cpu_is_omap2420()) {
 			size = OMAP2420_MMC_SIZE;
 			name = "mmci-omap";
+		} else if (cpu_is_omap44xx()) {
+			if (i < 3) {
+				base += OMAP4_MMC_REG_OFFSET;
+				irq += IRQ_GIC_START;
+			}
+			size = OMAP4_HSMMC_SIZE;
+			name = "mmci-omap-hs";
 		} else {
-			size = HSMMC_SIZE;
+			size = OMAP3_HSMMC_SIZE;
 			name = "mmci-omap-hs";
 		}
 		omap_mmc_add(name, i, base, size, irq, mmc_data[i]);
diff --git a/arch/arm/plat-omap/include/mach/irqs.h b/arch/arm/plat-omap/include/mach/irqs.h
index fb7cb77..28a1650 100644
--- a/arch/arm/plat-omap/include/mach/irqs.h
+++ b/arch/arm/plat-omap/include/mach/irqs.h
@@ -503,6 +503,7 @@
 #define INT_44XX_FPKA_READY_IRQ	(50 + IRQ_GIC_START)
 #define INT_44XX_SHA1MD51_IRQ	(51 + IRQ_GIC_START)
 #define INT_44XX_RNG_IRQ	(52 + IRQ_GIC_START)
+#define INT_44XX_MMC5_IRQ	(59 + IRQ_GIC_START)
 #define INT_44XX_I2C3_IRQ	(61 + IRQ_GIC_START)
 #define INT_44XX_FPKA_ERROR_IRQ	(64 + IRQ_GIC_START)
 #define INT_44XX_PBIAS_IRQ	(75 + IRQ_GIC_START)
@@ -511,6 +512,7 @@
 #define INT_44XX_TLL_IRQ	(78 + IRQ_GIC_START)
 #define INT_44XX_PARTHASH_IRQ	(79 + IRQ_GIC_START)
 #define INT_44XX_MMC3_IRQ	(94 + IRQ_GIC_START)
+#define INT_44XX_MMC4_IRQ	(96 + IRQ_GIC_START)
 
 
 /* Max. 128 level 2 IRQs (OMAP1610), 192 GPIOs (OMAP730/850) and
diff --git a/arch/arm/plat-omap/include/mach/mmc.h b/arch/arm/plat-omap/include/mach/mmc.h
index 9390297..7229b95 100644
--- a/arch/arm/plat-omap/include/mach/mmc.h
+++ b/arch/arm/plat-omap/include/mach/mmc.h
@@ -25,11 +25,18 @@
 
 #define OMAP24XX_NR_MMC		2
 #define OMAP34XX_NR_MMC		3
+#define OMAP44XX_NR_MMC		5
 #define OMAP2420_MMC_SIZE	OMAP1_MMC_SIZE
-#define HSMMC_SIZE		0x200
+#define OMAP3_HSMMC_SIZE	0x200
+#define OMAP4_HSMMC_SIZE	0x1000
 #define OMAP2_MMC1_BASE		0x4809c000
 #define OMAP2_MMC2_BASE		0x480b4000
 #define OMAP3_MMC3_BASE		0x480ad000
+#define OMAP4_MMC4_BASE		0x480d1000
+#define OMAP4_MMC5_BASE		0x480d5000
+#define OMAP4_MMC_REG_OFFSET	0x100
+#define HSMMC5			(1 << 4)
+#define HSMMC4			(1 << 3)
 #define HSMMC3			(1 << 2)
 #define HSMMC2			(1 << 1)
 #define HSMMC1			(1 << 0)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 6f20eb9..7cb057f 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -132,11 +132,11 @@ config MMC_OMAP
 
 config MMC_OMAP_HS
 	tristate "TI OMAP High Speed Multimedia Card Interface support"
-	depends on ARCH_OMAP2430 || ARCH_OMAP3
+	depends on ARCH_OMAP2430 || ARCH_OMAP3 || ARCH_OMAP4
 	help
 	  This selects the TI OMAP High Speed Multimedia card Interface.
-	  If you have an OMAP2430 or OMAP3 board with a Multimedia Card slot,
-	  say Y or M here.
+	  If you have an OMAP2430 or OMAP3 board or OMAP4 board with a
+	  Multimedia Card slot, say Y or M here.
 
 	  If unsure, say N.
 
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index a4dd43f..4487cc0 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -109,6 +109,8 @@
 #define OMAP_MMC1_DEVID		0
 #define OMAP_MMC2_DEVID		1
 #define OMAP_MMC3_DEVID		2
+#define OMAP_MMC4_DEVID		3
+#define OMAP_MMC5_DEVID		4
 
 #define MMC_TIMEOUT_MS		20
 #define OMAP_MMC_MASTER_CLOCK	96000000
@@ -1758,6 +1760,14 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
 		host->dma_line_tx = OMAP34XX_DMA_MMC3_TX;
 		host->dma_line_rx = OMAP34XX_DMA_MMC3_RX;
 		break;
+	case OMAP_MMC4_DEVID:
+		host->dma_line_tx = OMAP44XX_DMA_MMC4_TX;
+		host->dma_line_rx = OMAP44XX_DMA_MMC4_RX;
+		break;
+	case OMAP_MMC5_DEVID:
+		host->dma_line_tx = OMAP44XX_DMA_MMC5_TX;
+		host->dma_line_rx = OMAP44XX_DMA_MMC5_RX;
+		break;
 	default:
 		dev_err(mmc_dev(host->mmc), "Invalid MMC id\n");
 		goto err_irq;
-- 
cgit v0.10.2


From d08ebeddfb3293fa4bdfca9c610daf1e8ec8b233 Mon Sep 17 00:00:00 2001
From: Wolfgang Muees <wolfgang.mues@auerswald.de>
Date: Tue, 22 Sep 2009 16:45:26 -0700
Subject: mmc_spi: fail gracefully if host or card do not support the switch
 command

Some time ago, I have send a patch to the mmc_spi subsystem changing the
error codes.  This was after a discussion with Pierre about using EINVAL
only for non-recoverable errors.  This patch was accepted as

http://git.kernel.org/linus/fdd858db7113ca64132de390188d7ca00701013d

Unfortunately, several weeks later, I realized that this patch has opened
a little can of worms because there are SD cards on the market which

a) claim that they support the switch command
AND
b) refuse to execute this command if operating in SPI mode.

So, such a card would get unusuable in an embedded linux system in SPI
mode, because the init sequence terminates with an error.

This patch adds the missing error codes to the caller of the switch
command and restores the old behaviour to fail gracefully if these
commands can not execute.

Signed-off-by: Wolfgang Muees <wolfgang.mues@auerswald.de>
Cc: <linux-mmc@vger.kernel.org>
Cc: <stable@kernel.org>		[2.6.31.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index a6b5fe9..ddddad4 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -179,11 +179,11 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 
 	err = mmc_send_ext_csd(card, ext_csd);
 	if (err) {
-		/*
-		 * We all hosts that cannot perform the command
-		 * to fail more gracefully
-		 */
-		if (err != -EINVAL)
+		/* If the host or the card can't do the switch,
+		 * fail more gracefully. */
+		if ((err != -EINVAL)
+		 && (err != -ENOSYS)
+		 && (err != -EFAULT))
 			goto out;
 
 		/*
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 222a609..13b3a6b 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -210,11 +210,11 @@ static int mmc_read_switch(struct mmc_card *card)
 
 	err = mmc_sd_switch(card, 0, 0, 1, status);
 	if (err) {
-		/*
-		 * We all hosts that cannot perform the command
-		 * to fail more gracefully
-		 */
-		if (err != -EINVAL)
+		/* If the host or the card can't do the switch,
+		 * fail more gracefully. */
+		if ((err != -EINVAL)
+		 && (err != -ENOSYS)
+		 && (err != -EFAULT))
 			goto out;
 
 		printk(KERN_WARNING "%s: problem reading switch "
-- 
cgit v0.10.2


From 17d33e14f7ffc05f8c81e4a3bdb9a8003a05dcce Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Tue, 22 Sep 2009 16:45:28 -0700
Subject: mmc: core SDIO suspend/resume support

Currently, all SDIO cards are virtually removed upon a suspend, and
completely reprobed upon a resume.  This adds the suspend and resume
methods to the SDIO bus driver so to be able to dispatch those events to
the actual SDIO function drivers for real suspend/resume instead.

All active functions on a card must have a driver with both a suspend and
a resume method though.  Failing that, we fall back to the current
behavior of simply "removing" the card when suspending.

When resuming, we make sure the same card is still inserted by comparing
the vendor and product IDs.  If there is a mismatch, or if there is simply
no card anymore in the slot, then the previous card is "removed" and the
new card is detected.  This is further enhanced with the next patch.

[akpm@linux-foundation.org: fix warnings]
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 2d24a12..f4c8637 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -218,6 +218,134 @@ static int sdio_enable_hs(struct mmc_card *card)
 }
 
 /*
+ * Handle the detection and initialisation of a card.
+ *
+ * In the case of a resume, "oldcard" will contain the card
+ * we're trying to reinitialise.
+ */
+static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
+			      struct mmc_card *oldcard)
+{
+	struct mmc_card *card;
+	int err;
+
+	BUG_ON(!host);
+	WARN_ON(!host->claimed);
+
+	/*
+	 * Inform the card of the voltage
+	 */
+	err = mmc_send_io_op_cond(host, host->ocr, &ocr);
+	if (err)
+		goto err;
+
+	/*
+	 * For SPI, enable CRC as appropriate.
+	 */
+	if (mmc_host_is_spi(host)) {
+		err = mmc_spi_set_crc(host, use_spi_crc);
+		if (err)
+			goto err;
+	}
+
+	/*
+	 * Allocate card structure.
+	 */
+	card = mmc_alloc_card(host, NULL);
+	if (IS_ERR(card)) {
+		err = PTR_ERR(card);
+		goto err;
+	}
+
+	card->type = MMC_TYPE_SDIO;
+
+	/*
+	 * For native busses:  set card RCA and quit open drain mode.
+	 */
+	if (!mmc_host_is_spi(host)) {
+		err = mmc_send_relative_addr(host, &card->rca);
+		if (err)
+			goto remove;
+
+		mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
+	}
+
+	/*
+	 * Select card, as all following commands rely on that.
+	 */
+	if (!mmc_host_is_spi(host)) {
+		err = mmc_select_card(card);
+		if (err)
+			goto remove;
+	}
+
+	/*
+	 * Read the common registers.
+	 */
+	err = sdio_read_cccr(card);
+	if (err)
+		goto remove;
+
+	/*
+	 * Read the common CIS tuples.
+	 */
+	err = sdio_read_common_cis(card);
+	if (err)
+		goto remove;
+
+	if (oldcard) {
+		int same = (card->cis.vendor == oldcard->cis.vendor &&
+			    card->cis.device == oldcard->cis.device);
+		mmc_remove_card(card);
+		if (!same) {
+			err = -ENOENT;
+			goto err;
+		}
+		card = oldcard;
+	}
+
+	/*
+	 * Switch to high-speed (if supported).
+	 */
+	err = sdio_enable_hs(card);
+	if (err)
+		goto remove;
+
+	/*
+	 * Change to the card's maximum speed.
+	 */
+	if (mmc_card_highspeed(card)) {
+		/*
+		 * The SDIO specification doesn't mention how
+		 * the CIS transfer speed register relates to
+		 * high-speed, but it seems that 50 MHz is
+		 * mandatory.
+		 */
+		mmc_set_clock(host, 50000000);
+	} else {
+		mmc_set_clock(host, card->cis.max_dtr);
+	}
+
+	/*
+	 * Switch to wider bus (if supported).
+	 */
+	err = sdio_enable_wide(card);
+	if (err)
+		goto remove;
+
+	if (!oldcard)
+		host->card = card;
+	return 0;
+
+remove:
+	if (!oldcard)
+		mmc_remove_card(card);
+
+err:
+	return err;
+}
+
+/*
  * Host is being removed. Free up the current card.
  */
 static void mmc_sdio_remove(struct mmc_host *host)
@@ -266,10 +394,74 @@ static void mmc_sdio_detect(struct mmc_host *host)
 	}
 }
 
+/*
+ * SDIO suspend.  We need to suspend all functions separately.
+ * Therefore all registered functions must have drivers with suspend
+ * and resume methods.  Failing that we simply remove the whole card.
+ */
+static void mmc_sdio_suspend(struct mmc_host *host)
+{
+	int i;
+
+	/* make sure all registered functions can suspend/resume */
+	for (i = 0; i < host->card->sdio_funcs; i++) {
+		struct sdio_func *func = host->card->sdio_func[i];
+		if (func && sdio_func_present(func) && func->dev.driver) {
+			const struct dev_pm_ops *pmops = func->dev.driver->pm;
+			if (!pmops || !pmops->suspend || !pmops->resume) {
+				/* just remove the entire card in that case */
+				mmc_sdio_remove(host);
+				mmc_claim_host(host);
+				mmc_detach_bus(host);
+				mmc_release_host(host);
+				return;
+			}
+		}
+	}
+
+	/* now suspend them */
+	for (i = 0; i < host->card->sdio_funcs; i++) {
+		struct sdio_func *func = host->card->sdio_func[i];
+		if (func && sdio_func_present(func) && func->dev.driver) {
+			const struct dev_pm_ops *pmops = func->dev.driver->pm;
+			pmops->suspend(&func->dev);
+		}
+	}
+}
+
+static void mmc_sdio_resume(struct mmc_host *host)
+{
+	int i, err;
+
+	BUG_ON(!host);
+	BUG_ON(!host->card);
+
+	mmc_claim_host(host);
+	err = mmc_sdio_init_card(host, host->ocr, host->card);
+	mmc_release_host(host);
+	if (err) {
+		mmc_sdio_remove(host);
+		mmc_claim_host(host);
+		mmc_detach_bus(host);
+		mmc_release_host(host);
+		return;
+	}
+
+	/* resume all functions */
+	for (i = 0; i < host->card->sdio_funcs; i++) {
+		struct sdio_func *func = host->card->sdio_func[i];
+		if (func && sdio_func_present(func) && func->dev.driver) {
+			const struct dev_pm_ops *pmops = func->dev.driver->pm;
+			pmops->resume(&func->dev);
+		}
+	}
+}
 
 static const struct mmc_bus_ops mmc_sdio_ops = {
 	.remove = mmc_sdio_remove,
 	.detect = mmc_sdio_detect,
+	.suspend = mmc_sdio_suspend,
+	.resume = mmc_sdio_resume,
 };
 
 
@@ -309,103 +501,18 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
 	}
 
 	/*
-	 * Inform the card of the voltage
+	 * Detect and init the card.
 	 */
-	err = mmc_send_io_op_cond(host, host->ocr, &ocr);
+	err = mmc_sdio_init_card(host, host->ocr, NULL);
 	if (err)
 		goto err;
-
-	/*
-	 * For SPI, enable CRC as appropriate.
-	 */
-	if (mmc_host_is_spi(host)) {
-		err = mmc_spi_set_crc(host, use_spi_crc);
-		if (err)
-			goto err;
-	}
+	card = host->card;
 
 	/*
 	 * The number of functions on the card is encoded inside
 	 * the ocr.
 	 */
-	funcs = (ocr & 0x70000000) >> 28;
-
-	/*
-	 * Allocate card structure.
-	 */
-	card = mmc_alloc_card(host, NULL);
-	if (IS_ERR(card)) {
-		err = PTR_ERR(card);
-		goto err;
-	}
-
-	card->type = MMC_TYPE_SDIO;
-	card->sdio_funcs = funcs;
-
-	host->card = card;
-
-	/*
-	 * For native busses:  set card RCA and quit open drain mode.
-	 */
-	if (!mmc_host_is_spi(host)) {
-		err = mmc_send_relative_addr(host, &card->rca);
-		if (err)
-			goto remove;
-
-		mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
-	}
-
-	/*
-	 * Select card, as all following commands rely on that.
-	 */
-	if (!mmc_host_is_spi(host)) {
-		err = mmc_select_card(card);
-		if (err)
-			goto remove;
-	}
-
-	/*
-	 * Read the common registers.
-	 */
-	err = sdio_read_cccr(card);
-	if (err)
-		goto remove;
-
-	/*
-	 * Read the common CIS tuples.
-	 */
-	err = sdio_read_common_cis(card);
-	if (err)
-		goto remove;
-
-	/*
-	 * Switch to high-speed (if supported).
-	 */
-	err = sdio_enable_hs(card);
-	if (err)
-		goto remove;
-
-	/*
-	 * Change to the card's maximum speed.
-	 */
-	if (mmc_card_highspeed(card)) {
-		/*
-		 * The SDIO specification doesn't mention how
-		 * the CIS transfer speed register relates to
-		 * high-speed, but it seems that 50 MHz is
-		 * mandatory.
-		 */
-		mmc_set_clock(host, 50000000);
-	} else {
-		mmc_set_clock(host, card->cis.max_dtr);
-	}
-
-	/*
-	 * Switch to wider bus (if supported).
-	 */
-	err = sdio_enable_wide(card);
-	if (err)
-		goto remove;
+	card->sdio_funcs = funcs = (ocr & 0x70000000) >> 28;
 
 	/*
 	 * If needed, disconnect card detection pull-up resistor.
-- 
cgit v0.10.2


From 95cdfb72b9bc568803f395c266152c71b034b461 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Tue, 22 Sep 2009 16:45:29 -0700
Subject: mmc: propagate error codes back from bus drivers' suspend/resume
 methods

Especially for SDIO drivers which may have special conditions/errors to
report, it is a good thing to relay the returned error code back to upper
layers.

This also allows for the rationalization of the resume path where code to
"remove" a no-longer-existing or replaced card was duplicated into the
MMC, SD and SDIO bus drivers.

In the SDIO case, if a function suspend method returns an error, then all
previously suspended functions are resumed and the error returned.  An
exception is made for -ENOSYS which the core interprets as "we don't
support suspend so just kick the card out for suspend and return success".

When resuming SDIO cards, the core code only validates the manufacturer
and product IDs to make sure the same kind of card is still present before
invoking functions resume methods.  It's the function driver's
responsibility to perform further tests to confirm that the actual same
card is present (same MAC address, etc.) and return an error otherwise.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 0842f68..7dab2e5 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1236,6 +1236,8 @@ EXPORT_SYMBOL(mmc_card_can_sleep);
  */
 int mmc_suspend_host(struct mmc_host *host, pm_message_t state)
 {
+	int err = 0;
+
 	if (host->caps & MMC_CAP_DISABLE)
 		cancel_delayed_work(&host->disable);
 	cancel_delayed_work(&host->detect);
@@ -1244,21 +1246,26 @@ int mmc_suspend_host(struct mmc_host *host, pm_message_t state)
 	mmc_bus_get(host);
 	if (host->bus_ops && !host->bus_dead) {
 		if (host->bus_ops->suspend)
-			host->bus_ops->suspend(host);
-		if (!host->bus_ops->resume) {
+			err = host->bus_ops->suspend(host);
+		if (err == -ENOSYS || !host->bus_ops->resume) {
+			/*
+			 * We simply "remove" the card in this case.
+			 * It will be redetected on resume.
+			 */
 			if (host->bus_ops->remove)
 				host->bus_ops->remove(host);
-
 			mmc_claim_host(host);
 			mmc_detach_bus(host);
 			mmc_release_host(host);
+			err = 0;
 		}
 	}
 	mmc_bus_put(host);
 
-	mmc_power_off(host);
+	if (!err)
+		mmc_power_off(host);
 
-	return 0;
+	return err;
 }
 
 EXPORT_SYMBOL(mmc_suspend_host);
@@ -1269,12 +1276,26 @@ EXPORT_SYMBOL(mmc_suspend_host);
  */
 int mmc_resume_host(struct mmc_host *host)
 {
+	int err = 0;
+
 	mmc_bus_get(host);
 	if (host->bus_ops && !host->bus_dead) {
 		mmc_power_up(host);
 		mmc_select_voltage(host, host->ocr);
 		BUG_ON(!host->bus_ops->resume);
-		host->bus_ops->resume(host);
+		err = host->bus_ops->resume(host);
+		if (err) {
+			printk(KERN_WARNING "%s: error %d during resume "
+					    "(card was removed?)\n",
+					    mmc_hostname(host), err);
+			if (host->bus_ops->remove)
+				host->bus_ops->remove(host);
+			mmc_claim_host(host);
+			mmc_detach_bus(host);
+			mmc_release_host(host);
+			/* no need to bother upper layers */
+			err = 0;
+		}
 	}
 	mmc_bus_put(host);
 
@@ -1284,7 +1305,7 @@ int mmc_resume_host(struct mmc_host *host)
 	 */
 	mmc_detect_change(host, 1);
 
-	return 0;
+	return err;
 }
 
 EXPORT_SYMBOL(mmc_resume_host);
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index c386348..67ae6ab 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -20,8 +20,8 @@ struct mmc_bus_ops {
 	int (*sleep)(struct mmc_host *);
 	void (*remove)(struct mmc_host *);
 	void (*detect)(struct mmc_host *);
-	void (*suspend)(struct mmc_host *);
-	void (*resume)(struct mmc_host *);
+	int (*suspend)(struct mmc_host *);
+	int (*resume)(struct mmc_host *);
 	void (*power_save)(struct mmc_host *);
 	void (*power_restore)(struct mmc_host *);
 };
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index ddddad4..bfefce3 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -530,7 +530,7 @@ static void mmc_detect(struct mmc_host *host)
 /*
  * Suspend callback from host.
  */
-static void mmc_suspend(struct mmc_host *host)
+static int mmc_suspend(struct mmc_host *host)
 {
 	BUG_ON(!host);
 	BUG_ON(!host->card);
@@ -540,6 +540,8 @@ static void mmc_suspend(struct mmc_host *host)
 		mmc_deselect_cards(host);
 	host->card->state &= ~MMC_STATE_HIGHSPEED;
 	mmc_release_host(host);
+
+	return 0;
 }
 
 /*
@@ -548,7 +550,7 @@ static void mmc_suspend(struct mmc_host *host)
  * This function tries to determine if the same card is still present
  * and, if so, restore all state to it.
  */
-static void mmc_resume(struct mmc_host *host)
+static int mmc_resume(struct mmc_host *host)
 {
 	int err;
 
@@ -559,14 +561,7 @@ static void mmc_resume(struct mmc_host *host)
 	err = mmc_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
 
-	if (err) {
-		mmc_remove(host);
-
-		mmc_claim_host(host);
-		mmc_detach_bus(host);
-		mmc_release_host(host);
-	}
-
+	return err;
 }
 
 static void mmc_power_restore(struct mmc_host *host)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 13b3a6b..10b2a4d 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -564,7 +564,7 @@ static void mmc_sd_detect(struct mmc_host *host)
 /*
  * Suspend callback from host.
  */
-static void mmc_sd_suspend(struct mmc_host *host)
+static int mmc_sd_suspend(struct mmc_host *host)
 {
 	BUG_ON(!host);
 	BUG_ON(!host->card);
@@ -574,6 +574,8 @@ static void mmc_sd_suspend(struct mmc_host *host)
 		mmc_deselect_cards(host);
 	host->card->state &= ~MMC_STATE_HIGHSPEED;
 	mmc_release_host(host);
+
+	return 0;
 }
 
 /*
@@ -582,7 +584,7 @@ static void mmc_sd_suspend(struct mmc_host *host)
  * This function tries to determine if the same card is still present
  * and, if so, restore all state to it.
  */
-static void mmc_sd_resume(struct mmc_host *host)
+static int mmc_sd_resume(struct mmc_host *host)
 {
 	int err;
 
@@ -593,14 +595,7 @@ static void mmc_sd_resume(struct mmc_host *host)
 	err = mmc_sd_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
 
-	if (err) {
-		mmc_sd_remove(host);
-
-		mmc_claim_host(host);
-		mmc_detach_bus(host);
-		mmc_release_host(host);
-	}
-
+	return err;
 }
 
 static void mmc_sd_power_restore(struct mmc_host *host)
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index f4c8637..cdb845b 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -302,6 +302,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 			goto err;
 		}
 		card = oldcard;
+		return 0;
 	}
 
 	/*
@@ -399,62 +400,65 @@ static void mmc_sdio_detect(struct mmc_host *host)
  * Therefore all registered functions must have drivers with suspend
  * and resume methods.  Failing that we simply remove the whole card.
  */
-static void mmc_sdio_suspend(struct mmc_host *host)
+static int mmc_sdio_suspend(struct mmc_host *host)
 {
-	int i;
+	int i, err = 0;
 
-	/* make sure all registered functions can suspend/resume */
 	for (i = 0; i < host->card->sdio_funcs; i++) {
 		struct sdio_func *func = host->card->sdio_func[i];
 		if (func && sdio_func_present(func) && func->dev.driver) {
 			const struct dev_pm_ops *pmops = func->dev.driver->pm;
 			if (!pmops || !pmops->suspend || !pmops->resume) {
-				/* just remove the entire card in that case */
-				mmc_sdio_remove(host);
-				mmc_claim_host(host);
-				mmc_detach_bus(host);
-				mmc_release_host(host);
-				return;
-			}
+				/* force removal of entire card in that case */
+				err = -ENOSYS;
+			} else
+				err = pmops->suspend(&func->dev);
+			if (err)
+				break;
 		}
 	}
-
-	/* now suspend them */
-	for (i = 0; i < host->card->sdio_funcs; i++) {
+	while (err && --i >= 0) {
 		struct sdio_func *func = host->card->sdio_func[i];
 		if (func && sdio_func_present(func) && func->dev.driver) {
 			const struct dev_pm_ops *pmops = func->dev.driver->pm;
-			pmops->suspend(&func->dev);
+			pmops->resume(&func->dev);
 		}
 	}
+
+	return err;
 }
 
-static void mmc_sdio_resume(struct mmc_host *host)
+static int mmc_sdio_resume(struct mmc_host *host)
 {
 	int i, err;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
+	/* Basic card reinitialization. */
 	mmc_claim_host(host);
 	err = mmc_sdio_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
-	if (err) {
-		mmc_sdio_remove(host);
-		mmc_claim_host(host);
-		mmc_detach_bus(host);
-		mmc_release_host(host);
-		return;
-	}
 
-	/* resume all functions */
-	for (i = 0; i < host->card->sdio_funcs; i++) {
+	/*
+	 * If the card looked to be the same as before suspending, then
+	 * we proceed to resume all card functions.  If one of them returns
+	 * an error then we simply return that error to the core and the
+	 * card will be redetected as new.  It is the responsibility of
+	 * the function driver to perform further tests with the extra
+	 * knowledge it has of the card to confirm the card is indeed the
+	 * same as before suspending (same MAC address for network cards,
+	 * etc.) and return an error otherwise.
+	 */
+	for (i = 0; !err && i < host->card->sdio_funcs; i++) {
 		struct sdio_func *func = host->card->sdio_func[i];
 		if (func && sdio_func_present(func) && func->dev.driver) {
 			const struct dev_pm_ops *pmops = func->dev.driver->pm;
-			pmops->resume(&func->dev);
+			err = pmops->resume(&func->dev);
 		}
 	}
+
+	return err;
 }
 
 static const struct mmc_bus_ops mmc_sdio_ops = {
-- 
cgit v0.10.2


From 996ad5686c5f868e67557cc1bfcb2cfdde1a18b4 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Tue, 22 Sep 2009 16:45:30 -0700
Subject: mmc: make SDIO device/driver struct accessors public

Especially with the PM framework, those are quite handy to have in driver
code too.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 46284b5..d37464e 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -20,9 +20,6 @@
 #include "sdio_cis.h"
 #include "sdio_bus.h"
 
-#define dev_to_sdio_func(d)	container_of(d, struct sdio_func, dev)
-#define to_sdio_driver(d)      container_of(d, struct sdio_driver, drv)
-
 /* show configuration fields */
 #define sdio_config_attr(field, format_string)				\
 static ssize_t								\
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index 451bdfc..ac3ab68 100644
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -67,6 +67,7 @@ struct sdio_func {
 
 #define sdio_get_drvdata(f)	dev_get_drvdata(&(f)->dev)
 #define sdio_set_drvdata(f,d)	dev_set_drvdata(&(f)->dev, d)
+#define dev_to_sdio_func(d)	container_of(d, struct sdio_func, dev)
 
 /*
  * SDIO function device driver
@@ -81,6 +82,8 @@ struct sdio_driver {
 	struct device_driver drv;
 };
 
+#define to_sdio_driver(d)	container_of(d, struct sdio_driver, drv)
+
 /**
  * SDIO_DEVICE - macro used to describe a specific SDIO device
  * @vend: the 16 bit manufacturer code
-- 
cgit v0.10.2


From a13abc7b0814da7733c531453a207729b542ecf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20R=C3=B6jfors?= <richard.rojfors@mocean-labs.com>
Date: Tue, 22 Sep 2009 16:45:30 -0700
Subject: sdhci: support for ADMA only hosts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for ADMA on SDHCI hosts, not supporting SDMA.

According to the SDHCI specifications a host can support ADMA but not SDMA

Signed-off-by: Richard Röjfors <richard.rojfors@mocean-labs.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 63b9d2a..e035664 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -396,7 +396,7 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host)
 
 	if (((pdev->class & 0xFFFF00) == (PCI_CLASS_SYSTEM_SDHCI << 8)) &&
 		((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA) &&
-		(host->flags & SDHCI_USE_DMA)) {
+		(host->flags & SDHCI_USE_SDMA)) {
 		dev_warn(&pdev->dev, "Will use DMA mode even though HW "
 			"doesn't fully claim to support it.\n");
 	}
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 38a7874..9d07676 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -655,7 +655,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
 	count = sdhci_calc_timeout(host, data);
 	sdhci_writeb(host, count, SDHCI_TIMEOUT_CONTROL);
 
-	if (host->flags & SDHCI_USE_DMA)
+	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))
 		host->flags |= SDHCI_REQ_USE_DMA;
 
 	/*
@@ -1600,7 +1600,7 @@ int sdhci_resume_host(struct sdhci_host *host)
 {
 	int ret;
 
-	if (host->flags & SDHCI_USE_DMA) {
+	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if (host->ops->enable_dma)
 			host->ops->enable_dma(host);
 	}
@@ -1681,23 +1681,20 @@ int sdhci_add_host(struct sdhci_host *host)
 	caps = sdhci_readl(host, SDHCI_CAPABILITIES);
 
 	if (host->quirks & SDHCI_QUIRK_FORCE_DMA)
-		host->flags |= SDHCI_USE_DMA;
-	else if (!(caps & SDHCI_CAN_DO_DMA))
-		DBG("Controller doesn't have DMA capability\n");
+		host->flags |= SDHCI_USE_SDMA;
+	else if (!(caps & SDHCI_CAN_DO_SDMA))
+		DBG("Controller doesn't have SDMA capability\n");
 	else
-		host->flags |= SDHCI_USE_DMA;
+		host->flags |= SDHCI_USE_SDMA;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_DMA) &&
-		(host->flags & SDHCI_USE_DMA)) {
+		(host->flags & SDHCI_USE_SDMA)) {
 		DBG("Disabling DMA as it is marked broken\n");
-		host->flags &= ~SDHCI_USE_DMA;
+		host->flags &= ~SDHCI_USE_SDMA;
 	}
 
-	if (host->flags & SDHCI_USE_DMA) {
-		if ((host->version >= SDHCI_SPEC_200) &&
-				(caps & SDHCI_CAN_DO_ADMA2))
-			host->flags |= SDHCI_USE_ADMA;
-	}
+	if ((host->version >= SDHCI_SPEC_200) && (caps & SDHCI_CAN_DO_ADMA2))
+		host->flags |= SDHCI_USE_ADMA;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) &&
 		(host->flags & SDHCI_USE_ADMA)) {
@@ -1705,13 +1702,14 @@ int sdhci_add_host(struct sdhci_host *host)
 		host->flags &= ~SDHCI_USE_ADMA;
 	}
 
-	if (host->flags & SDHCI_USE_DMA) {
+	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if (host->ops->enable_dma) {
 			if (host->ops->enable_dma(host)) {
 				printk(KERN_WARNING "%s: No suitable DMA "
 					"available. Falling back to PIO.\n",
 					mmc_hostname(mmc));
-				host->flags &= ~(SDHCI_USE_DMA | SDHCI_USE_ADMA);
+				host->flags &=
+					~(SDHCI_USE_SDMA | SDHCI_USE_ADMA);
 			}
 		}
 	}
@@ -1739,7 +1737,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * mask, but PIO does not need the hw shim so we set a new
 	 * mask here in that case.
 	 */
-	if (!(host->flags & SDHCI_USE_DMA)) {
+	if (!(host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))) {
 		host->dma_mask = DMA_BIT_MASK(64);
 		mmc_dev(host->mmc)->dma_mask = &host->dma_mask;
 	}
@@ -1816,7 +1814,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	 */
 	if (host->flags & SDHCI_USE_ADMA)
 		mmc->max_hw_segs = 128;
-	else if (host->flags & SDHCI_USE_DMA)
+	else if (host->flags & SDHCI_USE_SDMA)
 		mmc->max_hw_segs = 1;
 	else /* PIO */
 		mmc->max_hw_segs = 128;
@@ -1899,10 +1897,10 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	mmc_add_host(mmc);
 
-	printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n",
+	printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s\n",
 		mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)),
-		(host->flags & SDHCI_USE_ADMA)?"A":"",
-		(host->flags & SDHCI_USE_DMA)?"DMA":"PIO");
+		(host->flags & SDHCI_USE_ADMA) ? "ADMA" :
+		(host->flags & SDHCI_USE_SDMA) ? "DMA" : "PIO");
 
 	sdhci_enable_card_detection(host);
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index afda7f1..ce5f1d7 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -143,7 +143,7 @@
 #define  SDHCI_CAN_DO_ADMA2	0x00080000
 #define  SDHCI_CAN_DO_ADMA1	0x00100000
 #define  SDHCI_CAN_DO_HISPD	0x00200000
-#define  SDHCI_CAN_DO_DMA	0x00400000
+#define  SDHCI_CAN_DO_SDMA	0x00400000
 #define  SDHCI_CAN_VDD_330	0x01000000
 #define  SDHCI_CAN_VDD_300	0x02000000
 #define  SDHCI_CAN_VDD_180	0x04000000
@@ -252,7 +252,7 @@ struct sdhci_host {
 	spinlock_t		lock;		/* Mutex */
 
 	int			flags;		/* Host attributes */
-#define SDHCI_USE_DMA		(1<<0)		/* Host is DMA capable */
+#define SDHCI_USE_SDMA		(1<<0)		/* Host is SDMA capable */
 #define SDHCI_USE_ADMA		(1<<1)		/* Host is ADMA capable */
 #define SDHCI_REQ_USE_DMA	(1<<2)		/* Use DMA for this req. */
 #define SDHCI_DEVICE_DEAD	(1<<3)		/* Device unresponsive */
-- 
cgit v0.10.2


From 27f6cb160b71b342b7a47d28a4b6c422ea74ccd1 Mon Sep 17 00:00:00 2001
From: Chris Ball <cjb@laptop.org>
Date: Tue, 22 Sep 2009 16:45:31 -0700
Subject: sdhci: increase timeout for internal clock stabilization.

On an OLPC XO-1.5 development board with Via VX855 chipset, the sdhci
controller can take up to 12ms to stabilize its clock, but the current
timeout at which we give up on the controller is 10ms.

The patch increases the timeout delay rather than using a device-specific
quirk -- since we exit the loop when the clock comes up, increasing the
timeout value will only make us mdelay() longer in the errant case of a
device with a clock that is not stabilizing, which it seems worth waiting
a little longer for in general.

Signed-off-by: Chris Ball <cjb@laptop.org>
Cc: Harald Welte <HaraldWelte@viatech.com>
Acked-by: Pierre Ossman <pierre@ossman.eu>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 9d07676..c279fbc 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -994,8 +994,8 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 	clk |= SDHCI_CLOCK_INT_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
-	/* Wait max 10 ms */
-	timeout = 10;
+	/* Wait max 20 ms */
+	timeout = 20;
 	while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
 		& SDHCI_CLOCK_INT_STABLE)) {
 		if (timeout == 0) {
-- 
cgit v0.10.2


From cff4edb591c153a779a27a3fd8e7bc1217f2f6b8 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees.cook@canonical.com>
Date: Tue, 22 Sep 2009 16:45:32 -0700
Subject: proc: fix reported unit for RLIMIT_CPU

/proc/$pid/limits should show RLIMIT_CPU as seconds, which is the unit
used in kernel/posix-cpu-timers.c:

        unsigned long psecs = cputime_to_secs(ptime);
        ...
        if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
                ...
                __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);

Signed-off-by: Kees Cook <kees.cook@canonical.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 55c4c80..69bb703 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -458,7 +458,7 @@ struct limit_names {
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "ms"},
+	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
 	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
 	[RLIMIT_DATA] = {"Max data size", "bytes"},
 	[RLIMIT_STACK] = {"Max stack size", "bytes"},
-- 
cgit v0.10.2


From 9b4d1cbef8f41aff2b3e4ca31f566c071fe601de Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 22 Sep 2009 16:45:34 -0700
Subject: proc_flush_task: flush /proc/tid/task/pid when a sub-thread exits

The exiting sub-thread flushes /proc/pid only, but this doesn't buy too
much: ps and friends mostly use /proc/tid/task/pid.

Remove "if (thread_group_leader())" checks from proc_flush_task() path,
this means we always remove /proc/tid/task/pid dentry on exit, and this
actually matches the comment above proc_flush_task().

The test-case:

	static void* tfunc(void *arg)
	{
		char name[256];

		sprintf(name, "/proc/%d/task/%ld/status", getpid(), gettid());
		close(open(name, O_RDONLY));

		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		for (;;) {
			if (!pthread_create(&t, NULL, &tfunc, NULL))
				pthread_join(t, NULL);
		}
	}

slabtop shows that pid/proc_inode_cache/etc grow quickly and
"indefinitely" until the task is killed or shrink_slab() is called, not
good.  And the main thread needs a lot of time to exit.

The same can happen if something like "ps -efL" runs continuously, while
some application spawns short-living threads.

Reported-by: "James M. Leddy" <jleddy@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dominic Duval <dduval@redhat.com>
Cc: Frank Hirtz <fhirtz@redhat.com>
Cc: "Fuller, Johnray" <Johnray.Fuller@gs.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Paul Batkowski <pbatkowski@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69bb703..5bc5870 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2604,9 +2604,6 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		dput(dentry);
 	}
 
-	if (tgid == 0)
-		goto out;
-
 	name.name = buf;
 	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);
@@ -2663,17 +2660,16 @@ out:
 void proc_flush_task(struct task_struct *task)
 {
 	int i;
-	struct pid *pid, *tgid = NULL;
+	struct pid *pid, *tgid;
 	struct upid *upid;
 
 	pid = task_pid(task);
-	if (thread_group_leader(task))
-		tgid = task_tgid(task);
+	tgid = task_tgid(task);
 
 	for (i = 0; i <= pid->level; i++) {
 		upid = &pid->numbers[i];
 		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
-			tgid ? tgid->numbers[i].nr : 0);
+					tgid->numbers[i].nr);
 	}
 
 	upid = &pid->numbers[pid->level];
-- 
cgit v0.10.2


From acef82b873b6899d80e639317228f2104dae79a2 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <xiyou.wangcong@gmail.com>
Date: Tue, 22 Sep 2009 16:45:35 -0700
Subject: kcore: fix /proc/kcore's stat.st_size

In 9063c61fd5cbd ("x86, 64-bit: Clean up user address masking") Linus
fixed the wrong size of /proc/kcore problem.

But its size still looks insane, since it never equals the size of
physical memory.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Tao Ma <tao.ma@oracle.com>
Cc: <mtk.manpages@gmail.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index f06f45b..0cf8a24 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -374,9 +374,6 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 static int __init proc_kcore_init(void)
 {
 	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
-	if (proc_root_kcore)
-		proc_root_kcore->size =
-				(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
 	return 0;
 }
 module_init(proc_kcore_init);
-- 
cgit v0.10.2


From fb92a4b068be96799da3748c11cbd69760e44d7b Mon Sep 17 00:00:00 2001
From: Vincent Li <macli@brc.ubc.ca>
Date: Tue, 22 Sep 2009 16:45:36 -0700
Subject: fs/proc/task_mmu.c v1: fix clear_refs_write() input sanity check

Andrew Morton pointed out similar string hacking and obfuscated check for
zero-length input at the end of the function, David Rientjes suggested to
use strict_strtol to replace simple_strtol, this patch cover above
suggestions, add removing of leading and trailing whitespace from user
input.  It does not change function behavious.

Signed-off-by: Vincent Li <macli@brc.ubc.ca>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Amerigo Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 59e98fe..366b101 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -473,21 +473,20 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
+	char buffer[PROC_NUMBUF];
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	int type;
+	long type;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	type = simple_strtol(buffer, &end, 0);
+	if (strict_strtol(strstrip(buffer), 10, &type))
+		return -EINVAL;
 	if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
 		return -EINVAL;
-	if (*end == '\n')
-		end++;
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
@@ -523,9 +522,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		mmput(mm);
 	}
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 const struct file_operations proc_clear_refs_operations = {
-- 
cgit v0.10.2


From cba8aafe1e07dfc8bae5ba78be8e02883bd34d31 Mon Sep 17 00:00:00 2001
From: Vincent Li <macli@brc.ubc.ca>
Date: Tue, 22 Sep 2009 16:45:38 -0700
Subject: fs/proc/base.c: fix proc_fault_inject_write() input sanity check

Remove obfuscated zero-length input check and return -EINVAL instead of
-EIO error to make the error message clear to user.  Add whitespace
stripping.  No functionality changes.

The old code:

echo  1  > /proc/pid/make-it-fail (ok)
echo 1foo > /proc/pid/make-it-fail (-bash: echo: write error: Input/output error)

The new code:

echo  1  > /proc/pid/make-it-fail (ok)
echo 1foo > /proc/pid/make-it-fail (-bash: echo: write error: Invalid argument)

This patch is conservative in changes to not breaking existing
scripts/applications.

Signed-off-by: Vincent Li <macli@brc.ubc.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5bc5870..837469a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1187,17 +1187,16 @@ static ssize_t proc_fault_inject_write(struct file * file,
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	make_it_fail = simple_strtol(buffer, &end, 0);
-	if (*end == '\n')
-		end++;
+	make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
+	if (*end)
+		return -EINVAL;
 	task = get_proc_task(file->f_dentry->d_inode);
 	if (!task)
 		return -ESRCH;
 	task->make_it_fail = make_it_fail;
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 static const struct file_operations proc_fault_inject_operations = {
-- 
cgit v0.10.2


From d899bf7b55f503ba7d3d07ed27c3a37e270fa7db Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Tue, 22 Sep 2009 16:45:40 -0700
Subject: procfs: provide stack information for threads

A patch to give a better overview of the userland application stack usage,
especially for embedded linux.

Currently you are only able to dump the main process/thread stack usage
which is showed in /proc/pid/status by the "VmStk" Value.  But you get no
information about the consumed stack memory of the the threads.

There is an enhancement in the /proc/<pid>/{task/*,}/*maps and which marks
the vm mapping where the thread stack pointer reside with "[thread stack
xxxxxxxx]".  xxxxxxxx is the maximum size of stack.  This is a value
information, because libpthread doesn't set the start of the stack to the
top of the mapped area, depending of the pthread usage.

A sample output of /proc/<pid>/task/<tid>/maps looks like:

08048000-08049000 r-xp 00000000 03:00 8312       /opt/z
08049000-0804a000 rw-p 00001000 03:00 8312       /opt/z
0804a000-0806b000 rw-p 00000000 00:00 0          [heap]
a7d12000-a7d13000 ---p 00000000 00:00 0
a7d13000-a7f13000 rw-p 00000000 00:00 0          [thread stack: 001ff4b4]
a7f13000-a7f14000 ---p 00000000 00:00 0
a7f14000-a7f36000 rw-p 00000000 00:00 0
a7f36000-a8069000 r-xp 00000000 03:00 4222       /lib/libc.so.6
a8069000-a806b000 r--p 00133000 03:00 4222       /lib/libc.so.6
a806b000-a806c000 rw-p 00135000 03:00 4222       /lib/libc.so.6
a806c000-a806f000 rw-p 00000000 00:00 0
a806f000-a8083000 r-xp 00000000 03:00 14462      /lib/libpthread.so.0
a8083000-a8084000 r--p 00013000 03:00 14462      /lib/libpthread.so.0
a8084000-a8085000 rw-p 00014000 03:00 14462      /lib/libpthread.so.0
a8085000-a8088000 rw-p 00000000 00:00 0
a8088000-a80a4000 r-xp 00000000 03:00 8317       /lib/ld-linux.so.2
a80a4000-a80a5000 r--p 0001b000 03:00 8317       /lib/ld-linux.so.2
a80a5000-a80a6000 rw-p 0001c000 03:00 8317       /lib/ld-linux.so.2
afaf5000-afb0a000 rw-p 00000000 00:00 0          [stack]
ffffe000-fffff000 r-xp 00000000 00:00 0          [vdso]

Also there is a new entry "stack usage" in /proc/<pid>/{task/*,}/status
which will you give the current stack usage in kb.

A sample output of /proc/self/status looks like:

Name:	cat
State:	R (running)
Tgid:	507
Pid:	507
.
.
.
CapBnd:	fffffffffffffeff
voluntary_ctxt_switches:	0
nonvoluntary_ctxt_switches:	0
Stack usage:	12 kB

I also fixed stack base address in /proc/<pid>/{task/*,}/stat to the base
address of the associated thread stack and not the one of the main
process.  This makes more sense.

[akpm@linux-foundation.org: fs/proc/array.c now needs walk_page_range()]
Signed-off-by: Stefani Seibold <stefani@seibold.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 75988ba..b5aee78 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -176,6 +176,7 @@ read the file /proc/PID/status:
   CapBnd: ffffffffffffffff
   voluntary_ctxt_switches:        0
   nonvoluntary_ctxt_switches:     1
+  Stack usage:    12 kB
 
 This shows you nearly the same information you would get if you viewed it with
 the ps  command.  In  fact,  ps  uses  the  proc  file  system  to  obtain its
@@ -229,6 +230,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
  Mems_allowed_list           Same as previous, but in "list format"
  voluntary_ctxt_switches     number of voluntary context switches
  nonvoluntary_ctxt_switches  number of non voluntary context switches
+ Stack usage:                stack usage high water mark (round up to page size)
 ..............................................................................
 
 Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
@@ -307,7 +309,7 @@ address           perms offset  dev   inode      pathname
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
 0804a000-0806b000 rw-p 00000000 00:00 0          [heap]
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0
+a7cb2000-a7eb2000 rw-p 00000000 00:00 0          [threadstack:001ff4b4]
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
 a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
@@ -343,6 +345,7 @@ is not associated with a file:
  [stack]                  = the stack of the main process
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
+ [threadstack:xxxxxxxx]   = the stack of the thread, xxxxxxxx is the stack size
 
  or if empty, the mapping is anonymous.
 
diff --git a/fs/exec.c b/fs/exec.c
index 69bb9d8..5c833c1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1357,6 +1357,8 @@ int do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
+	current->stack_start = current->mm->start_stack;
+
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 725a650..0c6bc60 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -82,6 +82,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
+#include <linux/swapops.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -321,6 +322,87 @@ static inline void task_context_switch_counts(struct seq_file *m,
 			p->nivcsw);
 }
 
+struct stack_stats {
+	struct vm_area_struct *vma;
+	unsigned long	startpage;
+	unsigned long	usage;
+};
+
+static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
+{
+	struct stack_stats *ss = walk->private;
+	struct vm_area_struct *vma = ss->vma;
+	pte_t *pte, ptent;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		ptent = *pte;
+
+#ifdef CONFIG_STACK_GROWSUP
+		if (pte_present(ptent) || is_swap_pte(ptent))
+			ss->usage = addr - ss->startpage + PAGE_SIZE;
+#else
+		if (pte_present(ptent) || is_swap_pte(ptent)) {
+			ss->usage = ss->startpage - addr + PAGE_SIZE;
+			pte++;
+			ret = 1;
+			break;
+		}
+#endif
+	}
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+	return ret;
+}
+
+static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
+				struct task_struct *task)
+{
+	struct stack_stats ss;
+	struct mm_walk stack_walk = {
+		.pmd_entry = stack_usage_pte_range,
+		.mm = vma->vm_mm,
+		.private = &ss,
+	};
+
+	if (!vma->vm_mm || is_vm_hugetlb_page(vma))
+		return 0;
+
+	ss.vma = vma;
+	ss.startpage = task->stack_start & PAGE_MASK;
+	ss.usage = 0;
+
+#ifdef CONFIG_STACK_GROWSUP
+	walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
+		&stack_walk);
+#else
+	walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
+		&stack_walk);
+#endif
+	return ss.usage;
+}
+
+static inline void task_show_stack_usage(struct seq_file *m,
+						struct task_struct *task)
+{
+	struct vm_area_struct	*vma;
+	struct mm_struct	*mm = get_task_mm(task);
+
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, task->stack_start);
+		if (vma)
+			seq_printf(m, "Stack usage:\t%lu kB\n",
+				get_stack_usage_in_bytes(vma, task) >> 10);
+
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+}
+
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task)
 {
@@ -340,6 +422,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	task_show_regs(m, task);
 #endif
 	task_context_switch_counts(m, task);
+	task_show_stack_usage(m, task);
 	return 0;
 }
 
@@ -481,7 +564,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,
-		(permitted && mm) ? mm->start_stack : 0,
+		(permitted) ? task->stack_start : 0,
 		esp,
 		eip,
 		/* The signal information here is obsolete.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 366b101..2a1bef9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -243,6 +243,25 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 				} else if (vma->vm_start <= mm->start_stack &&
 					   vma->vm_end >= mm->start_stack) {
 					name = "[stack]";
+				} else {
+					unsigned long stack_start;
+					struct proc_maps_private *pmp;
+
+					pmp = m->private;
+					stack_start = pmp->task->stack_start;
+
+					if (vma->vm_start <= stack_start &&
+					    vma->vm_end >= stack_start) {
+						pad_len_spaces(m, len);
+						seq_printf(m,
+						 "[threadstack:%08lx]",
+#ifdef CONFIG_STACK_GROWSUP
+						 vma->vm_end - stack_start
+#else
+						 stack_start - vma->vm_start
+#endif
+						);
+					}
 				}
 			} else {
 				name = "[vdso]";
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6448bbc..3cbc6c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1529,6 +1529,7 @@ struct task_struct {
 	/* bitmask of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
+	unsigned long stack_start;
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/fork.c b/kernel/fork.c
index 7cf4581..8f45b0e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1095,6 +1095,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->bts = NULL;
 
+	p->stack_start = stack_start;
+
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
diff --git a/mm/Makefile b/mm/Makefile
index 728a9fd..88193d7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,10 +11,10 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o mm_init.o mmu_context.o $(mmu-y)
+			   page_isolation.o mm_init.o mmu_context.o \
+			   pagewalk.o $(mmu-y)
 obj-y += init-mm.o
 
-obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HAS_DMA)	+= dmapool.o
-- 
cgit v0.10.2


From 2ef43ec772551e975a6ea7cf22b59c84955aadf9 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:41 -0700
Subject: kcore: use usual list for kclist

This patchset is for /proc/kcore.  With this,

 - many per-arch hooks are removed.

 - /proc/kcore will know really valid physical memory area.

 - /proc/kcore will be aware of memory hotplug.

 - /proc/kcore will be architecture independent i.e.
   if an arch supports CONFIG_MMU, it can use /proc/kcore.
   (if the arch uses usual memory layout.)

This patch:

/proc/kcore uses its own list handling codes. It's better to use
generic list codes.

No changes in logic. just clean up.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0cf8a24..f9327e5 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include <linux/list.h>
 
 #define CORE_STR "CORE"
 
@@ -57,7 +58,7 @@ struct memelfnote
 	void *data;
 };
 
-static struct kcore_list *kclist;
+static LIST_HEAD(kclist_head);
 static DEFINE_RWLOCK(kclist_lock);
 
 void
@@ -67,8 +68,7 @@ kclist_add(struct kcore_list *new, void *addr, size_t size)
 	new->size = size;
 
 	write_lock(&kclist_lock);
-	new->next = kclist;
-	kclist = new;
+	list_add_tail(&new->list, &kclist_head);
 	write_unlock(&kclist_lock);
 }
 
@@ -80,7 +80,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	*nphdr = 1; /* PT_NOTE */
 	size = 0;
 
-	for (m=kclist; m; m=m->next) {
+	list_for_each_entry(m, &kclist_head, list) {
 		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
 		if (try > size)
 			size = try;
@@ -192,7 +192,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	nhdr->p_align	= 0;
 
 	/* setup ELF PT_LOAD program header for every area */
-	for (m=kclist; m; m=m->next) {
+	list_for_each_entry(m, &kclist_head, list) {
 		phdr = (struct elf_phdr *) bufp;
 		bufp += sizeof(struct elf_phdr);
 		offset += sizeof(struct elf_phdr);
@@ -317,7 +317,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		struct kcore_list *m;
 
 		read_lock(&kclist_lock);
-		for (m=kclist; m; m=m->next) {
+		list_for_each_entry(m, &kclist_head, list) {
 			if (start >= m->addr && start < (m->addr+m->size))
 				break;
 		}
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index e6e77d3..0aff2a6 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -79,7 +79,7 @@ struct proc_dir_entry {
 };
 
 struct kcore_list {
-	struct kcore_list *next;
+	struct list_head list;
 	unsigned long addr;
 	size_t size;
 };
-- 
cgit v0.10.2


From c30bb2a25fcfde6157e6154a32c14686fb0bedbe Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:43 -0700
Subject: kcore: add kclist types

Presently, kclist_add() only eats start address and size as its arguments.
Considering to make kclist dynamically reconfigulable, it's necessary to
know which kclists are for System RAM and which are not.

This patch add kclist types as
  KCORE_RAM
  KCORE_VMALLOC
  KCORE_TEXT
  KCORE_OTHER

This "type" is used in a patch following this for detecting KCORE_RAM.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1d28624..f6a3c21 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -639,9 +639,10 @@ mem_init (void)
 
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE);
-	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
-	kclist_add(&kcore_kernel, _stext, _end - _stext);
+	kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE, KCORE_RAM);
+	kclist_add(&kcore_vmem, (void *)VMALLOC_START,
+			VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
+	kclist_add(&kcore_kernel, _stext, _end - _stext, KCORE_TEXT);
 
 	for_each_online_pgdat(pgdat)
 		if (pgdat->bdata->node_bootmem_map)
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 1f4ee47..f866198 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -409,11 +409,12 @@ void __init mem_init(void)
 	if ((unsigned long) &_text > (unsigned long) CKSEG0)
 		/* The -4 is a hack so that user tools don't have to handle
 		   the overflow.  */
-		kclist_add(&kcore_kseg0, (void *) CKSEG0, 0x80000000 - 4);
+		kclist_add(&kcore_kseg0, (void *) CKSEG0,
+				0x80000000 - 4, KCORE_TEXT);
 #endif
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START);
+		   VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 3ef5084..e91add9 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -268,11 +268,11 @@ static int __init setup_kcore(void)
 						size);
 		}
 
-		kclist_add(kcore_mem, __va(base), size);
+		kclist_add(kcore_mem, __va(base), size, KCORE_RAM);
 	}
 
 	kclist_add(&kcore_vmem, (void *)VMALLOC_START,
-		VMALLOC_END-VMALLOC_START);
+		VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 
 	return 0;
 }
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 3158232..9ee5631 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -128,10 +128,11 @@ static int __init setup_kcore(void)
 		if (!kcore_mem)
 			panic("%s: kmalloc failed\n", __func__);
 
-		kclist_add(kcore_mem, __va(base), size);
+		kclist_add(kcore_mem, __va(base), size, KCORE_RAM);
 	}
 
-	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
+	kclist_add(&kcore_vmem, (void *)VMALLOC_START,
+		VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 
 	return 0;
 }
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index fabb7c6..ef56c9f 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -226,9 +226,9 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END - VMALLOC_START);
+		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 	       "%dk data, %dk init)\n",
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index b49b4f6..2cbc401 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -886,9 +886,9 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START);
+		   VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 810bd31..c05810b6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -677,13 +677,14 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START);
-	kclist_add(&kcore_kernel, &_stext, _end - _stext);
-	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
+		   VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
+	kclist_add(&kcore_kernel, &_stext, _end - _stext, KCORE_TEXT);
+	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN,
+			KCORE_OTHER);
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
-				 VSYSCALL_END - VSYSCALL_START);
+			 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 			 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index f9327e5..659c163 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -62,10 +62,11 @@ static LIST_HEAD(kclist_head);
 static DEFINE_RWLOCK(kclist_lock);
 
 void
-kclist_add(struct kcore_list *new, void *addr, size_t size)
+kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 	new->addr = (unsigned long)addr;
 	new->size = size;
+	new->type = type;
 
 	write_lock(&kclist_lock);
 	list_add_tail(&new->list, &kclist_head);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 0aff2a6..bd7b840 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -78,10 +78,18 @@ struct proc_dir_entry {
 	struct list_head pde_openers;	/* who did ->open, but not ->release */
 };
 
+enum kcore_type {
+	KCORE_TEXT,
+	KCORE_VMALLOC,
+	KCORE_RAM,
+	KCORE_OTHER,
+};
+
 struct kcore_list {
 	struct list_head list;
 	unsigned long addr;
 	size_t size;
+	int type;
 };
 
 struct vmcore {
@@ -233,11 +241,12 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm,
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
-static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
+static inline void
+kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 }
 #else
-extern void kclist_add(struct kcore_list *, void *, size_t);
+extern void kclist_add(struct kcore_list *, void *, size_t, int type);
 #endif
 
 union proc_op {
-- 
cgit v0.10.2


From a0614da88b67ffa3dbcc0d40b817e682c7c4a0ee Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:44 -0700
Subject: kcore: register vmalloc area in generic way

For /proc/kcore, vmalloc areas are registered per arch.  But, all of them
registers same range of [VMALLOC_START...VMALLOC_END) This patch unifies
them.  By this.  archs which have no kclist_add() hooks can see vmalloc
area correctly.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index f6a3c21..286b98a 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -617,7 +617,7 @@ mem_init (void)
 	long reserved_pages, codesize, datasize, initsize;
 	pg_data_t *pgdat;
 	int i;
-	static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
+	static struct kcore_list kcore_mem, kcore_kernel;
 
 	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
 	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
@@ -640,8 +640,6 @@ mem_init (void)
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
 
 	kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE, KCORE_RAM);
-	kclist_add(&kcore_vmem, (void *)VMALLOC_START,
-			VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 	kclist_add(&kcore_kernel, _stext, _end - _stext, KCORE_TEXT);
 
 	for_each_online_pgdat(pgdat)
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index f866198..4a83da4 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -352,7 +352,7 @@ void __init paging_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }
 
-static struct kcore_list kcore_mem, kcore_vmalloc;
+static struct kcore_list kcore_mem;
 #ifdef CONFIG_64BIT
 static struct kcore_list kcore_kseg0;
 #endif
@@ -413,8 +413,6 @@ void __init mem_init(void)
 				0x80000000 - 4, KCORE_TEXT);
 #endif
 	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index e91add9..38a450d 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -243,7 +243,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_PROC_KCORE
-static struct kcore_list kcore_vmem;
 
 static int __init setup_kcore(void)
 {
@@ -271,9 +270,6 @@ static int __init setup_kcore(void)
 		kclist_add(kcore_mem, __va(base), size, KCORE_RAM);
 	}
 
-	kclist_add(&kcore_vmem, (void *)VMALLOC_START,
-		VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
-
 	return 0;
 }
 module_init(setup_kcore);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 9ee5631..87ef492 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -110,7 +110,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_PROC_KCORE
-static struct kcore_list kcore_vmem;
 
 static int __init setup_kcore(void)
 {
@@ -131,9 +130,6 @@ static int __init setup_kcore(void)
 		kclist_add(kcore_mem, __va(base), size, KCORE_RAM);
 	}
 
-	kclist_add(&kcore_vmem, (void *)VMALLOC_START,
-		VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
-
 	return 0;
 }
 module_init(setup_kcore);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index ef56c9f..bf8bd02 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -186,7 +186,7 @@ void __init paging_init(void)
 	set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
 }
 
-static struct kcore_list kcore_mem, kcore_vmalloc;
+static struct kcore_list kcore_mem;
 
 void __init mem_init(void)
 {
@@ -227,8 +227,6 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 	       "%dk data, %dk init)\n",
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2cbc401..7108678 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -857,7 +857,7 @@ static void __init test_wp_bit(void)
 	}
 }
 
-static struct kcore_list kcore_mem, kcore_vmalloc;
+static struct kcore_list kcore_mem;
 
 void __init mem_init(void)
 {
@@ -887,8 +887,6 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index c05810b6..da2cae7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -647,7 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
+static struct kcore_list kcore_mem, kcore_kernel,
 			 kcore_modules, kcore_vsyscall;
 
 void __init mem_init(void)
@@ -678,8 +678,6 @@ void __init mem_init(void)
 
 	/* Register memory areas for /proc/kcore */
 	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START, KCORE_VMALLOC);
 	kclist_add(&kcore_kernel, &_stext, _end - _stext, KCORE_TEXT);
 	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN,
 			KCORE_OTHER);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 659c163..e10d360 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -372,9 +372,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	return acc;
 }
 
+static struct kcore_list kcore_vmalloc;
+
 static int __init proc_kcore_init(void)
 {
 	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
+
+	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
+		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
 	return 0;
 }
 module_init(proc_kcore_init);
-- 
cgit v0.10.2


From 9492587cf35d370db33ef4b38375dfb35a105b61 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:45 -0700
Subject: kcore: register text area in generic way

Some 64bit arch has special segment for mapping kernel text.  It should be
entried to /proc/kcore in addtion to direct-linear-map, vmalloc area.
This patch unifies KCORE_TEXT entry scattered under x86 and ia64.

I'm not familiar with other archs (mips has its own even after this patch)
but range of [_stext ..._end) is a valid area of text and it's not in
direct-map area, defining CONFIG_ARCH_PROC_KCORE_TEXT is only a necessary
thing to do.

Note: I left mips as it is now.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 011a1cd..6851e52 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -500,6 +500,10 @@ config HAVE_ARCH_NODEDATA_EXTENSION
 	def_bool y
 	depends on NUMA
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on PROC_KCORE
+
 config IA32_SUPPORT
 	bool "Support for Linux/x86 binaries"
 	help
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 286b98a..3f95ea1 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -617,7 +617,7 @@ mem_init (void)
 	long reserved_pages, codesize, datasize, initsize;
 	pg_data_t *pgdat;
 	int i;
-	static struct kcore_list kcore_mem, kcore_kernel;
+	static struct kcore_list kcore_mem;
 
 	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
 	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
@@ -640,7 +640,6 @@ mem_init (void)
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
 
 	kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE, KCORE_RAM);
-	kclist_add(&kcore_kernel, _stext, _end - _stext, KCORE_TEXT);
 
 	for_each_online_pgdat(pgdat)
 		if (pgdat->bdata->node_bootmem_map)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e4ff5d1..7c7a54b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1204,6 +1204,10 @@ config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA && X86_32
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on X86_64 && PROC_KCORE
+
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
 	depends on X86_64
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index da2cae7..a0c2efb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -647,8 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static struct kcore_list kcore_mem, kcore_kernel,
-			 kcore_modules, kcore_vsyscall;
+static struct kcore_list kcore_mem, kcore_modules, kcore_vsyscall;
 
 void __init mem_init(void)
 {
@@ -678,7 +677,6 @@ void __init mem_init(void)
 
 	/* Register memory areas for /proc/kcore */
 	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
-	kclist_add(&kcore_kernel, &_stext, _end - _stext, KCORE_TEXT);
 	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN,
 			KCORE_OTHER);
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e10d360..fdde1cc 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -21,6 +21,7 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <linux/list.h>
+#include <asm/sections.h>
 
 #define CORE_STR "CORE"
 
@@ -374,10 +375,26 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 
 static struct kcore_list kcore_vmalloc;
 
+#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
+static struct kcore_list kcore_text;
+/*
+ * If defined, special segment is used for mapping kernel text instead of
+ * direct-map area. We need to create special TEXT section.
+ */
+static void __init proc_kcore_text_init(void)
+{
+	kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT);
+}
+#else
+static void __init proc_kcore_text_init(void)
+{
+}
+#endif
+
 static int __init proc_kcore_init(void)
 {
 	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
-
+	proc_kcore_text_init();
 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
 		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
 	return 0;
-- 
cgit v0.10.2


From 908eedc6168bd92e89f90d89fa389065a36358fa Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:46 -0700
Subject: walk system ram range
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Originally, walk_memory_resource() was introduced to traverse all memory
of "System RAM" for detecting memory hotplug/unplug range.  For doing so,
flags of IORESOUCE_MEM|IORESOURCE_BUSY was used and this was enough for
memory hotplug.

But for using other purpose, /proc/kcore, this may includes some firmware
area marked as IORESOURCE_BUSY | IORESOUCE_MEM.  This patch makes the
check strict to find out busy "System RAM".

Note: PPC64 keeps their own walk_memory_resouce(), which walk through
ppc64's lmb informaton.  Because old kclist_add() is called per lmb, this
patch makes no difference in behavior, finally.

And this patch removes CONFIG_MEMORY_HOTPLUG check from this function.
Because pfn_valid() just show "there is memmap or not* and cannot be used
for "there is physical memory or not", this function is useful in generic
to scan physical memory range.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Américo Wang <xiyou.wangcong@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0e5c59b..5973631 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -143,8 +143,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
  * memory regions, find holes and callback for contiguous regions.
  */
 int
-walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
-			int (*func)(unsigned long, unsigned long, void *))
+walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
+		void *arg, int (*func)(unsigned long, unsigned long, void *))
 {
 	struct lmb_property res;
 	unsigned long pfn, len;
@@ -166,7 +166,7 @@ walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
 	}
 	return ret;
 }
-EXPORT_SYMBOL_GPL(walk_memory_resource);
+EXPORT_SYMBOL_GPL(walk_system_ram_range);
 
 /*
  * Initialize the bootmem system and give it all the memory we
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 7663a2a..7550a53 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -2463,7 +2463,7 @@ int ehca_create_busmap(void)
 	int ret;
 
 	ehca_mr_len = 0;
-	ret = walk_memory_resource(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
+	ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
 				   ehca_create_busmap_callback);
 	return ret;
 }
diff --git a/drivers/net/ehea/ehea_qmr.c b/drivers/net/ehea/ehea_qmr.c
index 3747457f5..bc7c5b7 100644
--- a/drivers/net/ehea/ehea_qmr.c
+++ b/drivers/net/ehea/ehea_qmr.c
@@ -751,7 +751,7 @@ int ehea_create_busmap(void)
 
 	mutex_lock(&ehea_busmap_mutex);
 	ehea_mr_len = 0;
-	ret = walk_memory_resource(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
+	ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
 				   ehea_create_busmap_callback);
 	mutex_unlock(&ehea_busmap_mutex);
 	return ret;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 786e7b8..83aa812 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -184,5 +184,9 @@ extern void __devm_release_region(struct device *dev, struct resource *parent,
 extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
 extern int iomem_is_exclusive(u64 addr);
 
+extern int
+walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
+		void *arg, int (*func)(unsigned long, unsigned long, void *));
+
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d95f72e..fed9692 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -191,14 +191,6 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
 
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
-/*
- * Walk through all memory which is registered as resource.
- * arg is (start_pfn, nr_pages, private_arg_pointer)
- */
-extern int walk_memory_resource(unsigned long start_pfn,
-			unsigned long nr_pages, void *arg,
-			int (*func)(unsigned long, unsigned long, void *));
-
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
 extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
diff --git a/kernel/resource.c b/kernel/resource.c
index 78b0872..fb11a58 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -223,13 +223,13 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-#if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
+#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 /*
  * Finds the lowest memory reosurce exists within [res->start.res->end)
- * the caller must specify res->start, res->end, res->flags.
+ * the caller must specify res->start, res->end, res->flags and "name".
  * If found, returns 0, res is overwritten, if not found, returns -1.
  */
-static int find_next_system_ram(struct resource *res)
+static int find_next_system_ram(struct resource *res, char *name)
 {
 	resource_size_t start, end;
 	struct resource *p;
@@ -245,6 +245,8 @@ static int find_next_system_ram(struct resource *res)
 		/* system ram is just marked as IORESOURCE_MEM */
 		if (p->flags != res->flags)
 			continue;
+		if (name && strcmp(p->name, name))
+			continue;
 		if (p->start > end) {
 			p = NULL;
 			break;
@@ -262,19 +264,26 @@ static int find_next_system_ram(struct resource *res)
 		res->end = p->end;
 	return 0;
 }
-int
-walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
-			int (*func)(unsigned long, unsigned long, void *))
+
+/*
+ * This function calls callback against all memory range of "System RAM"
+ * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
+ * Now, this function is only for "System RAM".
+ */
+int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
+		void *arg, int (*func)(unsigned long, unsigned long, void *))
 {
 	struct resource res;
 	unsigned long pfn, len;
 	u64 orig_end;
 	int ret = -1;
+
 	res.start = (u64) start_pfn << PAGE_SHIFT;
 	res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
 	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	orig_end = res.end;
-	while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
+	while ((res.start < res.end) &&
+		(find_next_system_ram(&res, "System RAM") >= 0)) {
 		pfn = (unsigned long)(res.start >> PAGE_SHIFT);
 		len = (unsigned long)((res.end + 1 - res.start) >> PAGE_SHIFT);
 		ret = (*func)(pfn, len, arg);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index efe3e0e..821dee5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -413,7 +413,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	if (!populated_zone(zone))
 		need_zonelists_rebuild = 1;
 
-	ret = walk_memory_resource(pfn, nr_pages, &onlined_pages,
+	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
 		online_pages_range);
 	if (ret) {
 		printk(KERN_DEBUG "online_pages %lx at %lx failed\n",
@@ -705,7 +705,7 @@ offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
 static void
 offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 {
-	walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL,
+	walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL,
 				offline_isolated_pages_cb);
 }
 
@@ -731,7 +731,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 	long offlined = 0;
 	int ret;
 
-	ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined,
+	ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined,
 			check_pages_isolated_cb);
 	if (ret < 0)
 		offlined = (long)ret;
-- 
cgit v0.10.2


From 3089aa1b0c07fb7c48f9829c619f50198307789d Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:48 -0700
Subject: kcore: use registerd physmem information

For /proc/kcore, each arch registers its memory range by kclist_add().
In usual,

	- range of physical memory
	- range of vmalloc area
	- text, etc...

are registered but "range of physical memory" has some troubles.  It
doesn't updated at memory hotplug and it tend to include unnecessary
memory holes.  Now, /proc/iomem (kernel/resource.c) includes required
physical memory range information and it's properly updated at memory
hotplug.  Then, it's good to avoid using its own code(duplicating
information) and to rebuild kclist for physical memory based on
/proc/iomem.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 3f95ea1..1857766 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -617,7 +617,6 @@ mem_init (void)
 	long reserved_pages, codesize, datasize, initsize;
 	pg_data_t *pgdat;
 	int i;
-	static struct kcore_list kcore_mem;
 
 	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
 	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
@@ -639,8 +638,6 @@ mem_init (void)
 
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE, KCORE_RAM);
-
 	for_each_online_pgdat(pgdat)
 		if (pgdat->bdata->node_bootmem_map)
 			totalram_pages += free_all_bootmem_node(pgdat);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 4a83da4..15aa190 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -352,7 +352,6 @@ void __init paging_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }
 
-static struct kcore_list kcore_mem;
 #ifdef CONFIG_64BIT
 static struct kcore_list kcore_kseg0;
 #endif
@@ -412,7 +411,6 @@ void __init mem_init(void)
 		kclist_add(&kcore_kseg0, (void *) CKSEG0,
 				0x80000000 - 4, KCORE_TEXT);
 #endif
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 38a450d..9ddcfb4 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -242,35 +242,3 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-#ifdef CONFIG_PROC_KCORE
-
-static int __init setup_kcore(void)
-{
-	int i;
-
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		unsigned long base;
-		unsigned long size;
-		struct kcore_list *kcore_mem;
-
-		base = lmb.memory.region[i].base;
-		size = lmb.memory.region[i].size;
-
-		kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
-		if (!kcore_mem)
-			panic("%s: kmalloc failed\n", __func__);
-
-		/* must stay under 32 bits */
-		if ( 0xfffffffful - (unsigned long)__va(base) < size) {
-			size = 0xfffffffful - (unsigned long)(__va(base));
-			printk(KERN_DEBUG "setup_kcore: restrict size=%lx\n",
-						size);
-		}
-
-		kclist_add(kcore_mem, __va(base), size, KCORE_RAM);
-	}
-
-	return 0;
-}
-module_init(setup_kcore);
-#endif
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 87ef492..335c578 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -109,32 +109,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-#ifdef CONFIG_PROC_KCORE
-
-static int __init setup_kcore(void)
-{
-	int i;
-
-	for (i=0; i < lmb.memory.cnt; i++) {
-		unsigned long base, size;
-		struct kcore_list *kcore_mem;
-
-		base = lmb.memory.region[i].base;
-		size = lmb.memory.region[i].size;
-
-		/* GFP_ATOMIC to avoid might_sleep warnings during boot */
-		kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
-		if (!kcore_mem)
-			panic("%s: kmalloc failed\n", __func__);
-
-		kclist_add(kcore_mem, __va(base), size, KCORE_RAM);
-	}
-
-	return 0;
-}
-module_init(setup_kcore);
-#endif
-
 static void pgd_ctor(void *addr)
 {
 	memset(addr, 0, PGD_TABLE_SIZE);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index bf8bd02..8173e38 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -186,8 +186,6 @@ void __init paging_init(void)
 	set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
 }
 
-static struct kcore_list kcore_mem;
-
 void __init mem_init(void)
 {
 	int codesize, datasize, initsize;
@@ -226,8 +224,6 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
-
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 	       "%dk data, %dk init)\n",
 		nr_free_pages() << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7108678..30938c1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -857,8 +857,6 @@ static void __init test_wp_bit(void)
 	}
 }
 
-static struct kcore_list kcore_mem;
-
 void __init mem_init(void)
 {
 	int codesize, reservedpages, datasize, initsize;
@@ -886,8 +884,6 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
-
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
 		nr_free_pages() << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a0c2efb..d5d23cc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -647,7 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static struct kcore_list kcore_mem, kcore_modules, kcore_vsyscall;
+static struct kcore_list kcore_modules, kcore_vsyscall;
 
 void __init mem_init(void)
 {
@@ -676,7 +676,6 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT, KCORE_RAM);
 	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN,
 			KCORE_OTHER);
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index fdde1cc..802de33 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -17,10 +17,14 @@
 #include <linux/elfcore.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/bootmem.h>
 #include <linux/init.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <linux/list.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/memory.h>
 #include <asm/sections.h>
 
 #define CORE_STR "CORE"
@@ -31,17 +35,6 @@
 
 static struct proc_dir_entry *proc_root_kcore;
 
-static int open_kcore(struct inode * inode, struct file * filp)
-{
-	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
-
-static const struct file_operations proc_kcore_operations = {
-	.read		= read_kcore,
-	.open		= open_kcore,
-};
 
 #ifndef kc_vaddr_to_offset
 #define	kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
@@ -61,6 +54,7 @@ struct memelfnote
 
 static LIST_HEAD(kclist_head);
 static DEFINE_RWLOCK(kclist_lock);
+static int kcore_need_update = 1;
 
 void
 kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
@@ -99,6 +93,126 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	return size + *elf_buflen;
 }
 
+static void free_kclist_ents(struct list_head *head)
+{
+	struct kcore_list *tmp, *pos;
+
+	list_for_each_entry_safe(pos, tmp, head, list) {
+		list_del(&pos->list);
+		kfree(pos);
+	}
+}
+/*
+ * Replace all KCORE_RAM information with passed list.
+ */
+static void __kcore_update_ram(struct list_head *list)
+{
+	struct kcore_list *tmp, *pos;
+	LIST_HEAD(garbage);
+
+	write_lock(&kclist_lock);
+	if (kcore_need_update) {
+		list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+			if (pos->type == KCORE_RAM)
+				list_move(&pos->list, &garbage);
+		}
+		list_splice_tail(list, &kclist_head);
+	} else
+		list_splice(list, &garbage);
+	kcore_need_update = 0;
+	write_unlock(&kclist_lock);
+
+	free_kclist_ents(&garbage);
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
+ * because memory hole is not as big as !HIGHMEM case.
+ * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
+ */
+static int kcore_update_ram(void)
+{
+	LIST_HEAD(head);
+	struct kcore_list *ent;
+	int ret = 0;
+
+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+	ent->addr = (unsigned long)__va(0);
+	ent->size = max_low_pfn << PAGE_SHIFT;
+	ent->type = KCORE_RAM;
+	list_add(&ent->list, &head);
+	__kcore_update_ram(&head);
+	return ret;
+}
+
+#else /* !CONFIG_HIGHMEM */
+
+static int
+kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
+{
+	struct list_head *head = (struct list_head *)arg;
+	struct kcore_list *ent;
+
+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+	ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+	ent->size = nr_pages << PAGE_SHIFT;
+
+	/* Sanity check: Can happen in 32bit arch...maybe */
+	if (ent->addr < (unsigned long) __va(0))
+		goto free_out;
+
+	/* cut not-mapped area. ....from ppc-32 code. */
+	if (ULONG_MAX - ent->addr < ent->size)
+		ent->size = ULONG_MAX - ent->addr;
+
+	/* cut when vmalloc() area is higher than direct-map area */
+	if (VMALLOC_START > (unsigned long)__va(0)) {
+		if (ent->addr > VMALLOC_START)
+			goto free_out;
+		if (VMALLOC_START - ent->addr < ent->size)
+			ent->size = VMALLOC_START - ent->addr;
+	}
+
+	ent->type = KCORE_RAM;
+	list_add_tail(&ent->list, head);
+	return 0;
+free_out:
+	kfree(ent);
+	return 1;
+}
+
+static int kcore_update_ram(void)
+{
+	int nid, ret;
+	unsigned long end_pfn;
+	LIST_HEAD(head);
+
+	/* Not inialized....update now */
+	/* find out "max pfn" */
+	end_pfn = 0;
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		unsigned long node_end;
+		node_end  = NODE_DATA(nid)->node_start_pfn +
+			NODE_DATA(nid)->node_spanned_pages;
+		if (end_pfn < node_end)
+			end_pfn = node_end;
+	}
+	/* scan 0 to max_pfn */
+	ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
+	if (ret) {
+		free_kclist_ents(&head);
+		return -ENOMEM;
+	}
+	__kcore_update_ram(&head);
+	return ret;
+}
+#endif /* CONFIG_HIGHMEM */
 
 /*****************************************************************************/
 /*
@@ -373,6 +487,39 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	return acc;
 }
 
+
+static int open_kcore(struct inode *inode, struct file *filp)
+{
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+	if (kcore_need_update)
+		kcore_update_ram();
+	return 0;
+}
+
+
+static const struct file_operations proc_kcore_operations = {
+	.read		= read_kcore,
+	.open		= open_kcore,
+};
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* just remember that we have to update kcore */
+static int __meminit kcore_callback(struct notifier_block *self,
+				    unsigned long action, void *arg)
+{
+	switch (action) {
+	case MEM_ONLINE:
+	case MEM_OFFLINE:
+		write_lock(&kclist_lock);
+		kcore_need_update = 1;
+		write_unlock(&kclist_lock);
+	}
+	return NOTIFY_OK;
+}
+#endif
+
+
 static struct kcore_list kcore_vmalloc;
 
 #ifdef CONFIG_ARCH_PROC_KCORE_TEXT
@@ -393,10 +540,18 @@ static void __init proc_kcore_text_init(void)
 
 static int __init proc_kcore_init(void)
 {
-	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
+	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
+				      &proc_kcore_operations);
+	/* Store text area if it's special */
 	proc_kcore_text_init();
+	/* Store vmalloc area */
 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
 		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
+	/* Store direct-map area from physical memory map */
+	kcore_update_ram();
+	hotplug_memory_notifier(kcore_callback, 0);
+	/* Other special area, area-for-module etc is arch specific. */
+
 	return 0;
 }
 module_init(proc_kcore_init);
-- 
cgit v0.10.2


From 26562c59fa9111ae3ea7b78045889662aac9e5ac Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:49 -0700
Subject: kcore: register vmemmap range

Benjamin Herrenschmidt <benh@kernel.crashing.org> pointed out that vmemmap
range is not included in KCORE_RAM, KCORE_VMALLOC ....

This adds KCORE_VMEMMAP if SPARSEMEM_VMEMMAP is used.  By this, vmemmap
can be readable via /proc/kcore

Because it's not vmalloc area, vread/vwrite cannot be used.  But the range
is static against the memory layout, this patch handles vmemmap area by
the same scheme with physical memory.

This patch assumes SPARSEMEM_VMEMMAP range is not in VMALLOC range.  It's
correct now.

[akpm@linux-foundation.org: fix typo]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 802de33..78970e6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -103,7 +103,7 @@ static void free_kclist_ents(struct list_head *head)
 	}
 }
 /*
- * Replace all KCORE_RAM information with passed list.
+ * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
  */
 static void __kcore_update_ram(struct list_head *list)
 {
@@ -113,7 +113,8 @@ static void __kcore_update_ram(struct list_head *list)
 	write_lock(&kclist_lock);
 	if (kcore_need_update) {
 		list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
-			if (pos->type == KCORE_RAM)
+			if (pos->type == KCORE_RAM
+				|| pos->type == KCORE_VMEMMAP)
 				list_move(&pos->list, &garbage);
 		}
 		list_splice_tail(list, &kclist_head);
@@ -151,6 +152,47 @@ static int kcore_update_ram(void)
 
 #else /* !CONFIG_HIGHMEM */
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/* calculate vmemmap's address from given system ram pfn and register it */
+int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
+{
+	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
+	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
+	unsigned long start, end;
+	struct kcore_list *vmm, *tmp;
+
+
+	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
+	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
+	end = ALIGN(end, PAGE_SIZE);
+	/* overlap check (because we have to align page */
+	list_for_each_entry(tmp, head, list) {
+		if (tmp->type != KCORE_VMEMMAP)
+			continue;
+		if (start < tmp->addr + tmp->size)
+			if (end > tmp->addr)
+				end = tmp->addr;
+	}
+	if (start < end) {
+		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
+		if (!vmm)
+			return 0;
+		vmm->addr = start;
+		vmm->size = end - start;
+		vmm->type = KCORE_VMEMMAP;
+		list_add_tail(&vmm->list, head);
+	}
+	return 1;
+
+}
+#else
+int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
+{
+	return 1;
+}
+
+#endif
+
 static int
 kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
@@ -181,6 +223,12 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 
 	ent->type = KCORE_RAM;
 	list_add_tail(&ent->list, head);
+
+	if (!get_sparsemem_vmemmap_info(ent, head)) {
+		list_del(&ent->list);
+		goto free_out;
+	}
+
 	return 0;
 free_out:
 	kfree(ent);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index bd7b840..379eaed 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -82,6 +82,7 @@ enum kcore_type {
 	KCORE_TEXT,
 	KCORE_VMALLOC,
 	KCORE_RAM,
+	KCORE_VMEMMAP,
 	KCORE_OTHER,
 };
 
-- 
cgit v0.10.2


From 81ac3ad9061dd9cd490ee92f0c5316a14d77ce18 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:49 -0700
Subject: kcore: register module area in generic way

Some archs define MODULED_VADDR/MODULES_END which is not in VMALLOC area.
This is handled only in x86-64.  This patch make it more generic.  And we
can use vread/vwrite to access the area.  Fix it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d5d23cc..5a4398a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -647,7 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static struct kcore_list kcore_modules, kcore_vsyscall;
+static struct kcore_list kcore_vsyscall;
 
 void __init mem_init(void)
 {
@@ -676,8 +676,6 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN,
-			KCORE_OTHER);
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
 			 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
 
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 78970e6..c6a5ec7 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -490,7 +490,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		if (m == NULL) {
 			if (clear_user(buffer, tsz))
 				return -EFAULT;
-		} else if (is_vmalloc_addr((void *)start)) {
+		} else if (is_vmalloc_or_module_addr((void *)start)) {
 			char * elf_buf;
 
 			elf_buf = kzalloc(tsz, GFP_KERNEL);
@@ -586,6 +586,22 @@ static void __init proc_kcore_text_init(void)
 }
 #endif
 
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+/*
+ * MODULES_VADDR has no intersection with VMALLOC_ADDR.
+ */
+struct kcore_list kcore_modules;
+static void __init add_modules_range(void)
+{
+	kclist_add(&kcore_modules, (void *)MODULES_VADDR,
+			MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
+}
+#else
+static void __init add_modules_range(void)
+{
+}
+#endif
+
 static int __init proc_kcore_init(void)
 {
 	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
@@ -595,6 +611,7 @@ static int __init proc_kcore_init(void)
 	/* Store vmalloc area */
 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
 		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
+	add_modules_range();
 	/* Store direct-map area from physical memory map */
 	kcore_update_ram();
 	hotplug_memory_notifier(kcore_callback, 0);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5946e2f..b6eae5e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -285,6 +285,14 @@ static inline int is_vmalloc_addr(const void *x)
 	return 0;
 #endif
 }
+#ifdef CONFIG_MMU
+extern int is_vmalloc_or_module_addr(const void *x);
+#else
+static int is_vmalloc_or_module_addr(const void *x)
+{
+	return 0;
+}
+#endif
 
 static inline struct page *compound_head(struct page *page)
 {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5535da1..69511e6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -184,7 +184,7 @@ static int vmap_page_range(unsigned long start, unsigned long end,
 	return ret;
 }
 
-static inline int is_vmalloc_or_module_addr(const void *x)
+int is_vmalloc_or_module_addr(const void *x)
 {
 	/*
 	 * ARM, x86-64 and sparc64 put modules in a special place,
-- 
cgit v0.10.2


From 90396f96b7da0e2305ffe0266d22b6f8221f28ba Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:50 -0700
Subject: kcore: more fixes for init

proc_kcore_init() doesn't check NULL case.  fix it and remove unnecessary
comments.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index c6a5ec7..7073378 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -606,6 +606,10 @@ static int __init proc_kcore_init(void)
 {
 	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
 				      &proc_kcore_operations);
+	if (!proc_root_kcore) {
+		printk(KERN_ERR "couldn't create /proc/kcore\n");
+		return 0; /* Always returns 0. */
+	}
 	/* Store text area if it's special */
 	proc_kcore_text_init();
 	/* Store vmalloc area */
@@ -615,7 +619,6 @@ static int __init proc_kcore_init(void)
 	/* Store direct-map area from physical memory map */
 	kcore_update_ram();
 	hotplug_memory_notifier(kcore_callback, 0);
-	/* Other special area, area-for-module etc is arch specific. */
 
 	return 0;
 }
-- 
cgit v0.10.2


From 678ad5d8aaf8925cb8465f84e1e47d9b1284666a Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:51 -0700
Subject: /proc/kcore: fix stat.st_size

Presently the size of /proc/kcore which can be read by 'ls -l' is 0.  But
it's not the correct value.

On x86-64, ls -l shows
 ... root root 140737486266368 2009-09-17 10:29 /proc/kcore
Then, 7FFFFFFE02000. This comes from vmalloc area's size.
(*) This shows "core" size, not  memory size.

This patch shows the size by updating "size" field in struct
proc_dir_entry.  Later, lookup routine will create inode and fill
inode->i_size based on this value.  Then, this has a problem.

 - Once inode is cached, inode->i_size will never be updated.

Then, this patch is not memory-hotplug-aware.

To update inode->i_size, we have to know dentry or inode.
But there is no way to lookup them by inside kernel. Hmmm....
Next patch will try it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 7073378..3d0485c 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -107,6 +107,8 @@ static void free_kclist_ents(struct list_head *head)
  */
 static void __kcore_update_ram(struct list_head *list)
 {
+	int nphdr;
+	size_t size;
 	struct kcore_list *tmp, *pos;
 	LIST_HEAD(garbage);
 
@@ -121,6 +123,7 @@ static void __kcore_update_ram(struct list_head *list)
 	} else
 		list_splice(list, &garbage);
 	kcore_need_update = 0;
+	proc_root_kcore->size = get_kcore_size(&nphdr, &size);
 	write_unlock(&kclist_lock);
 
 	free_kclist_ents(&garbage);
@@ -429,7 +432,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 	unsigned long start;
 
 	read_lock(&kclist_lock);
-	proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen);
+	size = get_kcore_size(&nphdr, &elf_buflen);
+
 	if (buflen == 0 || *fpos >= size) {
 		read_unlock(&kclist_lock);
 		return 0;
-- 
cgit v0.10.2


From 0d4c36a9b6ab6b15851f60956d901a3c53574ea8 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 22 Sep 2009 16:45:52 -0700
Subject: /proc/kcore: update stat.st_size after memory hotplug

After memory hotplug (or other events in future), kcore size can be
modified.

To update inode->i_size, we have to know inode/dentry but we can't get it
from inside /proc directly.  But considerinyg memory hotplug, kcore image
is updated only when it's opened.  Then, updating inode->i_size at open()
is enough.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 3d0485c..5601337 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -546,6 +546,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
 		return -EPERM;
 	if (kcore_need_update)
 		kcore_update_ram();
+	if (i_size_read(inode) != proc_root_kcore->size) {
+		mutex_lock(&inode->i_mutex);
+		i_size_write(inode, proc_root_kcore->size);
+		mutex_unlock(&inode->i_mutex);
+	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From a7e3108cca54c105f496919040f00df56767ec00 Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Tue, 22 Sep 2009 16:45:53 -0700
Subject: ramfs: move RAMFS_MAGIC to include/linux/magic.h

initramfs userspace likes to use this magic number.

Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: maximilian attems <max@stro.at>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a7f0110..a6090aa 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -34,12 +34,10 @@
 #include <linux/ramfs.h>
 #include <linux/sched.h>
 #include <linux/parser.h>
+#include <linux/magic.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
-/* some random number */
-#define RAMFS_MAGIC	0x858458f6
-
 #define RAMFS_DEFAULT_MODE	0755
 
 static const struct super_operations ramfs_ops;
diff --git a/include/linux/magic.h b/include/linux/magic.h
index bce3778..76285e0 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -12,6 +12,7 @@
 #define SYSFS_MAGIC		0x62656572
 #define SECURITYFS_MAGIC	0x73636673
 #define SELINUX_MAGIC		0xf97cff8c
+#define RAMFS_MAGIC		0x858458f6	/* some random number */
 #define TMPFS_MAGIC		0x01021994
 #define HUGETLBFS_MAGIC 	0x958458f6	/* some random number */
 #define SQUASHFS_MAGIC		0x73717368
-- 
cgit v0.10.2


From c5df59136a55fe08c21d39321679cbb008479edf Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 22 Sep 2009 16:45:54 -0700
Subject: ncpfs: read buffer overflow

This function uses signed integers for the unix_date and local variables -
if a negative number is supplied and the leap-year condition is not met,
month will be 0, leading to a later read of day_n[-1]

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Petr Vandrovec <VANDROVE@vc.cvut.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 9c59072..b8b5b30 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -1241,7 +1241,7 @@ ncp_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
 		month = 2;
 	} else {
 		nl_day = (year & 3) || day <= 59 ? day : day - 1;
-		for (month = 0; month < 12; month++)
+		for (month = 1; month < 12; month++)
 			if (day_n[month] > nl_day)
 				break;
 	}
-- 
cgit v0.10.2


From 1b83df308f69a5a3cc59be03bd7fb23e4bcebd8e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 22 Sep 2009 16:45:54 -0700
Subject: ncpfs: remove dead URL from documentation

Noticed-by: Joe Perches <joe@perches.com>
Cc: Petr Vandrovec <vandrove@vc.cvut.cz>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/filesystems/ncpfs.txt b/Documentation/filesystems/ncpfs.txt
index f12c30c..5af164f 100644
--- a/Documentation/filesystems/ncpfs.txt
+++ b/Documentation/filesystems/ncpfs.txt
@@ -7,6 +7,6 @@ ftp.gwdg.de/pub/linux/misc/ncpfs, but sunsite and its many mirrors
 will have it as well.
 
 Related products are linware and mars_nwe, which will give Linux partial
-NetWare server functionality.  Linware's home site is
-klokan.sh.cvut.cz/pub/linux/linware; mars_nwe can be found on
-ftp.gwdg.de/pub/linux/misc/ncpfs.
+NetWare server functionality.
+
+mars_nwe can be found on ftp.gwdg.de/pub/linux/misc/ncpfs.
-- 
cgit v0.10.2


From 8b2feb10c907b610bf8a739792c6b967c65445b0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 22 Sep 2009 16:45:55 -0700
Subject: ncpfs: fix wrong check in __ncp_ioctl()

We want to check for s_inode's existence, not inode's one (inode is always
valid in this function).

This takes care of the following entry from Dan's list:

fs/ncpfs/ioctl.c +445 __ncp_ioctl(180) warning: variable derefenced before check 'inode'

Reported-by: Dan Carpenter <error27@gmail.com>
Cc: Julia Lawall <julia@diku.dk>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Petr Vandrovec <vandrove@vc.cvut.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index fa038df..53a7ed7 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -442,7 +442,7 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
 			if (dentry) {
 				struct inode* s_inode = dentry->d_inode;
 				
-				if (inode) {
+				if (s_inode) {
 					NCP_FINFO(s_inode)->volNumber = vnum;
 					NCP_FINFO(s_inode)->dirEntNum = de;
 					NCP_FINFO(s_inode)->DosDirNum = dosde;
-- 
cgit v0.10.2


From 07fcaa2486ca4f5c67bebedfa56e705c4dd23fc2 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 22 Sep 2009 16:45:56 -0700
Subject: spi: remove i.MX SPI driver

This driver is in a non working state at the moment and will be replaced
by a bitbang driver which can also handle the newer i.MX variants

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: David Brownell <david-b@pacbell.net>
Acked-by: Andrea Paterniani <a.paterniani@swapp-eng.it>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 2c733c2..86056d14 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -116,13 +116,6 @@ config SPI_GPIO
 	  GPIO operations, you should be able to leverage that for better
 	  speed with a custom version of this driver; see the source code.
 
-config SPI_IMX
-	tristate "Freescale iMX SPI controller"
-	depends on ARCH_MX1 && EXPERIMENTAL
-	help
-	  This enables using the Freescale iMX SPI controller in master
-	  mode.
-
 config SPI_LM70_LLP
 	tristate "Parallel port adapter for LM70 eval board (DEVELOPMENT)"
 	depends on PARPORT && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 3de408d..0f20a70b 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -17,7 +17,6 @@ obj-$(CONFIG_SPI_BITBANG)		+= spi_bitbang.o
 obj-$(CONFIG_SPI_AU1550)		+= au1550_spi.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o
 obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
-obj-$(CONFIG_SPI_IMX)			+= spi_imx.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
 obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
 obj-$(CONFIG_SPI_OMAP_UWIRE)		+= omap_uwire.o
diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c
deleted file mode 100644
index c195e45..0000000
--- a/drivers/spi/spi_imx.c
+++ /dev/null
@@ -1,1770 +0,0 @@
-/*
- * drivers/spi/spi_imx.c
- *
- * Copyright (C) 2006 SWAPP
- *	Andrea Paterniani <a.paterniani@swapp-eng.it>
- *
- * Initial version inspired by:
- *	linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
-#include <linux/spi/spi.h>
-#include <linux/workqueue.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/delay.h>
-
-#include <mach/hardware.h>
-#include <mach/imx-dma.h>
-#include <mach/spi_imx.h>
-
-/*-------------------------------------------------------------------------*/
-/* SPI Registers offsets from peripheral base address */
-#define SPI_RXDATA		(0x00)
-#define SPI_TXDATA		(0x04)
-#define SPI_CONTROL		(0x08)
-#define SPI_INT_STATUS		(0x0C)
-#define SPI_TEST		(0x10)
-#define SPI_PERIOD		(0x14)
-#define SPI_DMA			(0x18)
-#define SPI_RESET		(0x1C)
-
-/* SPI Control Register Bit Fields & Masks */
-#define SPI_CONTROL_BITCOUNT_MASK	(0xF)		/* Bit Count Mask */
-#define SPI_CONTROL_BITCOUNT(n)		(((n) - 1) & SPI_CONTROL_BITCOUNT_MASK)
-#define SPI_CONTROL_POL			(0x1 << 4)      /* Clock Polarity Mask */
-#define SPI_CONTROL_POL_ACT_HIGH	(0x0 << 4)      /* Active high pol. (0=idle) */
-#define SPI_CONTROL_POL_ACT_LOW		(0x1 << 4)      /* Active low pol. (1=idle) */
-#define SPI_CONTROL_PHA			(0x1 << 5)      /* Clock Phase Mask */
-#define SPI_CONTROL_PHA_0		(0x0 << 5)      /* Clock Phase 0 */
-#define SPI_CONTROL_PHA_1		(0x1 << 5)      /* Clock Phase 1 */
-#define SPI_CONTROL_SSCTL		(0x1 << 6)      /* /SS Waveform Select Mask */
-#define SPI_CONTROL_SSCTL_0		(0x0 << 6)      /* Master: /SS stays low between SPI burst
-							   Slave: RXFIFO advanced by BIT_COUNT */
-#define SPI_CONTROL_SSCTL_1		(0x1 << 6)      /* Master: /SS insert pulse between SPI burst
-							   Slave: RXFIFO advanced by /SS rising edge */
-#define SPI_CONTROL_SSPOL		(0x1 << 7)      /* /SS Polarity Select Mask */
-#define SPI_CONTROL_SSPOL_ACT_LOW	(0x0 << 7)      /* /SS Active low */
-#define SPI_CONTROL_SSPOL_ACT_HIGH	(0x1 << 7)      /* /SS Active high */
-#define SPI_CONTROL_XCH			(0x1 << 8)      /* Exchange */
-#define SPI_CONTROL_SPIEN		(0x1 << 9)      /* SPI Module Enable */
-#define SPI_CONTROL_MODE		(0x1 << 10)     /* SPI Mode Select Mask */
-#define SPI_CONTROL_MODE_SLAVE		(0x0 << 10)     /* SPI Mode Slave */
-#define SPI_CONTROL_MODE_MASTER		(0x1 << 10)     /* SPI Mode Master */
-#define SPI_CONTROL_DRCTL		(0x3 << 11)     /* /SPI_RDY Control Mask */
-#define SPI_CONTROL_DRCTL_0		(0x0 << 11)     /* Ignore /SPI_RDY */
-#define SPI_CONTROL_DRCTL_1		(0x1 << 11)     /* /SPI_RDY falling edge triggers input */
-#define SPI_CONTROL_DRCTL_2		(0x2 << 11)     /* /SPI_RDY active low level triggers input */
-#define SPI_CONTROL_DATARATE		(0x7 << 13)     /* Data Rate Mask */
-#define SPI_PERCLK2_DIV_MIN		(0)		/* PERCLK2:4 */
-#define SPI_PERCLK2_DIV_MAX		(7)		/* PERCLK2:512 */
-#define SPI_CONTROL_DATARATE_MIN	(SPI_PERCLK2_DIV_MAX << 13)
-#define SPI_CONTROL_DATARATE_MAX	(SPI_PERCLK2_DIV_MIN << 13)
-#define SPI_CONTROL_DATARATE_BAD	(SPI_CONTROL_DATARATE_MIN + 1)
-
-/* SPI Interrupt/Status Register Bit Fields & Masks */
-#define SPI_STATUS_TE	(0x1 << 0)	/* TXFIFO Empty Status */
-#define SPI_STATUS_TH	(0x1 << 1)      /* TXFIFO Half Status */
-#define SPI_STATUS_TF	(0x1 << 2)      /* TXFIFO Full Status */
-#define SPI_STATUS_RR	(0x1 << 3)      /* RXFIFO Data Ready Status */
-#define SPI_STATUS_RH	(0x1 << 4)      /* RXFIFO Half Status */
-#define SPI_STATUS_RF	(0x1 << 5)      /* RXFIFO Full Status */
-#define SPI_STATUS_RO	(0x1 << 6)      /* RXFIFO Overflow */
-#define SPI_STATUS_BO	(0x1 << 7)      /* Bit Count Overflow */
-#define SPI_STATUS	(0xFF)		/* SPI Status Mask */
-#define SPI_INTEN_TE	(0x1 << 8)      /* TXFIFO Empty Interrupt Enable */
-#define SPI_INTEN_TH	(0x1 << 9)      /* TXFIFO Half Interrupt Enable */
-#define SPI_INTEN_TF	(0x1 << 10)     /* TXFIFO Full Interrupt Enable */
-#define SPI_INTEN_RE	(0x1 << 11)     /* RXFIFO Data Ready Interrupt Enable */
-#define SPI_INTEN_RH	(0x1 << 12)     /* RXFIFO Half Interrupt Enable */
-#define SPI_INTEN_RF	(0x1 << 13)     /* RXFIFO Full Interrupt Enable */
-#define SPI_INTEN_RO	(0x1 << 14)     /* RXFIFO Overflow Interrupt Enable */
-#define SPI_INTEN_BO	(0x1 << 15)     /* Bit Count Overflow Interrupt Enable */
-#define SPI_INTEN	(0xFF << 8)	/* SPI Interrupt Enable Mask */
-
-/* SPI Test Register Bit Fields & Masks */
-#define SPI_TEST_TXCNT		(0xF << 0)	/* TXFIFO Counter */
-#define SPI_TEST_RXCNT_LSB	(4)		/* RXFIFO Counter LSB */
-#define SPI_TEST_RXCNT		(0xF << 4)	/* RXFIFO Counter */
-#define SPI_TEST_SSTATUS	(0xF << 8)	/* State Machine Status */
-#define SPI_TEST_LBC		(0x1 << 14)	/* Loop Back Control */
-
-/* SPI Period Register Bit Fields & Masks */
-#define SPI_PERIOD_WAIT		(0x7FFF << 0)	/* Wait Between Transactions */
-#define SPI_PERIOD_MAX_WAIT	(0x7FFF)	/* Max Wait Between
-							Transactions */
-#define SPI_PERIOD_CSRC		(0x1 << 15)	/* Period Clock Source Mask */
-#define SPI_PERIOD_CSRC_BCLK	(0x0 << 15)	/* Period Clock Source is
-							Bit Clock */
-#define SPI_PERIOD_CSRC_32768	(0x1 << 15)	/* Period Clock Source is
-							32.768 KHz Clock */
-
-/* SPI DMA Register Bit Fields & Masks */
-#define SPI_DMA_RHDMA	(0x1 << 4)	/* RXFIFO Half Status */
-#define SPI_DMA_RFDMA	(0x1 << 5)      /* RXFIFO Full Status */
-#define SPI_DMA_TEDMA	(0x1 << 6)      /* TXFIFO Empty Status */
-#define SPI_DMA_THDMA	(0x1 << 7)      /* TXFIFO Half Status */
-#define SPI_DMA_RHDEN	(0x1 << 12)	/* RXFIFO Half DMA Request Enable */
-#define SPI_DMA_RFDEN	(0x1 << 13)     /* RXFIFO Full DMA Request Enable */
-#define SPI_DMA_TEDEN	(0x1 << 14)     /* TXFIFO Empty DMA Request Enable */
-#define SPI_DMA_THDEN	(0x1 << 15)     /* TXFIFO Half DMA Request Enable */
-
-/* SPI Soft Reset Register Bit Fields & Masks */
-#define SPI_RESET_START	(0x1)		/* Start */
-
-/* Default SPI configuration values */
-#define SPI_DEFAULT_CONTROL		\
-(					\
-	SPI_CONTROL_BITCOUNT(16) | 	\
-	SPI_CONTROL_POL_ACT_HIGH |	\
-	SPI_CONTROL_PHA_0 |		\
-	SPI_CONTROL_SPIEN |		\
-	SPI_CONTROL_SSCTL_1 |		\
-	SPI_CONTROL_MODE_MASTER |	\
-	SPI_CONTROL_DRCTL_0 |		\
-	SPI_CONTROL_DATARATE_MIN	\
-)
-#define SPI_DEFAULT_ENABLE_LOOPBACK	(0)
-#define SPI_DEFAULT_ENABLE_DMA		(0)
-#define SPI_DEFAULT_PERIOD_WAIT		(8)
-/*-------------------------------------------------------------------------*/
-
-
-/*-------------------------------------------------------------------------*/
-/* TX/RX SPI FIFO size */
-#define SPI_FIFO_DEPTH			(8)
-#define SPI_FIFO_BYTE_WIDTH		(2)
-#define SPI_FIFO_OVERFLOW_MARGIN	(2)
-
-/* DMA burst length for half full/empty request trigger */
-#define SPI_DMA_BLR			(SPI_FIFO_DEPTH * SPI_FIFO_BYTE_WIDTH / 2)
-
-/* Dummy char output to achieve reads.
-   Choosing something different from all zeroes may help pattern recogition
-   for oscilloscope analysis, but may break some drivers. */
-#define SPI_DUMMY_u8			0
-#define SPI_DUMMY_u16			((SPI_DUMMY_u8 << 8) | SPI_DUMMY_u8)
-#define SPI_DUMMY_u32			((SPI_DUMMY_u16 << 16) | SPI_DUMMY_u16)
-
-/**
- * Macro to change a u32 field:
- * @r : register to edit
- * @m : bit mask
- * @v : new value for the field correctly bit-alligned
-*/
-#define u32_EDIT(r, m, v)		r = (r & ~(m)) | (v)
-
-/* Message state */
-#define START_STATE			((void*)0)
-#define RUNNING_STATE			((void*)1)
-#define DONE_STATE			((void*)2)
-#define ERROR_STATE			((void*)-1)
-
-/* Queue state */
-#define QUEUE_RUNNING			(0)
-#define QUEUE_STOPPED			(1)
-
-#define IS_DMA_ALIGNED(x) 		(((u32)(x) & 0x03) == 0)
-#define DMA_ALIGNMENT			4
-/*-------------------------------------------------------------------------*/
-
-
-/*-------------------------------------------------------------------------*/
-/* Driver data structs */
-
-/* Context */
-struct driver_data {
-	/* Driver model hookup */
-	struct platform_device *pdev;
-
-	/* SPI framework hookup */
-	struct spi_master *master;
-
-	/* IMX hookup */
-	struct spi_imx_master *master_info;
-
-	/* Memory resources and SPI regs virtual address */
-	struct resource *ioarea;
-	void __iomem *regs;
-
-	/* SPI RX_DATA physical address */
-	dma_addr_t rd_data_phys;
-
-	/* Driver message queue */
-	struct workqueue_struct	*workqueue;
-	struct work_struct work;
-	spinlock_t lock;
-	struct list_head queue;
-	int busy;
-	int run;
-
-	/* Message Transfer pump */
-	struct tasklet_struct pump_transfers;
-
-	/* Current message, transfer and state */
-	struct spi_message *cur_msg;
-	struct spi_transfer *cur_transfer;
-	struct chip_data *cur_chip;
-
-	/* Rd / Wr buffers pointers */
-	size_t len;
-	void *tx;
-	void *tx_end;
-	void *rx;
-	void *rx_end;
-
-	u8 rd_only;
-	u8 n_bytes;
-	int cs_change;
-
-	/* Function pointers */
-	irqreturn_t (*transfer_handler)(struct driver_data *drv_data);
-	void (*cs_control)(u32 command);
-
-	/* DMA setup */
-	int rx_channel;
-	int tx_channel;
-	dma_addr_t rx_dma;
-	dma_addr_t tx_dma;
-	int rx_dma_needs_unmap;
-	int tx_dma_needs_unmap;
-	size_t tx_map_len;
-	u32 dummy_dma_buf ____cacheline_aligned;
-
-	struct clk *clk;
-};
-
-/* Runtime state */
-struct chip_data {
-	u32 control;
-	u32 period;
-	u32 test;
-
-	u8 enable_dma:1;
-	u8 bits_per_word;
-	u8 n_bytes;
-	u32 max_speed_hz;
-
-	void (*cs_control)(u32 command);
-};
-/*-------------------------------------------------------------------------*/
-
-
-static void pump_messages(struct work_struct *work);
-
-static void flush(struct driver_data *drv_data)
-{
-	void __iomem *regs = drv_data->regs;
-	u32 control;
-
-	dev_dbg(&drv_data->pdev->dev, "flush\n");
-
-	/* Wait for end of transaction */
-	do {
-		control = readl(regs + SPI_CONTROL);
-	} while (control & SPI_CONTROL_XCH);
-
-	/* Release chip select if requested, transfer delays are
-	   handled in pump_transfers */
-	if (drv_data->cs_change)
-		drv_data->cs_control(SPI_CS_DEASSERT);
-
-	/* Disable SPI to flush FIFOs */
-	writel(control & ~SPI_CONTROL_SPIEN, regs + SPI_CONTROL);
-	writel(control, regs + SPI_CONTROL);
-}
-
-static void restore_state(struct driver_data *drv_data)
-{
-	void __iomem *regs = drv_data->regs;
-	struct chip_data *chip = drv_data->cur_chip;
-
-	/* Load chip registers */
-	dev_dbg(&drv_data->pdev->dev,
-		"restore_state\n"
-		"    test    = 0x%08X\n"
-		"    control = 0x%08X\n",
-		chip->test,
-		chip->control);
-	writel(chip->test, regs + SPI_TEST);
-	writel(chip->period, regs + SPI_PERIOD);
-	writel(0, regs + SPI_INT_STATUS);
-	writel(chip->control, regs + SPI_CONTROL);
-}
-
-static void null_cs_control(u32 command)
-{
-}
-
-static inline u32 data_to_write(struct driver_data *drv_data)
-{
-	return ((u32)(drv_data->tx_end - drv_data->tx)) / drv_data->n_bytes;
-}
-
-static inline u32 data_to_read(struct driver_data *drv_data)
-{
-	return ((u32)(drv_data->rx_end - drv_data->rx)) / drv_data->n_bytes;
-}
-
-static int write(struct driver_data *drv_data)
-{
-	void __iomem *regs = drv_data->regs;
-	void *tx = drv_data->tx;
-	void *tx_end = drv_data->tx_end;
-	u8 n_bytes = drv_data->n_bytes;
-	u32 remaining_writes;
-	u32 fifo_avail_space;
-	u32 n;
-	u16 d;
-
-	/* Compute how many fifo writes to do */
-	remaining_writes = (u32)(tx_end - tx) / n_bytes;
-	fifo_avail_space = SPI_FIFO_DEPTH -
-				(readl(regs + SPI_TEST) & SPI_TEST_TXCNT);
-	if (drv_data->rx && (fifo_avail_space > SPI_FIFO_OVERFLOW_MARGIN))
-		/* Fix misunderstood receive overflow */
-		fifo_avail_space -= SPI_FIFO_OVERFLOW_MARGIN;
-	n = min(remaining_writes, fifo_avail_space);
-
-	dev_dbg(&drv_data->pdev->dev,
-		"write type %s\n"
-		"    remaining writes = %d\n"
-		"    fifo avail space = %d\n"
-		"    fifo writes      = %d\n",
-		(n_bytes == 1) ? "u8" : "u16",
-		remaining_writes,
-		fifo_avail_space,
-		n);
-
-	if (n > 0) {
-		/* Fill SPI TXFIFO */
-		if (drv_data->rd_only) {
-			tx += n * n_bytes;
-			while (n--)
-				writel(SPI_DUMMY_u16, regs + SPI_TXDATA);
-		} else {
-			if (n_bytes == 1) {
-				while (n--) {
-					d = *(u8*)tx;
-					writel(d, regs + SPI_TXDATA);
-					tx += 1;
-				}
-			} else {
-				while (n--) {
-					d = *(u16*)tx;
-					writel(d, regs + SPI_TXDATA);
-					tx += 2;
-				}
-			}
-		}
-
-		/* Trigger transfer */
-		writel(readl(regs + SPI_CONTROL) | SPI_CONTROL_XCH,
-			regs + SPI_CONTROL);
-
-		/* Update tx pointer */
-		drv_data->tx = tx;
-	}
-
-	return (tx >= tx_end);
-}
-
-static int read(struct driver_data *drv_data)
-{
-	void __iomem *regs = drv_data->regs;
-	void *rx = drv_data->rx;
-	void *rx_end = drv_data->rx_end;
-	u8 n_bytes = drv_data->n_bytes;
-	u32 remaining_reads;
-	u32 fifo_rxcnt;
-	u32 n;
-	u16 d;
-
-	/* Compute how many fifo reads to do */
-	remaining_reads = (u32)(rx_end - rx) / n_bytes;
-	fifo_rxcnt = (readl(regs + SPI_TEST) & SPI_TEST_RXCNT) >>
-			SPI_TEST_RXCNT_LSB;
-	n = min(remaining_reads, fifo_rxcnt);
-
-	dev_dbg(&drv_data->pdev->dev,
-		"read type %s\n"
-		"    remaining reads = %d\n"
-		"    fifo rx count   = %d\n"
-		"    fifo reads      = %d\n",
-		(n_bytes == 1) ? "u8" : "u16",
-		remaining_reads,
-		fifo_rxcnt,
-		n);
-
-	if (n > 0) {
-		/* Read SPI RXFIFO */
-		if (n_bytes == 1) {
-			while (n--) {
-				d = readl(regs + SPI_RXDATA);
-				*((u8*)rx) = d;
-				rx += 1;
-			}
-		} else {
-			while (n--) {
-				d = readl(regs + SPI_RXDATA);
-				*((u16*)rx) = d;
-				rx += 2;
-			}
-		}
-
-		/* Update rx pointer */
-		drv_data->rx = rx;
-	}
-
-	return (rx >= rx_end);
-}
-
-static void *next_transfer(struct driver_data *drv_data)
-{
-	struct spi_message *msg = drv_data->cur_msg;
-	struct spi_transfer *trans = drv_data->cur_transfer;
-
-	/* Move to next transfer */
-	if (trans->transfer_list.next != &msg->transfers) {
-		drv_data->cur_transfer =
-			list_entry(trans->transfer_list.next,
-					struct spi_transfer,
-					transfer_list);
-		return RUNNING_STATE;
-	}
-
-	return DONE_STATE;
-}
-
-static int map_dma_buffers(struct driver_data *drv_data)
-{
-	struct spi_message *msg;
-	struct device *dev;
-	void *buf;
-
-	drv_data->rx_dma_needs_unmap = 0;
-	drv_data->tx_dma_needs_unmap = 0;
-
-	if (!drv_data->master_info->enable_dma ||
-		!drv_data->cur_chip->enable_dma)
-			return -1;
-
-	msg = drv_data->cur_msg;
-	dev = &msg->spi->dev;
-	if (msg->is_dma_mapped) {
-		if (drv_data->tx_dma)
-			/* The caller provided at least dma and cpu virtual
-			   address for write; pump_transfers() will consider the
-			   transfer as write only if cpu rx virtual address is
-			   NULL */
-			return 0;
-
-		if (drv_data->rx_dma) {
-			/* The caller provided dma and cpu virtual address to
-			   performe read only transfer -->
-			   use drv_data->dummy_dma_buf for dummy writes to
-			   achive reads */
-			buf = &drv_data->dummy_dma_buf;
-			drv_data->tx_map_len = sizeof(drv_data->dummy_dma_buf);
-			drv_data->tx_dma = dma_map_single(dev,
-							buf,
-							drv_data->tx_map_len,
-							DMA_TO_DEVICE);
-			if (dma_mapping_error(dev, drv_data->tx_dma))
-				return -1;
-
-			drv_data->tx_dma_needs_unmap = 1;
-
-			/* Flags transfer as rd_only for pump_transfers() DMA
-			   regs programming (should be redundant) */
-			drv_data->tx = NULL;
-
-			return 0;
-		}
-	}
-
-	if (!IS_DMA_ALIGNED(drv_data->rx) || !IS_DMA_ALIGNED(drv_data->tx))
-		return -1;
-
-	if (drv_data->tx == NULL) {
-		/* Read only message --> use drv_data->dummy_dma_buf for dummy
-		   writes to achive reads */
-		buf = &drv_data->dummy_dma_buf;
-		drv_data->tx_map_len = sizeof(drv_data->dummy_dma_buf);
-	} else {
-		buf = drv_data->tx;
-		drv_data->tx_map_len = drv_data->len;
-	}
-	drv_data->tx_dma = dma_map_single(dev,
-					buf,
-					drv_data->tx_map_len,
-					DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, drv_data->tx_dma))
-		return -1;
-	drv_data->tx_dma_needs_unmap = 1;
-
-	/* NULL rx means write-only transfer and no map needed
-	 * since rx DMA will not be used */
-	if (drv_data->rx) {
-		buf = drv_data->rx;
-		drv_data->rx_dma = dma_map_single(dev,
-						buf,
-						drv_data->len,
-						DMA_FROM_DEVICE);
-		if (dma_mapping_error(dev, drv_data->rx_dma)) {
-			if (drv_data->tx_dma) {
-				dma_unmap_single(dev,
-						drv_data->tx_dma,
-						drv_data->tx_map_len,
-						DMA_TO_DEVICE);
-				drv_data->tx_dma_needs_unmap = 0;
-			}
-			return -1;
-		}
-		drv_data->rx_dma_needs_unmap = 1;
-	}
-
-	return 0;
-}
-
-static void unmap_dma_buffers(struct driver_data *drv_data)
-{
-	struct spi_message *msg = drv_data->cur_msg;
-	struct device *dev = &msg->spi->dev;
-
-	if (drv_data->rx_dma_needs_unmap) {
-		dma_unmap_single(dev,
-				drv_data->rx_dma,
-				drv_data->len,
-				DMA_FROM_DEVICE);
-		drv_data->rx_dma_needs_unmap = 0;
-	}
-	if (drv_data->tx_dma_needs_unmap) {
-		dma_unmap_single(dev,
-				drv_data->tx_dma,
-				drv_data->tx_map_len,
-				DMA_TO_DEVICE);
-		drv_data->tx_dma_needs_unmap = 0;
-	}
-}
-
-/* Caller already set message->status (dma is already blocked) */
-static void giveback(struct spi_message *message, struct driver_data *drv_data)
-{
-	void __iomem *regs = drv_data->regs;
-
-	/* Bring SPI to sleep; restore_state() and pump_transfer()
-	   will do new setup */
-	writel(0, regs + SPI_INT_STATUS);
-	writel(0, regs + SPI_DMA);
-
-	/* Unconditioned deselct */
-	drv_data->cs_control(SPI_CS_DEASSERT);
-
-	message->state = NULL;
-	if (message->complete)
-		message->complete(message->context);
-
-	drv_data->cur_msg = NULL;
-	drv_data->cur_transfer = NULL;
-	drv_data->cur_chip = NULL;
-	queue_work(drv_data->workqueue, &drv_data->work);
-}
-
-static void dma_err_handler(int channel, void *data, int errcode)
-{
-	struct driver_data *drv_data = data;
-	struct spi_message *msg = drv_data->cur_msg;
-
-	dev_dbg(&drv_data->pdev->dev, "dma_err_handler\n");
-
-	/* Disable both rx and tx dma channels */
-	imx_dma_disable(drv_data->rx_channel);
-	imx_dma_disable(drv_data->tx_channel);
-	unmap_dma_buffers(drv_data);
-
-	flush(drv_data);
-
-	msg->state = ERROR_STATE;
-	tasklet_schedule(&drv_data->pump_transfers);
-}
-
-static void dma_tx_handler(int channel, void *data)
-{
-	struct driver_data *drv_data = data;
-
-	dev_dbg(&drv_data->pdev->dev, "dma_tx_handler\n");
-
-	imx_dma_disable(channel);
-
-	/* Now waits for TX FIFO empty */
-	writel(SPI_INTEN_TE, drv_data->regs + SPI_INT_STATUS);
-}
-
-static irqreturn_t dma_transfer(struct driver_data *drv_data)
-{
-	u32 status;
-	struct spi_message *msg = drv_data->cur_msg;
-	void __iomem *regs = drv_data->regs;
-
-	status = readl(regs + SPI_INT_STATUS);
-
-	if ((status & (SPI_INTEN_RO | SPI_STATUS_RO))
-			== (SPI_INTEN_RO | SPI_STATUS_RO)) {
-		writel(status & ~SPI_INTEN, regs + SPI_INT_STATUS);
-
-		imx_dma_disable(drv_data->tx_channel);
-		imx_dma_disable(drv_data->rx_channel);
-		unmap_dma_buffers(drv_data);
-
-		flush(drv_data);
-
-		dev_warn(&drv_data->pdev->dev,
-				"dma_transfer - fifo overun\n");
-
-		msg->state = ERROR_STATE;
-		tasklet_schedule(&drv_data->pump_transfers);
-
-		return IRQ_HANDLED;
-	}
-
-	if (status & SPI_STATUS_TE) {
-		writel(status & ~SPI_INTEN_TE, regs + SPI_INT_STATUS);
-
-		if (drv_data->rx) {
-			/* Wait end of transfer before read trailing data */
-			while (readl(regs + SPI_CONTROL) & SPI_CONTROL_XCH)
-				cpu_relax();
-
-			imx_dma_disable(drv_data->rx_channel);
-			unmap_dma_buffers(drv_data);
-
-			/* Release chip select if requested, transfer delays are
-			   handled in pump_transfers() */
-			if (drv_data->cs_change)
-				drv_data->cs_control(SPI_CS_DEASSERT);
-
-			/* Calculate number of trailing data and read them */
-			dev_dbg(&drv_data->pdev->dev,
-				"dma_transfer - test = 0x%08X\n",
-				readl(regs + SPI_TEST));
-			drv_data->rx = drv_data->rx_end -
-					((readl(regs + SPI_TEST) &
-					SPI_TEST_RXCNT) >>
-					SPI_TEST_RXCNT_LSB)*drv_data->n_bytes;
-			read(drv_data);
-		} else {
-			/* Write only transfer */
-			unmap_dma_buffers(drv_data);
-
-			flush(drv_data);
-		}
-
-		/* End of transfer, update total byte transfered */
-		msg->actual_length += drv_data->len;
-
-		/* Move to next transfer */
-		msg->state = next_transfer(drv_data);
-
-		/* Schedule transfer tasklet */
-		tasklet_schedule(&drv_data->pump_transfers);
-
-		return IRQ_HANDLED;
-	}
-
-	/* Opps problem detected */
-	return IRQ_NONE;
-}
-
-static irqreturn_t interrupt_wronly_transfer(struct driver_data *drv_data)
-{
-	struct spi_message *msg = drv_data->cur_msg;
-	void __iomem *regs = drv_data->regs;
-	u32 status;
-	irqreturn_t handled = IRQ_NONE;
-
-	status = readl(regs + SPI_INT_STATUS);
-
-	if (status & SPI_INTEN_TE) {
-		/* TXFIFO Empty Interrupt on the last transfered word */
-		writel(status & ~SPI_INTEN, regs + SPI_INT_STATUS);
-		dev_dbg(&drv_data->pdev->dev,
-			"interrupt_wronly_transfer - end of tx\n");
-
-		flush(drv_data);
-
-		/* Update total byte transfered */
-		msg->actual_length += drv_data->len;
-
-		/* Move to next transfer */
-		msg->state = next_transfer(drv_data);
-
-		/* Schedule transfer tasklet */
-		tasklet_schedule(&drv_data->pump_transfers);
-
-		return IRQ_HANDLED;
-	} else {
-		while (status & SPI_STATUS_TH) {
-			dev_dbg(&drv_data->pdev->dev,
-				"interrupt_wronly_transfer - status = 0x%08X\n",
-				status);
-
-			/* Pump data */
-			if (write(drv_data)) {
-				/* End of TXFIFO writes,
-				   now wait until TXFIFO is empty */
-				writel(SPI_INTEN_TE, regs + SPI_INT_STATUS);
-				return IRQ_HANDLED;
-			}
-
-			status = readl(regs + SPI_INT_STATUS);
-
-			/* We did something */
-			handled = IRQ_HANDLED;
-		}
-	}
-
-	return handled;
-}
-
-static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
-{
-	struct spi_message *msg = drv_data->cur_msg;
-	void __iomem *regs = drv_data->regs;
-	u32 status, control;
-	irqreturn_t handled = IRQ_NONE;
-	unsigned long limit;
-
-	status = readl(regs + SPI_INT_STATUS);
-
-	if (status & SPI_INTEN_TE) {
-		/* TXFIFO Empty Interrupt on the last transfered word */
-		writel(status & ~SPI_INTEN, regs + SPI_INT_STATUS);
-		dev_dbg(&drv_data->pdev->dev,
-			"interrupt_transfer - end of tx\n");
-
-		if (msg->state == ERROR_STATE) {
-			/* RXFIFO overrun was detected and message aborted */
-			flush(drv_data);
-		} else {
-			/* Wait for end of transaction */
-			do {
-				control = readl(regs + SPI_CONTROL);
-			} while (control & SPI_CONTROL_XCH);
-
-			/* Release chip select if requested, transfer delays are
-			   handled in pump_transfers */
-			if (drv_data->cs_change)
-				drv_data->cs_control(SPI_CS_DEASSERT);
-
-			/* Read trailing bytes */
-			limit = loops_per_jiffy << 1;
-			while ((read(drv_data) == 0) && --limit)
-				cpu_relax();
-
-			if (limit == 0)
-				dev_err(&drv_data->pdev->dev,
-					"interrupt_transfer - "
-					"trailing byte read failed\n");
-			else
-				dev_dbg(&drv_data->pdev->dev,
-					"interrupt_transfer - end of rx\n");
-
-			/* Update total byte transfered */
-			msg->actual_length += drv_data->len;
-
-			/* Move to next transfer */
-			msg->state = next_transfer(drv_data);
-		}
-
-		/* Schedule transfer tasklet */
-		tasklet_schedule(&drv_data->pump_transfers);
-
-		return IRQ_HANDLED;
-	} else {
-		while (status & (SPI_STATUS_TH | SPI_STATUS_RO)) {
-			dev_dbg(&drv_data->pdev->dev,
-				"interrupt_transfer - status = 0x%08X\n",
-				status);
-
-			if (status & SPI_STATUS_RO) {
-				/* RXFIFO overrun, abort message end wait
-				   until TXFIFO is empty */
-				writel(SPI_INTEN_TE, regs + SPI_INT_STATUS);
-
-				dev_warn(&drv_data->pdev->dev,
-					"interrupt_transfer - fifo overun\n"
-					"    data not yet written = %d\n"
-					"    data not yet read    = %d\n",
-					data_to_write(drv_data),
-					data_to_read(drv_data));
-
-				msg->state = ERROR_STATE;
-
-				return IRQ_HANDLED;
-			}
-
-			/* Pump data */
-			read(drv_data);
-			if (write(drv_data)) {
-				/* End of TXFIFO writes,
-				   now wait until TXFIFO is empty */
-				writel(SPI_INTEN_TE, regs + SPI_INT_STATUS);
-				return IRQ_HANDLED;
-			}
-
-			status = readl(regs + SPI_INT_STATUS);
-
-			/* We did something */
-			handled = IRQ_HANDLED;
-		}
-	}
-
-	return handled;
-}
-
-static irqreturn_t spi_int(int irq, void *dev_id)
-{
-	struct driver_data *drv_data = (struct driver_data *)dev_id;
-
-	if (!drv_data->cur_msg) {
-		dev_err(&drv_data->pdev->dev,
-			"spi_int - bad message state\n");
-		/* Never fail */
-		return IRQ_HANDLED;
-	}
-
-	return drv_data->transfer_handler(drv_data);
-}
-
-static inline u32 spi_speed_hz(struct driver_data *drv_data, u32 data_rate)
-{
-	return clk_get_rate(drv_data->clk) / (4 << ((data_rate) >> 13));
-}
-
-static u32 spi_data_rate(struct driver_data *drv_data, u32 speed_hz)
-{
-	u32 div;
-	u32 quantized_hz = clk_get_rate(drv_data->clk) >> 2;
-
-	for (div = SPI_PERCLK2_DIV_MIN;
-		div <= SPI_PERCLK2_DIV_MAX;
-		div++, quantized_hz >>= 1) {
-			if (quantized_hz <= speed_hz)
-				/* Max available speed LEQ required speed */
-				return div << 13;
-	}
-	return SPI_CONTROL_DATARATE_BAD;
-}
-
-static void pump_transfers(unsigned long data)
-{
-	struct driver_data *drv_data = (struct driver_data *)data;
-	struct spi_message *message;
-	struct spi_transfer *transfer, *previous;
-	struct chip_data *chip;
-	void __iomem *regs;
-	u32 tmp, control;
-
-	dev_dbg(&drv_data->pdev->dev, "pump_transfer\n");
-
-	message = drv_data->cur_msg;
-
-	/* Handle for abort */
-	if (message->state == ERROR_STATE) {
-		message->status = -EIO;
-		giveback(message, drv_data);
-		return;
-	}
-
-	/* Handle end of message */
-	if (message->state == DONE_STATE) {
-		message->status = 0;
-		giveback(message, drv_data);
-		return;
-	}
-
-	chip = drv_data->cur_chip;
-
-	/* Delay if requested at end of transfer*/
-	transfer = drv_data->cur_transfer;
-	if (message->state == RUNNING_STATE) {
-		previous = list_entry(transfer->transfer_list.prev,
-					struct spi_transfer,
-					transfer_list);
-		if (previous->delay_usecs)
-			udelay(previous->delay_usecs);
-	} else {
-		/* START_STATE */
-		message->state = RUNNING_STATE;
-		drv_data->cs_control = chip->cs_control;
-	}
-
-	transfer = drv_data->cur_transfer;
-	drv_data->tx = (void *)transfer->tx_buf;
-	drv_data->tx_end = drv_data->tx + transfer->len;
-	drv_data->rx = transfer->rx_buf;
-	drv_data->rx_end = drv_data->rx + transfer->len;
-	drv_data->rx_dma = transfer->rx_dma;
-	drv_data->tx_dma = transfer->tx_dma;
-	drv_data->len = transfer->len;
-	drv_data->cs_change = transfer->cs_change;
-	drv_data->rd_only = (drv_data->tx == NULL);
-
-	regs = drv_data->regs;
-	control = readl(regs + SPI_CONTROL);
-
-	/* Bits per word setup */
-	tmp = transfer->bits_per_word;
-	if (tmp == 0) {
-		/* Use device setup */
-		tmp = chip->bits_per_word;
-		drv_data->n_bytes = chip->n_bytes;
-	} else
-		/* Use per-transfer setup */
-		drv_data->n_bytes = (tmp <= 8) ? 1 : 2;
-	u32_EDIT(control, SPI_CONTROL_BITCOUNT_MASK, tmp - 1);
-
-	/* Speed setup (surely valid because already checked) */
-	tmp = transfer->speed_hz;
-	if (tmp == 0)
-		tmp = chip->max_speed_hz;
-	tmp = spi_data_rate(drv_data, tmp);
-	u32_EDIT(control, SPI_CONTROL_DATARATE, tmp);
-
-	writel(control, regs + SPI_CONTROL);
-
-	/* Assert device chip-select */
-	drv_data->cs_control(SPI_CS_ASSERT);
-
-	/* DMA cannot read/write SPI FIFOs other than 16 bits at a time; hence
-	   if bits_per_word is less or equal 8 PIO transfers are performed.
-	   Moreover DMA is convinient for transfer length bigger than FIFOs
-	   byte size. */
-	if ((drv_data->n_bytes == 2) &&
-		(drv_data->len > SPI_FIFO_DEPTH*SPI_FIFO_BYTE_WIDTH) &&
-		(map_dma_buffers(drv_data) == 0)) {
-		dev_dbg(&drv_data->pdev->dev,
-			"pump dma transfer\n"
-			"    tx      = %p\n"
-			"    tx_dma  = %08X\n"
-			"    rx      = %p\n"
-			"    rx_dma  = %08X\n"
-			"    len     = %d\n",
-			drv_data->tx,
-			(unsigned int)drv_data->tx_dma,
-			drv_data->rx,
-			(unsigned int)drv_data->rx_dma,
-			drv_data->len);
-
-		/* Ensure we have the correct interrupt handler */
-		drv_data->transfer_handler = dma_transfer;
-
-		/* Trigger transfer */
-		writel(readl(regs + SPI_CONTROL) | SPI_CONTROL_XCH,
-			regs + SPI_CONTROL);
-
-		/* Setup tx DMA */
-		if (drv_data->tx)
-			/* Linear source address */
-			CCR(drv_data->tx_channel) =
-				CCR_DMOD_FIFO |
-				CCR_SMOD_LINEAR |
-				CCR_SSIZ_32 | CCR_DSIZ_16 |
-				CCR_REN;
-		else
-			/* Read only transfer -> fixed source address for
-			   dummy write to achive read */
-			CCR(drv_data->tx_channel) =
-				CCR_DMOD_FIFO |
-				CCR_SMOD_FIFO |
-				CCR_SSIZ_32 | CCR_DSIZ_16 |
-				CCR_REN;
-
-		imx_dma_setup_single(
-			drv_data->tx_channel,
-			drv_data->tx_dma,
-			drv_data->len,
-			drv_data->rd_data_phys + 4,
-			DMA_MODE_WRITE);
-
-		if (drv_data->rx) {
-			/* Setup rx DMA for linear destination address */
-			CCR(drv_data->rx_channel) =
-				CCR_DMOD_LINEAR |
-				CCR_SMOD_FIFO |
-				CCR_DSIZ_32 | CCR_SSIZ_16 |
-				CCR_REN;
-			imx_dma_setup_single(
-				drv_data->rx_channel,
-				drv_data->rx_dma,
-				drv_data->len,
-				drv_data->rd_data_phys,
-				DMA_MODE_READ);
-			imx_dma_enable(drv_data->rx_channel);
-
-			/* Enable SPI interrupt */
-			writel(SPI_INTEN_RO, regs + SPI_INT_STATUS);
-
-			/* Set SPI to request DMA service on both
-			   Rx and Tx half fifo watermark */
-			writel(SPI_DMA_RHDEN | SPI_DMA_THDEN, regs + SPI_DMA);
-		} else
-			/* Write only access -> set SPI to request DMA
-			   service on Tx half fifo watermark */
-			writel(SPI_DMA_THDEN, regs + SPI_DMA);
-
-		imx_dma_enable(drv_data->tx_channel);
-	} else {
-		dev_dbg(&drv_data->pdev->dev,
-			"pump pio transfer\n"
-			"    tx      = %p\n"
-			"    rx      = %p\n"
-			"    len     = %d\n",
-			drv_data->tx,
-			drv_data->rx,
-			drv_data->len);
-
-		/* Ensure we have the correct interrupt handler	*/
-		if (drv_data->rx)
-			drv_data->transfer_handler = interrupt_transfer;
-		else
-			drv_data->transfer_handler = interrupt_wronly_transfer;
-
-		/* Enable SPI interrupt */
-		if (drv_data->rx)
-			writel(SPI_INTEN_TH | SPI_INTEN_RO,
-				regs + SPI_INT_STATUS);
-		else
-			writel(SPI_INTEN_TH, regs + SPI_INT_STATUS);
-	}
-}
-
-static void pump_messages(struct work_struct *work)
-{
-	struct driver_data *drv_data =
-				container_of(work, struct driver_data, work);
-	unsigned long flags;
-
-	/* Lock queue and check for queue work */
-	spin_lock_irqsave(&drv_data->lock, flags);
-	if (list_empty(&drv_data->queue) || drv_data->run == QUEUE_STOPPED) {
-		drv_data->busy = 0;
-		spin_unlock_irqrestore(&drv_data->lock, flags);
-		return;
-	}
-
-	/* Make sure we are not already running a message */
-	if (drv_data->cur_msg) {
-		spin_unlock_irqrestore(&drv_data->lock, flags);
-		return;
-	}
-
-	/* Extract head of queue */
-	drv_data->cur_msg = list_entry(drv_data->queue.next,
-					struct spi_message, queue);
-	list_del_init(&drv_data->cur_msg->queue);
-	drv_data->busy = 1;
-	spin_unlock_irqrestore(&drv_data->lock, flags);
-
-	/* Initial message state */
-	drv_data->cur_msg->state = START_STATE;
-	drv_data->cur_transfer = list_entry(drv_data->cur_msg->transfers.next,
-						struct spi_transfer,
-						transfer_list);
-
-	/* Setup the SPI using the per chip configuration */
-	drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi);
-	restore_state(drv_data);
-
-	/* Mark as busy and launch transfers */
-	tasklet_schedule(&drv_data->pump_transfers);
-}
-
-static int transfer(struct spi_device *spi, struct spi_message *msg)
-{
-	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
-	u32 min_speed_hz, max_speed_hz, tmp;
-	struct spi_transfer *trans;
-	unsigned long flags;
-
-	msg->actual_length = 0;
-
-	/* Per transfer setup check */
-	min_speed_hz = spi_speed_hz(drv_data, SPI_CONTROL_DATARATE_MIN);
-	max_speed_hz = spi->max_speed_hz;
-	list_for_each_entry(trans, &msg->transfers, transfer_list) {
-		tmp = trans->bits_per_word;
-		if (tmp > 16) {
-			dev_err(&drv_data->pdev->dev,
-				"message rejected : "
-				"invalid transfer bits_per_word (%d bits)\n",
-				tmp);
-			goto msg_rejected;
-		}
-		tmp = trans->speed_hz;
-		if (tmp) {
-			if (tmp < min_speed_hz) {
-				dev_err(&drv_data->pdev->dev,
-					"message rejected : "
-					"device min speed (%d Hz) exceeds "
-					"required transfer speed (%d Hz)\n",
-					min_speed_hz,
-					tmp);
-				goto msg_rejected;
-			} else if (tmp > max_speed_hz) {
-				dev_err(&drv_data->pdev->dev,
-					"message rejected : "
-					"transfer speed (%d Hz) exceeds "
-					"device max speed (%d Hz)\n",
-					tmp,
-					max_speed_hz);
-				goto msg_rejected;
-			}
-		}
-	}
-
-	/* Message accepted */
-	msg->status = -EINPROGRESS;
-	msg->state = START_STATE;
-
-	spin_lock_irqsave(&drv_data->lock, flags);
-	if (drv_data->run == QUEUE_STOPPED) {
-		spin_unlock_irqrestore(&drv_data->lock, flags);
-		return -ESHUTDOWN;
-	}
-
-	list_add_tail(&msg->queue, &drv_data->queue);
-	if (drv_data->run == QUEUE_RUNNING && !drv_data->busy)
-		queue_work(drv_data->workqueue, &drv_data->work);
-
-	spin_unlock_irqrestore(&drv_data->lock, flags);
-	return 0;
-
-msg_rejected:
-	/* Message rejected and not queued */
-	msg->status = -EINVAL;
-	msg->state = ERROR_STATE;
-	if (msg->complete)
-		msg->complete(msg->context);
-	return -EINVAL;
-}
-
-/* On first setup bad values must free chip_data memory since will cause
-   spi_new_device to fail. Bad value setup from protocol driver are simply not
-   applied and notified to the calling driver. */
-static int setup(struct spi_device *spi)
-{
-	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
-	struct spi_imx_chip *chip_info;
-	struct chip_data *chip;
-	int first_setup = 0;
-	u32 tmp;
-	int status = 0;
-
-	/* Get controller data */
-	chip_info = spi->controller_data;
-
-	/* Get controller_state */
-	chip = spi_get_ctldata(spi);
-	if (chip == NULL) {
-		first_setup = 1;
-
-		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
-		if (!chip) {
-			dev_err(&spi->dev,
-				"setup - cannot allocate controller state\n");
-			return -ENOMEM;
-		}
-		chip->control = SPI_DEFAULT_CONTROL;
-
-		if (chip_info == NULL) {
-			/* spi_board_info.controller_data not is supplied */
-			chip_info = kzalloc(sizeof(struct spi_imx_chip),
-						GFP_KERNEL);
-			if (!chip_info) {
-				dev_err(&spi->dev,
-					"setup - "
-					"cannot allocate controller data\n");
-				status = -ENOMEM;
-				goto err_first_setup;
-			}
-			/* Set controller data default value */
-			chip_info->enable_loopback =
-						SPI_DEFAULT_ENABLE_LOOPBACK;
-			chip_info->enable_dma = SPI_DEFAULT_ENABLE_DMA;
-			chip_info->ins_ss_pulse = 1;
-			chip_info->bclk_wait = SPI_DEFAULT_PERIOD_WAIT;
-			chip_info->cs_control = null_cs_control;
-		}
-	}
-
-	/* Now set controller state based on controller data */
-
-	if (first_setup) {
-		/* SPI loopback */
-		if (chip_info->enable_loopback)
-			chip->test = SPI_TEST_LBC;
-		else
-			chip->test = 0;
-
-		/* SPI dma driven */
-		chip->enable_dma = chip_info->enable_dma;
-
-		/* SPI /SS pulse between spi burst */
-		if (chip_info->ins_ss_pulse)
-			u32_EDIT(chip->control,
-				SPI_CONTROL_SSCTL, SPI_CONTROL_SSCTL_1);
-		else
-			u32_EDIT(chip->control,
-				SPI_CONTROL_SSCTL, SPI_CONTROL_SSCTL_0);
-
-		/* SPI bclk waits between each bits_per_word spi burst */
-		if (chip_info->bclk_wait > SPI_PERIOD_MAX_WAIT) {
-			dev_err(&spi->dev,
-				"setup - "
-				"bclk_wait exceeds max allowed (%d)\n",
-				SPI_PERIOD_MAX_WAIT);
-			goto err_first_setup;
-		}
-		chip->period = SPI_PERIOD_CSRC_BCLK |
-				(chip_info->bclk_wait & SPI_PERIOD_WAIT);
-	}
-
-	/* SPI mode */
-	tmp = spi->mode;
-	if (tmp & SPI_CS_HIGH) {
-		u32_EDIT(chip->control,
-				SPI_CONTROL_SSPOL, SPI_CONTROL_SSPOL_ACT_HIGH);
-	}
-	switch (tmp & SPI_MODE_3) {
-	case SPI_MODE_0:
-		tmp = 0;
-		break;
-	case SPI_MODE_1:
-		tmp = SPI_CONTROL_PHA_1;
-		break;
-	case SPI_MODE_2:
-		tmp = SPI_CONTROL_POL_ACT_LOW;
-		break;
-	default:
-		/* SPI_MODE_3 */
-		tmp = SPI_CONTROL_PHA_1 | SPI_CONTROL_POL_ACT_LOW;
-		break;
-	}
-	u32_EDIT(chip->control, SPI_CONTROL_POL | SPI_CONTROL_PHA, tmp);
-
-	/* SPI word width */
-	tmp = spi->bits_per_word;
-	if (tmp > 16) {
-		status = -EINVAL;
-		dev_err(&spi->dev,
-			"setup - "
-			"invalid bits_per_word (%d)\n",
-			tmp);
-		if (first_setup)
-			goto err_first_setup;
-		else {
-			/* Undo setup using chip as backup copy */
-			tmp = chip->bits_per_word;
-			spi->bits_per_word = tmp;
-		}
-	}
-	chip->bits_per_word = tmp;
-	u32_EDIT(chip->control, SPI_CONTROL_BITCOUNT_MASK, tmp - 1);
-	chip->n_bytes = (tmp <= 8) ? 1 : 2;
-
-	/* SPI datarate */
-	tmp = spi_data_rate(drv_data, spi->max_speed_hz);
-	if (tmp == SPI_CONTROL_DATARATE_BAD) {
-		status = -EINVAL;
-		dev_err(&spi->dev,
-			"setup - "
-			"HW min speed (%d Hz) exceeds required "
-			"max speed (%d Hz)\n",
-			spi_speed_hz(drv_data, SPI_CONTROL_DATARATE_MIN),
-			spi->max_speed_hz);
-		if (first_setup)
-			goto err_first_setup;
-		else
-			/* Undo setup using chip as backup copy */
-			spi->max_speed_hz = chip->max_speed_hz;
-	} else {
-		u32_EDIT(chip->control, SPI_CONTROL_DATARATE, tmp);
-		/* Actual rounded max_speed_hz */
-		tmp = spi_speed_hz(drv_data, tmp);
-		spi->max_speed_hz = tmp;
-		chip->max_speed_hz = tmp;
-	}
-
-	/* SPI chip-select management */
-	if (chip_info->cs_control)
-		chip->cs_control = chip_info->cs_control;
-	else
-		chip->cs_control = null_cs_control;
-
-	/* Save controller_state */
-	spi_set_ctldata(spi, chip);
-
-	/* Summary */
-	dev_dbg(&spi->dev,
-		"setup succeded\n"
-		"    loopback enable   = %s\n"
-		"    dma enable        = %s\n"
-		"    insert /ss pulse  = %s\n"
-		"    period wait       = %d\n"
-		"    mode              = %d\n"
-		"    bits per word     = %d\n"
-		"    min speed         = %d Hz\n"
-		"    rounded max speed = %d Hz\n",
-		chip->test & SPI_TEST_LBC ? "Yes" : "No",
-		chip->enable_dma ? "Yes" : "No",
-		chip->control & SPI_CONTROL_SSCTL ? "Yes" : "No",
-		chip->period & SPI_PERIOD_WAIT,
-		spi->mode,
-		spi->bits_per_word,
-		spi_speed_hz(drv_data, SPI_CONTROL_DATARATE_MIN),
-		spi->max_speed_hz);
-	return status;
-
-err_first_setup:
-	kfree(chip);
-	return status;
-}
-
-static void cleanup(struct spi_device *spi)
-{
-	kfree(spi_get_ctldata(spi));
-}
-
-static int __init init_queue(struct driver_data *drv_data)
-{
-	INIT_LIST_HEAD(&drv_data->queue);
-	spin_lock_init(&drv_data->lock);
-
-	drv_data->run = QUEUE_STOPPED;
-	drv_data->busy = 0;
-
-	tasklet_init(&drv_data->pump_transfers,
-			pump_transfers,	(unsigned long)drv_data);
-
-	INIT_WORK(&drv_data->work, pump_messages);
-	drv_data->workqueue = create_singlethread_workqueue(
-				dev_name(drv_data->master->dev.parent));
-	if (drv_data->workqueue == NULL)
-		return -EBUSY;
-
-	return 0;
-}
-
-static int start_queue(struct driver_data *drv_data)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&drv_data->lock, flags);
-
-	if (drv_data->run == QUEUE_RUNNING || drv_data->busy) {
-		spin_unlock_irqrestore(&drv_data->lock, flags);
-		return -EBUSY;
-	}
-
-	drv_data->run = QUEUE_RUNNING;
-	drv_data->cur_msg = NULL;
-	drv_data->cur_transfer = NULL;
-	drv_data->cur_chip = NULL;
-	spin_unlock_irqrestore(&drv_data->lock, flags);
-
-	queue_work(drv_data->workqueue, &drv_data->work);
-
-	return 0;
-}
-
-static int stop_queue(struct driver_data *drv_data)
-{
-	unsigned long flags;
-	unsigned limit = 500;
-	int status = 0;
-
-	spin_lock_irqsave(&drv_data->lock, flags);
-
-	/* This is a bit lame, but is optimized for the common execution path.
-	 * A wait_queue on the drv_data->busy could be used, but then the common
-	 * execution path (pump_messages) would be required to call wake_up or
-	 * friends on every SPI message. Do this instead */
-	drv_data->run = QUEUE_STOPPED;
-	while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) {
-		spin_unlock_irqrestore(&drv_data->lock, flags);
-		msleep(10);
-		spin_lock_irqsave(&drv_data->lock, flags);
-	}
-
-	if (!list_empty(&drv_data->queue) || drv_data->busy)
-		status = -EBUSY;
-
-	spin_unlock_irqrestore(&drv_data->lock, flags);
-
-	return status;
-}
-
-static int destroy_queue(struct driver_data *drv_data)
-{
-	int status;
-
-	status = stop_queue(drv_data);
-	if (status != 0)
-		return status;
-
-	if (drv_data->workqueue)
-		destroy_workqueue(drv_data->workqueue);
-
-	return 0;
-}
-
-static int __init spi_imx_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct spi_imx_master *platform_info;
-	struct spi_master *master;
-	struct driver_data *drv_data;
-	struct resource *res;
-	int irq, status = 0;
-
-	platform_info = dev->platform_data;
-	if (platform_info == NULL) {
-		dev_err(&pdev->dev, "probe - no platform data supplied\n");
-		status = -ENODEV;
-		goto err_no_pdata;
-	}
-
-	/* Allocate master with space for drv_data */
-	master = spi_alloc_master(dev, sizeof(struct driver_data));
-	if (!master) {
-		dev_err(&pdev->dev, "probe - cannot alloc spi_master\n");
-		status = -ENOMEM;
-		goto err_no_mem;
-	}
-	drv_data = spi_master_get_devdata(master);
-	drv_data->master = master;
-	drv_data->master_info = platform_info;
-	drv_data->pdev = pdev;
-
-	/* the spi->mode bits understood by this driver: */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-
-	master->bus_num = pdev->id;
-	master->num_chipselect = platform_info->num_chipselect;
-	master->dma_alignment = DMA_ALIGNMENT;
-	master->cleanup = cleanup;
-	master->setup = setup;
-	master->transfer = transfer;
-
-	drv_data->dummy_dma_buf = SPI_DUMMY_u32;
-
-	drv_data->clk = clk_get(&pdev->dev, "perclk2");
-	if (IS_ERR(drv_data->clk)) {
-		dev_err(&pdev->dev, "probe - cannot get clock\n");
-		status = PTR_ERR(drv_data->clk);
-		goto err_no_clk;
-	}
-	clk_enable(drv_data->clk);
-
-	/* Find and map resources */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "probe - MEM resources not defined\n");
-		status = -ENODEV;
-		goto err_no_iores;
-	}
-	drv_data->ioarea = request_mem_region(res->start,
-						res->end - res->start + 1,
-						pdev->name);
-	if (drv_data->ioarea == NULL) {
-		dev_err(&pdev->dev, "probe - cannot reserve region\n");
-		status = -ENXIO;
-		goto err_no_iores;
-	}
-	drv_data->regs = ioremap(res->start, res->end - res->start + 1);
-	if (drv_data->regs == NULL) {
-		dev_err(&pdev->dev, "probe - cannot map IO\n");
-		status = -ENXIO;
-		goto err_no_iomap;
-	}
-	drv_data->rd_data_phys = (dma_addr_t)res->start;
-
-	/* Attach to IRQ */
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "probe - IRQ resource not defined\n");
-		status = -ENODEV;
-		goto err_no_irqres;
-	}
-	status = request_irq(irq, spi_int, IRQF_DISABLED,
-			     dev_name(dev), drv_data);
-	if (status < 0) {
-		dev_err(&pdev->dev, "probe - cannot get IRQ (%d)\n", status);
-		goto err_no_irqres;
-	}
-
-	/* Setup DMA if requested */
-	drv_data->tx_channel = -1;
-	drv_data->rx_channel = -1;
-	if (platform_info->enable_dma) {
-		/* Get rx DMA channel */
-		drv_data->rx_channel = imx_dma_request_by_prio("spi_imx_rx",
-							       DMA_PRIO_HIGH);
-		if (drv_data->rx_channel < 0) {
-			dev_err(dev,
-				"probe - problem (%d) requesting rx channel\n",
-				drv_data->rx_channel);
-			goto err_no_rxdma;
-		} else
-			imx_dma_setup_handlers(drv_data->rx_channel, NULL,
-						dma_err_handler, drv_data);
-
-		/* Get tx DMA channel */
-		drv_data->tx_channel = imx_dma_request_by_prio("spi_imx_tx",
-							       DMA_PRIO_MEDIUM);
-		if (drv_data->tx_channel < 0) {
-			dev_err(dev,
-				"probe - problem (%d) requesting tx channel\n",
-				drv_data->tx_channel);
-			imx_dma_free(drv_data->rx_channel);
-			goto err_no_txdma;
-		} else
-			imx_dma_setup_handlers(drv_data->tx_channel,
-						dma_tx_handler, dma_err_handler,
-						drv_data);
-
-		/* Set request source and burst length for allocated channels */
-		switch (drv_data->pdev->id) {
-		case 1:
-			/* Using SPI1 */
-			RSSR(drv_data->rx_channel) = DMA_REQ_SPI1_R;
-			RSSR(drv_data->tx_channel) = DMA_REQ_SPI1_T;
-			break;
-		case 2:
-			/* Using SPI2 */
-			RSSR(drv_data->rx_channel) = DMA_REQ_SPI2_R;
-			RSSR(drv_data->tx_channel) = DMA_REQ_SPI2_T;
-			break;
-		default:
-			dev_err(dev, "probe - bad SPI Id\n");
-			imx_dma_free(drv_data->rx_channel);
-			imx_dma_free(drv_data->tx_channel);
-			status = -ENODEV;
-			goto err_no_devid;
-		}
-		BLR(drv_data->rx_channel) = SPI_DMA_BLR;
-		BLR(drv_data->tx_channel) = SPI_DMA_BLR;
-	}
-
-	/* Load default SPI configuration */
-	writel(SPI_RESET_START, drv_data->regs + SPI_RESET);
-	writel(0, drv_data->regs + SPI_RESET);
-	writel(SPI_DEFAULT_CONTROL, drv_data->regs + SPI_CONTROL);
-
-	/* Initial and start queue */
-	status = init_queue(drv_data);
-	if (status != 0) {
-		dev_err(&pdev->dev, "probe - problem initializing queue\n");
-		goto err_init_queue;
-	}
-	status = start_queue(drv_data);
-	if (status != 0) {
-		dev_err(&pdev->dev, "probe - problem starting queue\n");
-		goto err_start_queue;
-	}
-
-	/* Register with the SPI framework */
-	platform_set_drvdata(pdev, drv_data);
-	status = spi_register_master(master);
-	if (status != 0) {
-		dev_err(&pdev->dev, "probe - problem registering spi master\n");
-		goto err_spi_register;
-	}
-
-	dev_dbg(dev, "probe succeded\n");
-	return 0;
-
-err_init_queue:
-err_start_queue:
-err_spi_register:
-	destroy_queue(drv_data);
-
-err_no_rxdma:
-err_no_txdma:
-err_no_devid:
-	free_irq(irq, drv_data);
-
-err_no_irqres:
-	iounmap(drv_data->regs);
-
-err_no_iomap:
-	release_resource(drv_data->ioarea);
-	kfree(drv_data->ioarea);
-
-err_no_iores:
-	clk_disable(drv_data->clk);
-	clk_put(drv_data->clk);
-
-err_no_clk:
-	spi_master_put(master);
-
-err_no_pdata:
-err_no_mem:
-	return status;
-}
-
-static int __exit spi_imx_remove(struct platform_device *pdev)
-{
-	struct driver_data *drv_data = platform_get_drvdata(pdev);
-	int irq;
-	int status = 0;
-
-	if (!drv_data)
-		return 0;
-
-	tasklet_kill(&drv_data->pump_transfers);
-
-	/* Remove the queue */
-	status = destroy_queue(drv_data);
-	if (status != 0) {
-		dev_err(&pdev->dev, "queue remove failed (%d)\n", status);
-		return status;
-	}
-
-	/* Reset SPI */
-	writel(SPI_RESET_START, drv_data->regs + SPI_RESET);
-	writel(0, drv_data->regs + SPI_RESET);
-
-	/* Release DMA */
-	if (drv_data->master_info->enable_dma) {
-		RSSR(drv_data->rx_channel) = 0;
-		RSSR(drv_data->tx_channel) = 0;
-		imx_dma_free(drv_data->tx_channel);
-		imx_dma_free(drv_data->rx_channel);
-	}
-
-	/* Release IRQ */
-	irq = platform_get_irq(pdev, 0);
-	if (irq >= 0)
-		free_irq(irq, drv_data);
-
-	clk_disable(drv_data->clk);
-	clk_put(drv_data->clk);
-
-	/* Release map resources */
-	iounmap(drv_data->regs);
-	release_resource(drv_data->ioarea);
-	kfree(drv_data->ioarea);
-
-	/* Disconnect from the SPI framework */
-	spi_unregister_master(drv_data->master);
-	spi_master_put(drv_data->master);
-
-	/* Prevent double remove */
-	platform_set_drvdata(pdev, NULL);
-
-	dev_dbg(&pdev->dev, "remove succeded\n");
-
-	return 0;
-}
-
-static void spi_imx_shutdown(struct platform_device *pdev)
-{
-	struct driver_data *drv_data = platform_get_drvdata(pdev);
-
-	/* Reset SPI */
-	writel(SPI_RESET_START, drv_data->regs + SPI_RESET);
-	writel(0, drv_data->regs + SPI_RESET);
-
-	dev_dbg(&pdev->dev, "shutdown succeded\n");
-}
-
-#ifdef CONFIG_PM
-
-static int spi_imx_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct driver_data *drv_data = platform_get_drvdata(pdev);
-	int status = 0;
-
-	status = stop_queue(drv_data);
-	if (status != 0) {
-		dev_warn(&pdev->dev, "suspend cannot stop queue\n");
-		return status;
-	}
-
-	dev_dbg(&pdev->dev, "suspended\n");
-
-	return 0;
-}
-
-static int spi_imx_resume(struct platform_device *pdev)
-{
-	struct driver_data *drv_data = platform_get_drvdata(pdev);
-	int status = 0;
-
-	/* Start the queue running */
-	status = start_queue(drv_data);
-	if (status != 0)
-		dev_err(&pdev->dev, "problem starting queue (%d)\n", status);
-	else
-		dev_dbg(&pdev->dev, "resumed\n");
-
-	return status;
-}
-#else
-#define spi_imx_suspend NULL
-#define spi_imx_resume NULL
-#endif /* CONFIG_PM */
-
-/* work with hotplug and coldplug */
-MODULE_ALIAS("platform:spi_imx");
-
-static struct platform_driver driver = {
-	.driver = {
-		.name = "spi_imx",
-		.owner = THIS_MODULE,
-	},
-	.remove = __exit_p(spi_imx_remove),
-	.shutdown = spi_imx_shutdown,
-	.suspend = spi_imx_suspend,
-	.resume = spi_imx_resume,
-};
-
-static int __init spi_imx_init(void)
-{
-	return platform_driver_probe(&driver, spi_imx_probe);
-}
-module_init(spi_imx_init);
-
-static void __exit spi_imx_exit(void)
-{
-	platform_driver_unregister(&driver);
-}
-module_exit(spi_imx_exit);
-
-MODULE_AUTHOR("Andrea Paterniani, <a.paterniani@swapp-eng.it>");
-MODULE_DESCRIPTION("iMX SPI Controller Driver");
-MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 7a8fa725b21ae19a3d9a2de196a440da8c9085a6 Mon Sep 17 00:00:00 2001
From: Jouni Hogander <jouni.hogander@nokia.com>
Date: Tue, 22 Sep 2009 16:45:58 -0700
Subject: spi: omap2_mcspi use BIT(n)

Convert bit shifted values into BIT format

Signed-off-by: Jouni Hogander <jouni.hogander@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index 9b80ad3..ae785cc 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -59,40 +59,40 @@
 
 /* per-register bitmasks: */
 
-#define OMAP2_MCSPI_SYSCONFIG_SMARTIDLE	(2 << 3)
-#define OMAP2_MCSPI_SYSCONFIG_ENAWAKEUP	(1 << 2)
-#define OMAP2_MCSPI_SYSCONFIG_AUTOIDLE	(1 << 0)
-#define OMAP2_MCSPI_SYSCONFIG_SOFTRESET	(1 << 1)
+#define OMAP2_MCSPI_SYSCONFIG_SMARTIDLE	BIT(4)
+#define OMAP2_MCSPI_SYSCONFIG_ENAWAKEUP	BIT(2)
+#define OMAP2_MCSPI_SYSCONFIG_AUTOIDLE	BIT(0)
+#define OMAP2_MCSPI_SYSCONFIG_SOFTRESET	BIT(1)
 
-#define OMAP2_MCSPI_SYSSTATUS_RESETDONE	(1 << 0)
+#define OMAP2_MCSPI_SYSSTATUS_RESETDONE	BIT(0)
 
-#define OMAP2_MCSPI_MODULCTRL_SINGLE	(1 << 0)
-#define OMAP2_MCSPI_MODULCTRL_MS	(1 << 2)
-#define OMAP2_MCSPI_MODULCTRL_STEST	(1 << 3)
+#define OMAP2_MCSPI_MODULCTRL_SINGLE	BIT(0)
+#define OMAP2_MCSPI_MODULCTRL_MS	BIT(2)
+#define OMAP2_MCSPI_MODULCTRL_STEST	BIT(3)
 
-#define OMAP2_MCSPI_CHCONF_PHA		(1 << 0)
-#define OMAP2_MCSPI_CHCONF_POL		(1 << 1)
+#define OMAP2_MCSPI_CHCONF_PHA		BIT(0)
+#define OMAP2_MCSPI_CHCONF_POL		BIT(1)
 #define OMAP2_MCSPI_CHCONF_CLKD_MASK	(0x0f << 2)
-#define OMAP2_MCSPI_CHCONF_EPOL		(1 << 6)
+#define OMAP2_MCSPI_CHCONF_EPOL		BIT(6)
 #define OMAP2_MCSPI_CHCONF_WL_MASK	(0x1f << 7)
-#define OMAP2_MCSPI_CHCONF_TRM_RX_ONLY	(0x01 << 12)
-#define OMAP2_MCSPI_CHCONF_TRM_TX_ONLY	(0x02 << 12)
+#define OMAP2_MCSPI_CHCONF_TRM_RX_ONLY	BIT(12)
+#define OMAP2_MCSPI_CHCONF_TRM_TX_ONLY	BIT(13)
 #define OMAP2_MCSPI_CHCONF_TRM_MASK	(0x03 << 12)
-#define OMAP2_MCSPI_CHCONF_DMAW		(1 << 14)
-#define OMAP2_MCSPI_CHCONF_DMAR		(1 << 15)
-#define OMAP2_MCSPI_CHCONF_DPE0		(1 << 16)
-#define OMAP2_MCSPI_CHCONF_DPE1		(1 << 17)
-#define OMAP2_MCSPI_CHCONF_IS		(1 << 18)
-#define OMAP2_MCSPI_CHCONF_TURBO	(1 << 19)
-#define OMAP2_MCSPI_CHCONF_FORCE	(1 << 20)
+#define OMAP2_MCSPI_CHCONF_DMAW		BIT(14)
+#define OMAP2_MCSPI_CHCONF_DMAR		BIT(15)
+#define OMAP2_MCSPI_CHCONF_DPE0		BIT(16)
+#define OMAP2_MCSPI_CHCONF_DPE1		BIT(17)
+#define OMAP2_MCSPI_CHCONF_IS		BIT(18)
+#define OMAP2_MCSPI_CHCONF_TURBO	BIT(19)
+#define OMAP2_MCSPI_CHCONF_FORCE	BIT(20)
 
-#define OMAP2_MCSPI_CHSTAT_RXS		(1 << 0)
-#define OMAP2_MCSPI_CHSTAT_TXS		(1 << 1)
-#define OMAP2_MCSPI_CHSTAT_EOT		(1 << 2)
+#define OMAP2_MCSPI_CHSTAT_RXS		BIT(0)
+#define OMAP2_MCSPI_CHSTAT_TXS		BIT(1)
+#define OMAP2_MCSPI_CHSTAT_EOT		BIT(2)
 
-#define OMAP2_MCSPI_CHCTRL_EN		(1 << 0)
+#define OMAP2_MCSPI_CHCTRL_EN		BIT(0)
 
-#define OMAP2_MCSPI_WAKEUPENABLE_WKEN	(1 << 0)
+#define OMAP2_MCSPI_WAKEUPENABLE_WKEN	BIT(0)
 
 /* We have 2 DMA channels per CS, one for RX and one for TX */
 struct omap2_mcspi_dma {
-- 
cgit v0.10.2


From 44dab88e7cc99d1d2caa9a8936e09d9a98a6761a Mon Sep 17 00:00:00 2001
From: "Steven A. Falco" <sfalco@harris.com>
Date: Tue, 22 Sep 2009 16:45:58 -0700
Subject: spi: add spi_ppc4xx driver

This adds a SPI driver for the SPI controller found in the IBM/AMCC
4xx PowerPC's.

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: Wolfgang Ocker <weo@reccoware.de>
Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Signed-off-by: Steven A. Falco <sfalco@harris.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 86056d14..3ac072f 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -171,6 +171,13 @@ config SPI_PL022
 	  controller. If you have an embedded system with an AMBA(R)
 	  bus and a PL022 controller, say Y or M here.
 
+config SPI_PPC4xx
+	tristate "PPC4xx SPI Controller"
+	depends on PPC32 && 4xx && SPI_MASTER
+	select SPI_BITBANG
+	help
+	  This selects a driver for the PPC4xx SPI Controller.
+
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
 	depends on ARCH_PXA && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 0f20a70b..bdd1f3a 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SPI_ORION)			+= orion_spi.o
 obj-$(CONFIG_SPI_PL022)			+= amba-pl022.o
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= mpc52xx_psc_spi.o
 obj-$(CONFIG_SPI_MPC8xxx)		+= spi_mpc8xxx.o
+obj-$(CONFIG_SPI_PPC4xx)		+= spi_ppc4xx.o
 obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
 obj-$(CONFIG_SPI_S3C24XX)		+= spi_s3c24xx.o
 obj-$(CONFIG_SPI_TXX9)			+= spi_txx9.o
diff --git a/drivers/spi/spi_ppc4xx.c b/drivers/spi/spi_ppc4xx.c
new file mode 100644
index 0000000..140a18d
--- /dev/null
+++ b/drivers/spi/spi_ppc4xx.c
@@ -0,0 +1,612 @@
+/*
+ * SPI_PPC4XX SPI controller driver.
+ *
+ * Copyright (C) 2007 Gary Jennejohn <garyj@denx.de>
+ * Copyright 2008 Stefan Roese <sr@denx.de>, DENX Software Engineering
+ * Copyright 2009 Harris Corporation, Steven A. Falco <sfalco@harris.com>
+ *
+ * Based in part on drivers/spi/spi_s3c24xx.c
+ *
+ * Copyright (c) 2006 Ben Dooks
+ * Copyright (c) 2006 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+/*
+ * The PPC4xx SPI controller has no FIFO so each sent/received byte will
+ * generate an interrupt to the CPU. This can cause high CPU utilization.
+ * This driver allows platforms to reduce the interrupt load on the CPU
+ * during SPI transfers by setting max_speed_hz via the device tree.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/of_platform.h>
+#include <linux/of_spi.h>
+#include <linux/of_gpio.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_bitbang.h>
+
+#include <asm/io.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+
+/* bits in mode register - bit 0 is MSb */
+
+/*
+ * SPI_PPC4XX_MODE_SCP = 0 means "data latched on trailing edge of clock"
+ * SPI_PPC4XX_MODE_SCP = 1 means "data latched on leading edge of clock"
+ * Note: This is the inverse of CPHA.
+ */
+#define SPI_PPC4XX_MODE_SCP	(0x80 >> 3)
+
+/* SPI_PPC4XX_MODE_SPE = 1 means "port enabled" */
+#define SPI_PPC4XX_MODE_SPE	(0x80 >> 4)
+
+/*
+ * SPI_PPC4XX_MODE_RD = 0 means "MSB first" - this is the normal mode
+ * SPI_PPC4XX_MODE_RD = 1 means "LSB first" - this is bit-reversed mode
+ * Note: This is identical to SPI_LSB_FIRST.
+ */
+#define SPI_PPC4XX_MODE_RD	(0x80 >> 5)
+
+/*
+ * SPI_PPC4XX_MODE_CI = 0 means "clock idles low"
+ * SPI_PPC4XX_MODE_CI = 1 means "clock idles high"
+ * Note: This is identical to CPOL.
+ */
+#define SPI_PPC4XX_MODE_CI	(0x80 >> 6)
+
+/*
+ * SPI_PPC4XX_MODE_IL = 0 means "loopback disable"
+ * SPI_PPC4XX_MODE_IL = 1 means "loopback enable"
+ */
+#define SPI_PPC4XX_MODE_IL	(0x80 >> 7)
+
+/* bits in control register */
+/* starts a transfer when set */
+#define SPI_PPC4XX_CR_STR	(0x80 >> 7)
+
+/* bits in status register */
+/* port is busy with a transfer */
+#define SPI_PPC4XX_SR_BSY	(0x80 >> 6)
+/* RxD ready */
+#define SPI_PPC4XX_SR_RBR	(0x80 >> 7)
+
+/* clock settings (SCP and CI) for various SPI modes */
+#define SPI_CLK_MODE0	(SPI_PPC4XX_MODE_SCP | 0)
+#define SPI_CLK_MODE1	(0 | 0)
+#define SPI_CLK_MODE2	(SPI_PPC4XX_MODE_SCP | SPI_PPC4XX_MODE_CI)
+#define SPI_CLK_MODE3	(0 | SPI_PPC4XX_MODE_CI)
+
+#define DRIVER_NAME	"spi_ppc4xx_of"
+
+struct spi_ppc4xx_regs {
+	u8 mode;
+	u8 rxd;
+	u8 txd;
+	u8 cr;
+	u8 sr;
+	u8 dummy;
+	/*
+	 * Clock divisor modulus register
+	 * This uses the follwing formula:
+	 *    SCPClkOut = OPBCLK/(4(CDM + 1))
+	 * or
+	 *    CDM = (OPBCLK/4*SCPClkOut) - 1
+	 * bit 0 is the MSb!
+	 */
+	u8 cdm;
+};
+
+/* SPI Controller driver's private data. */
+struct ppc4xx_spi {
+	/* bitbang has to be first */
+	struct spi_bitbang bitbang;
+	struct completion done;
+
+	u64 mapbase;
+	u64 mapsize;
+	int irqnum;
+	/* need this to set the SPI clock */
+	unsigned int opb_freq;
+
+	/* for transfers */
+	int len;
+	int count;
+	/* data buffers */
+	const unsigned char *tx;
+	unsigned char *rx;
+
+	int *gpios;
+
+	struct spi_ppc4xx_regs __iomem *regs; /* pointer to the registers */
+	struct spi_master *master;
+	struct device *dev;
+};
+
+/* need this so we can set the clock in the chipselect routine */
+struct spi_ppc4xx_cs {
+	u8 mode;
+};
+
+static int spi_ppc4xx_txrx(struct spi_device *spi, struct spi_transfer *t)
+{
+	struct ppc4xx_spi *hw;
+	u8 data;
+
+	dev_dbg(&spi->dev, "txrx: tx %p, rx %p, len %d\n",
+		t->tx_buf, t->rx_buf, t->len);
+
+	hw = spi_master_get_devdata(spi->master);
+
+	hw->tx = t->tx_buf;
+	hw->rx = t->rx_buf;
+	hw->len = t->len;
+	hw->count = 0;
+
+	/* send the first byte */
+	data = hw->tx ? hw->tx[0] : 0;
+	out_8(&hw->regs->txd, data);
+	out_8(&hw->regs->cr, SPI_PPC4XX_CR_STR);
+	wait_for_completion(&hw->done);
+
+	return hw->count;
+}
+
+static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
+{
+	struct ppc4xx_spi *hw = spi_master_get_devdata(spi->master);
+	struct spi_ppc4xx_cs *cs = spi->controller_state;
+	int scr;
+	u8 cdm = 0;
+	u32 speed;
+	u8 bits_per_word;
+
+	/* Start with the generic configuration for this device. */
+	bits_per_word = spi->bits_per_word;
+	speed = spi->max_speed_hz;
+
+	/*
+	 * Modify the configuration if the transfer overrides it.  Do not allow
+	 * the transfer to overwrite the generic configuration with zeros.
+	 */
+	if (t) {
+		if (t->bits_per_word)
+			bits_per_word = t->bits_per_word;
+
+		if (t->speed_hz)
+			speed = min(t->speed_hz, spi->max_speed_hz);
+	}
+
+	if (bits_per_word != 8) {
+		dev_err(&spi->dev, "invalid bits-per-word (%d)\n",
+				bits_per_word);
+		return -EINVAL;
+	}
+
+	if (!speed || (speed > spi->max_speed_hz)) {
+		dev_err(&spi->dev, "invalid speed_hz (%d)\n", speed);
+		return -EINVAL;
+	}
+
+	/* Write new configration */
+	out_8(&hw->regs->mode, cs->mode);
+
+	/* Set the clock */
+	/* opb_freq was already divided by 4 */
+	scr = (hw->opb_freq / speed) - 1;
+	if (scr > 0)
+		cdm = min(scr, 0xff);
+
+	dev_dbg(&spi->dev, "setting pre-scaler to %d (hz %d)\n", cdm, speed);
+
+	if (in_8(&hw->regs->cdm) != cdm)
+		out_8(&hw->regs->cdm, cdm);
+
+	spin_lock(&hw->bitbang.lock);
+	if (!hw->bitbang.busy) {
+		hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE);
+		/* Need to ndelay here? */
+	}
+	spin_unlock(&hw->bitbang.lock);
+
+	return 0;
+}
+
+static int spi_ppc4xx_setup(struct spi_device *spi)
+{
+	struct spi_ppc4xx_cs *cs = spi->controller_state;
+
+	if (spi->bits_per_word != 8) {
+		dev_err(&spi->dev, "invalid bits-per-word (%d)\n",
+			spi->bits_per_word);
+		return -EINVAL;
+	}
+
+	if (!spi->max_speed_hz) {
+		dev_err(&spi->dev, "invalid max_speed_hz (must be non-zero)\n");
+		return -EINVAL;
+	}
+
+	if (cs == NULL) {
+		cs = kzalloc(sizeof *cs, GFP_KERNEL);
+		if (!cs)
+			return -ENOMEM;
+		spi->controller_state = cs;
+	}
+
+	/*
+	 * We set all bits of the SPI0_MODE register, so,
+	 * no need to read-modify-write
+	 */
+	cs->mode = SPI_PPC4XX_MODE_SPE;
+
+	switch (spi->mode & (SPI_CPHA | SPI_CPOL)) {
+	case SPI_MODE_0:
+		cs->mode |= SPI_CLK_MODE0;
+		break;
+	case SPI_MODE_1:
+		cs->mode |= SPI_CLK_MODE1;
+		break;
+	case SPI_MODE_2:
+		cs->mode |= SPI_CLK_MODE2;
+		break;
+	case SPI_MODE_3:
+		cs->mode |= SPI_CLK_MODE3;
+		break;
+	}
+
+	if (spi->mode & SPI_LSB_FIRST)
+		cs->mode |= SPI_PPC4XX_MODE_RD;
+
+	return 0;
+}
+
+static void spi_ppc4xx_chipsel(struct spi_device *spi, int value)
+{
+	struct ppc4xx_spi *hw = spi_master_get_devdata(spi->master);
+	unsigned int cs = spi->chip_select;
+	unsigned int cspol;
+
+	/*
+	 * If there are no chip selects at all, or if this is the special
+	 * case of a non-existent (dummy) chip select, do nothing.
+	 */
+
+	if (!hw->master->num_chipselect || hw->gpios[cs] == -EEXIST)
+		return;
+
+	cspol = spi->mode & SPI_CS_HIGH ? 1 : 0;
+	if (value == BITBANG_CS_INACTIVE)
+		cspol = !cspol;
+
+	gpio_set_value(hw->gpios[cs], cspol);
+}
+
+static irqreturn_t spi_ppc4xx_int(int irq, void *dev_id)
+{
+	struct ppc4xx_spi *hw;
+	u8 status;
+	u8 data;
+	unsigned int count;
+
+	hw = (struct ppc4xx_spi *)dev_id;
+
+	status = in_8(&hw->regs->sr);
+	if (!status)
+		return IRQ_NONE;
+
+	/*
+	 * BSY de-asserts one cycle after the transfer is complete.  The
+	 * interrupt is asserted after the transfer is complete.  The exact
+	 * relationship is not documented, hence this code.
+	 */
+
+	if (unlikely(status & SPI_PPC4XX_SR_BSY)) {
+		u8 lstatus;
+		int cnt = 0;
+
+		dev_dbg(hw->dev, "got interrupt but spi still busy?\n");
+		do {
+			ndelay(10);
+			lstatus = in_8(&hw->regs->sr);
+		} while (++cnt < 100 && lstatus & SPI_PPC4XX_SR_BSY);
+
+		if (cnt >= 100) {
+			dev_err(hw->dev, "busywait: too many loops!\n");
+			complete(&hw->done);
+			return IRQ_HANDLED;
+		} else {
+			/* status is always 1 (RBR) here */
+			status = in_8(&hw->regs->sr);
+			dev_dbg(hw->dev, "loops %d status %x\n", cnt, status);
+		}
+	}
+
+	count = hw->count;
+	hw->count++;
+
+	/* RBR triggered this interrupt.  Therefore, data must be ready. */
+	data = in_8(&hw->regs->rxd);
+	if (hw->rx)
+		hw->rx[count] = data;
+
+	count++;
+
+	if (count < hw->len) {
+		data = hw->tx ? hw->tx[count] : 0;
+		out_8(&hw->regs->txd, data);
+		out_8(&hw->regs->cr, SPI_PPC4XX_CR_STR);
+	} else {
+		complete(&hw->done);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void spi_ppc4xx_cleanup(struct spi_device *spi)
+{
+	kfree(spi->controller_state);
+}
+
+static void spi_ppc4xx_enable(struct ppc4xx_spi *hw)
+{
+	/*
+	 * On all 4xx PPC's the SPI bus is shared/multiplexed with
+	 * the 2nd I2C bus. We need to enable the the SPI bus before
+	 * using it.
+	 */
+
+	/* need to clear bit 14 to enable SPC */
+	dcri_clrset(SDR0, SDR0_PFC1, 0x80000000 >> 14, 0);
+}
+
+static void free_gpios(struct ppc4xx_spi *hw)
+{
+	if (hw->master->num_chipselect) {
+		int i;
+		for (i = 0; i < hw->master->num_chipselect; i++)
+			if (gpio_is_valid(hw->gpios[i]))
+				gpio_free(hw->gpios[i]);
+
+		kfree(hw->gpios);
+		hw->gpios = NULL;
+	}
+}
+
+/*
+ * of_device layer stuff...
+ */
+static int __init spi_ppc4xx_of_probe(struct of_device *op,
+				      const struct of_device_id *match)
+{
+	struct ppc4xx_spi *hw;
+	struct spi_master *master;
+	struct spi_bitbang *bbp;
+	struct resource resource;
+	struct device_node *np = op->node;
+	struct device *dev = &op->dev;
+	struct device_node *opbnp;
+	int ret;
+	int num_gpios;
+	const unsigned int *clk;
+
+	master = spi_alloc_master(dev, sizeof *hw);
+	if (master == NULL)
+		return -ENOMEM;
+	dev_set_drvdata(dev, master);
+	hw = spi_master_get_devdata(master);
+	hw->master = spi_master_get(master);
+	hw->dev = dev;
+
+	init_completion(&hw->done);
+
+	/*
+	 * A count of zero implies a single SPI device without any chip-select.
+	 * Note that of_gpio_count counts all gpios assigned to this spi master.
+	 * This includes both "null" gpio's and real ones.
+	 */
+	num_gpios = of_gpio_count(np);
+	if (num_gpios) {
+		int i;
+
+		hw->gpios = kzalloc(sizeof(int) * num_gpios, GFP_KERNEL);
+		if (!hw->gpios) {
+			ret = -ENOMEM;
+			goto free_master;
+		}
+
+		for (i = 0; i < num_gpios; i++) {
+			int gpio;
+			enum of_gpio_flags flags;
+
+			gpio = of_get_gpio_flags(np, i, &flags);
+			hw->gpios[i] = gpio;
+
+			if (gpio_is_valid(gpio)) {
+				/* Real CS - set the initial state. */
+				ret = gpio_request(gpio, np->name);
+				if (ret < 0) {
+					dev_err(dev, "can't request gpio "
+							"#%d: %d\n", i, ret);
+					goto free_gpios;
+				}
+
+				gpio_direction_output(gpio,
+						!!(flags & OF_GPIO_ACTIVE_LOW));
+			} else if (gpio == -EEXIST) {
+				; /* No CS, but that's OK. */
+			} else {
+				dev_err(dev, "invalid gpio #%d: %d\n", i, gpio);
+				ret = -EINVAL;
+				goto free_gpios;
+			}
+		}
+	}
+
+	/* Setup the state for the bitbang driver */
+	bbp = &hw->bitbang;
+	bbp->master = hw->master;
+	bbp->setup_transfer = spi_ppc4xx_setupxfer;
+	bbp->chipselect = spi_ppc4xx_chipsel;
+	bbp->txrx_bufs = spi_ppc4xx_txrx;
+	bbp->use_dma = 0;
+	bbp->master->setup = spi_ppc4xx_setup;
+	bbp->master->cleanup = spi_ppc4xx_cleanup;
+
+	/* Allocate bus num dynamically. */
+	bbp->master->bus_num = -1;
+
+	/* the spi->mode bits understood by this driver: */
+	bbp->master->mode_bits =
+		SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST;
+
+	/* this many pins in all GPIO controllers */
+	bbp->master->num_chipselect = num_gpios;
+
+	/* Get the clock for the OPB */
+	opbnp = of_find_compatible_node(NULL, NULL, "ibm,opb");
+	if (opbnp == NULL) {
+		dev_err(dev, "OPB: cannot find node\n");
+		ret = -ENODEV;
+		goto free_gpios;
+	}
+	/* Get the clock (Hz) for the OPB */
+	clk = of_get_property(opbnp, "clock-frequency", NULL);
+	if (clk == NULL) {
+		dev_err(dev, "OPB: no clock-frequency property set\n");
+		of_node_put(opbnp);
+		ret = -ENODEV;
+		goto free_gpios;
+	}
+	hw->opb_freq = *clk;
+	hw->opb_freq >>= 2;
+	of_node_put(opbnp);
+
+	ret = of_address_to_resource(np, 0, &resource);
+	if (ret) {
+		dev_err(dev, "error while parsing device node resource\n");
+		goto free_gpios;
+	}
+	hw->mapbase = resource.start;
+	hw->mapsize = resource.end - resource.start + 1;
+
+	/* Sanity check */
+	if (hw->mapsize < sizeof(struct spi_ppc4xx_regs)) {
+		dev_err(dev, "too small to map registers\n");
+		ret = -EINVAL;
+		goto free_gpios;
+	}
+
+	/* Request IRQ */
+	hw->irqnum = irq_of_parse_and_map(np, 0);
+	ret = request_irq(hw->irqnum, spi_ppc4xx_int,
+			  IRQF_DISABLED, "spi_ppc4xx_of", (void *)hw);
+	if (ret) {
+		dev_err(dev, "unable to allocate interrupt\n");
+		goto free_gpios;
+	}
+
+	if (!request_mem_region(hw->mapbase, hw->mapsize, DRIVER_NAME)) {
+		dev_err(dev, "resource unavailable\n");
+		ret = -EBUSY;
+		goto request_mem_error;
+	}
+
+	hw->regs = ioremap(hw->mapbase, sizeof(struct spi_ppc4xx_regs));
+
+	if (!hw->regs) {
+		dev_err(dev, "unable to memory map registers\n");
+		ret = -ENXIO;
+		goto map_io_error;
+	}
+
+	spi_ppc4xx_enable(hw);
+
+	/* Finally register our spi controller */
+	dev->dma_mask = 0;
+	ret = spi_bitbang_start(bbp);
+	if (ret) {
+		dev_err(dev, "failed to register SPI master\n");
+		goto unmap_regs;
+	}
+
+	dev_info(dev, "driver initialized\n");
+	of_register_spi_devices(master, np);
+
+	return 0;
+
+unmap_regs:
+	iounmap(hw->regs);
+map_io_error:
+	release_mem_region(hw->mapbase, hw->mapsize);
+request_mem_error:
+	free_irq(hw->irqnum, hw);
+free_gpios:
+	free_gpios(hw);
+free_master:
+	dev_set_drvdata(dev, NULL);
+	spi_master_put(master);
+
+	dev_err(dev, "initialization failed\n");
+	return ret;
+}
+
+static int __exit spi_ppc4xx_of_remove(struct of_device *op)
+{
+	struct spi_master *master = dev_get_drvdata(&op->dev);
+	struct ppc4xx_spi *hw = spi_master_get_devdata(master);
+
+	spi_bitbang_stop(&hw->bitbang);
+	dev_set_drvdata(&op->dev, NULL);
+	release_mem_region(hw->mapbase, hw->mapsize);
+	free_irq(hw->irqnum, hw);
+	iounmap(hw->regs);
+	free_gpios(hw);
+	return 0;
+}
+
+static struct of_device_id spi_ppc4xx_of_match[] = {
+	{ .compatible = "ibm,ppc4xx-spi", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, spi_ppc4xx_of_match);
+
+static struct of_platform_driver spi_ppc4xx_of_driver = {
+	.match_table = spi_ppc4xx_of_match,
+	.probe = spi_ppc4xx_of_probe,
+	.remove = __exit_p(spi_ppc4xx_of_remove),
+	.driver = {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init spi_ppc4xx_init(void)
+{
+	return of_register_platform_driver(&spi_ppc4xx_of_driver);
+}
+module_init(spi_ppc4xx_init);
+
+static void __exit spi_ppc4xx_exit(void)
+{
+	of_unregister_platform_driver(&spi_ppc4xx_of_driver);
+}
+module_exit(spi_ppc4xx_exit);
+
+MODULE_AUTHOR("Gary Jennejohn & Stefan Roese");
+MODULE_DESCRIPTION("Simple PPC4xx SPI Driver");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From b73b255956119111dc18fa063d1e3a0bb3f06328 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 22 Sep 2009 16:46:00 -0700
Subject: spi.h: add missing kernel-doc for struct spi_master

Add missing kernel-doc notation in spi.h for struct spi_master:

Warning(include/linux/spi/spi.h:289): No description found for parameter 'mode_bits'
Warning(include/linux/spi/spi.h:289): No description found for parameter 'flags'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index c47c4b4..eb25ced 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -207,6 +207,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  *	each slave has a chipselect signal, but it's common that not
  *	every chipselect is connected to a slave.
  * @dma_alignment: SPI controller constraint on DMA buffers alignment.
+ * @mode_bits: flags understood by this controller driver
+ * @flags: other constraints relevant to this driver
  * @setup: updates the device mode and clocking records used by a
  *	device's SPI controller; protocol code may call this.  This
  *	must fail if an unrecognized or unsupported mode is requested.
-- 
cgit v0.10.2


From f33b29ee3309ba2cd56ade8e905b3640a7dea909 Mon Sep 17 00:00:00 2001
From: "linus.walleij@stericsson.com" <linus.walleij@stericsson.com>
Date: Tue, 22 Sep 2009 16:46:01 -0700
Subject: spi: add default selection of PL022 for ARM reference platforms

This makes the PL022 driver a default choice for any RealView and
Versatile boards plus the integrator IMPD1 which all contain the PL022
PrimeCell.  This will make it a default choice if and only if a user
selects SPI support for their board.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 3ac072f..927a8b7 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -166,6 +166,9 @@ config SPI_PL022
 	tristate "ARM AMBA PL022 SSP controller (EXPERIMENTAL)"
 	depends on ARM_AMBA && EXPERIMENTAL
 	default y if MACH_U300
+	default y if ARCH_REALVIEW
+	default y if INTEGRATOR_IMPD1
+	default y if ARCH_VERSATILE
 	help
 	  This selects the ARM(R) AMBA(R) PrimeCell PL022 SSP
 	  controller. If you have an embedded system with an AMBA(R)
-- 
cgit v0.10.2


From b5f3294f0be5496aec01e5aa709a5fab8bb2f225 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 22 Sep 2009 16:46:02 -0700
Subject: spi: add SPI driver for most known i.MX SoCs

This driver has been tested on i.MX1/i.MX27/i.MX35 with an AT25 type
EEPROM and on i.MX27/i.MX31 with a Freescale MC13783 PMIC.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Tested-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: David Brownell <david-b@pacbell.net>
Cc: Andrea Paterniani <a.paterniani@swapp-eng.it>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/plat-mxc/include/mach/spi.h b/arch/arm/plat-mxc/include/mach/spi.h
new file mode 100644
index 0000000..08be445
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/spi.h
@@ -0,0 +1,27 @@
+
+#ifndef __MACH_SPI_H_
+#define __MACH_SPI_H_
+
+/*
+ * struct spi_imx_master - device.platform_data for SPI controller devices.
+ * @chipselect: Array of chipselects for this master. Numbers >= 0 mean gpio
+ *              pins, numbers < 0 mean internal CSPI chipselects according
+ *              to MXC_SPI_CS(). Normally you want to use gpio based chip
+ *              selects as the CSPI module tries to be intelligent about
+ *              when to assert the chipselect: The CSPI module deasserts the
+ *              chipselect once it runs out of input data. The other problem
+ *              is that it is not possible to mix between high active and low
+ *              active chipselects on one single bus using the internal
+ *              chipselects. Unfortunately Freescale decided to put some
+ *              chipselects on dedicated pins which are not usable as gpios,
+ *              so we have to support the internal chipselects.
+ * @num_chipselect: ARRAY_SIZE(chipselect)
+ */
+struct spi_imx_master {
+	int	*chipselect;
+	int	num_chipselect;
+};
+
+#define MXC_SPI_CS(no)	((no) - 32)
+
+#endif /* __MACH_SPI_H_*/
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 927a8b7..cd3acfe 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -116,6 +116,14 @@ config SPI_GPIO
 	  GPIO operations, you should be able to leverage that for better
 	  speed with a custom version of this driver; see the source code.
 
+config SPI_IMX
+	tristate "Freescale i.MX SPI controllers"
+	depends on ARCH_MXC
+	select SPI_BITBANG
+	help
+	  This enables using the Freescale i.MX SPI controllers in master
+	  mode.
+
 config SPI_LM70_LLP
 	tristate "Parallel port adapter for LM70 eval board (DEVELOPMENT)"
 	depends on PARPORT && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index bdd1f3a..fbfc5f2 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_SPI_BITBANG)		+= spi_bitbang.o
 obj-$(CONFIG_SPI_AU1550)		+= au1550_spi.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o
 obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
+obj-$(CONFIG_SPI_IMX)			+= mxc_spi.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
 obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
 obj-$(CONFIG_SPI_OMAP_UWIRE)		+= omap_uwire.o
diff --git a/drivers/spi/mxc_spi.c b/drivers/spi/mxc_spi.c
new file mode 100644
index 0000000..b144723
--- /dev/null
+++ b/drivers/spi/mxc_spi.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright (C) 2008 Juergen Beisert
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_bitbang.h>
+#include <linux/types.h>
+
+#include <mach/spi.h>
+
+#define DRIVER_NAME "spi_imx"
+
+#define MXC_CSPIRXDATA		0x00
+#define MXC_CSPITXDATA		0x04
+#define MXC_CSPICTRL		0x08
+#define MXC_CSPIINT		0x0c
+#define MXC_RESET		0x1c
+
+/* generic defines to abstract from the different register layouts */
+#define MXC_INT_RR	(1 << 0) /* Receive data ready interrupt */
+#define MXC_INT_TE	(1 << 1) /* Transmit FIFO empty interrupt */
+
+struct mxc_spi_config {
+	unsigned int speed_hz;
+	unsigned int bpw;
+	unsigned int mode;
+	int cs;
+};
+
+struct mxc_spi_data {
+	struct spi_bitbang bitbang;
+
+	struct completion xfer_done;
+	void *base;
+	int irq;
+	struct clk *clk;
+	unsigned long spi_clk;
+	int *chipselect;
+
+	unsigned int count;
+	void (*tx)(struct mxc_spi_data *);
+	void (*rx)(struct mxc_spi_data *);
+	void *rx_buf;
+	const void *tx_buf;
+	unsigned int txfifo; /* number of words pushed in tx FIFO */
+
+	/* SoC specific functions */
+	void (*intctrl)(struct mxc_spi_data *, int);
+	int (*config)(struct mxc_spi_data *, struct mxc_spi_config *);
+	void (*trigger)(struct mxc_spi_data *);
+	int (*rx_available)(struct mxc_spi_data *);
+};
+
+#define MXC_SPI_BUF_RX(type)						\
+static void mxc_spi_buf_rx_##type(struct mxc_spi_data *mxc_spi)		\
+{									\
+	unsigned int val = readl(mxc_spi->base + MXC_CSPIRXDATA);	\
+									\
+	if (mxc_spi->rx_buf) {						\
+		*(type *)mxc_spi->rx_buf = val;				\
+		mxc_spi->rx_buf += sizeof(type);			\
+	}								\
+}
+
+#define MXC_SPI_BUF_TX(type)						\
+static void mxc_spi_buf_tx_##type(struct mxc_spi_data *mxc_spi)		\
+{									\
+	type val = 0;							\
+									\
+	if (mxc_spi->tx_buf) {						\
+		val = *(type *)mxc_spi->tx_buf;				\
+		mxc_spi->tx_buf += sizeof(type);			\
+	}								\
+									\
+	mxc_spi->count -= sizeof(type);					\
+									\
+	writel(val, mxc_spi->base + MXC_CSPITXDATA);			\
+}
+
+MXC_SPI_BUF_RX(u8)
+MXC_SPI_BUF_TX(u8)
+MXC_SPI_BUF_RX(u16)
+MXC_SPI_BUF_TX(u16)
+MXC_SPI_BUF_RX(u32)
+MXC_SPI_BUF_TX(u32)
+
+/* First entry is reserved, second entry is valid only if SDHC_SPIEN is set
+ * (which is currently not the case in this driver)
+ */
+static int mxc_clkdivs[] = {0, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
+	256, 384, 512, 768, 1024};
+
+/* MX21, MX27 */
+static unsigned int mxc_spi_clkdiv_1(unsigned int fin,
+		unsigned int fspi)
+{
+	int i, max;
+
+	if (cpu_is_mx21())
+		max = 18;
+	else
+		max = 16;
+
+	for (i = 2; i < max; i++)
+		if (fspi * mxc_clkdivs[i] >= fin)
+			return i;
+
+	return max;
+}
+
+/* MX1, MX31, MX35 */
+static unsigned int mxc_spi_clkdiv_2(unsigned int fin,
+		unsigned int fspi)
+{
+	int i, div = 4;
+
+	for (i = 0; i < 7; i++) {
+		if (fspi * div >= fin)
+			return i;
+		div <<= 1;
+	}
+
+	return 7;
+}
+
+#define MX31_INTREG_TEEN	(1 << 0)
+#define MX31_INTREG_RREN	(1 << 3)
+
+#define MX31_CSPICTRL_ENABLE	(1 << 0)
+#define MX31_CSPICTRL_MASTER	(1 << 1)
+#define MX31_CSPICTRL_XCH	(1 << 2)
+#define MX31_CSPICTRL_POL	(1 << 4)
+#define MX31_CSPICTRL_PHA	(1 << 5)
+#define MX31_CSPICTRL_SSCTL	(1 << 6)
+#define MX31_CSPICTRL_SSPOL	(1 << 7)
+#define MX31_CSPICTRL_BC_SHIFT	8
+#define MX35_CSPICTRL_BL_SHIFT	20
+#define MX31_CSPICTRL_CS_SHIFT	24
+#define MX35_CSPICTRL_CS_SHIFT	12
+#define MX31_CSPICTRL_DR_SHIFT	16
+
+#define MX31_CSPISTATUS		0x14
+#define MX31_STATUS_RR		(1 << 3)
+
+/* These functions also work for the i.MX35, but be aware that
+ * the i.MX35 has a slightly different register layout for bits
+ * we do not use here.
+ */
+static void mx31_intctrl(struct mxc_spi_data *mxc_spi, int enable)
+{
+	unsigned int val = 0;
+
+	if (enable & MXC_INT_TE)
+		val |= MX31_INTREG_TEEN;
+	if (enable & MXC_INT_RR)
+		val |= MX31_INTREG_RREN;
+
+	writel(val, mxc_spi->base + MXC_CSPIINT);
+}
+
+static void mx31_trigger(struct mxc_spi_data *mxc_spi)
+{
+	unsigned int reg;
+
+	reg = readl(mxc_spi->base + MXC_CSPICTRL);
+	reg |= MX31_CSPICTRL_XCH;
+	writel(reg, mxc_spi->base + MXC_CSPICTRL);
+}
+
+static int mx31_config(struct mxc_spi_data *mxc_spi,
+		struct mxc_spi_config *config)
+{
+	unsigned int reg = MX31_CSPICTRL_ENABLE | MX31_CSPICTRL_MASTER;
+
+	reg |= mxc_spi_clkdiv_2(mxc_spi->spi_clk, config->speed_hz) <<
+		MX31_CSPICTRL_DR_SHIFT;
+
+	if (cpu_is_mx31())
+		reg |= (config->bpw - 1) << MX31_CSPICTRL_BC_SHIFT;
+	else if (cpu_is_mx35()) {
+		reg |= (config->bpw - 1) << MX35_CSPICTRL_BL_SHIFT;
+		reg |= MX31_CSPICTRL_SSCTL;
+	}
+
+	if (config->mode & SPI_CPHA)
+		reg |= MX31_CSPICTRL_PHA;
+	if (config->mode & SPI_CPOL)
+		reg |= MX31_CSPICTRL_POL;
+	if (config->mode & SPI_CS_HIGH)
+		reg |= MX31_CSPICTRL_SSPOL;
+	if (config->cs < 0) {
+		if (cpu_is_mx31())
+			reg |= (config->cs + 32) << MX31_CSPICTRL_CS_SHIFT;
+		else if (cpu_is_mx35())
+			reg |= (config->cs + 32) << MX35_CSPICTRL_CS_SHIFT;
+	}
+
+	writel(reg, mxc_spi->base + MXC_CSPICTRL);
+
+	return 0;
+}
+
+static int mx31_rx_available(struct mxc_spi_data *mxc_spi)
+{
+	return readl(mxc_spi->base + MX31_CSPISTATUS) & MX31_STATUS_RR;
+}
+
+#define MX27_INTREG_RR		(1 << 4)
+#define MX27_INTREG_TEEN	(1 << 9)
+#define MX27_INTREG_RREN	(1 << 13)
+
+#define MX27_CSPICTRL_POL	(1 << 5)
+#define MX27_CSPICTRL_PHA	(1 << 6)
+#define MX27_CSPICTRL_SSPOL	(1 << 8)
+#define MX27_CSPICTRL_XCH	(1 << 9)
+#define MX27_CSPICTRL_ENABLE	(1 << 10)
+#define MX27_CSPICTRL_MASTER	(1 << 11)
+#define MX27_CSPICTRL_DR_SHIFT	14
+#define MX27_CSPICTRL_CS_SHIFT	19
+
+static void mx27_intctrl(struct mxc_spi_data *mxc_spi, int enable)
+{
+	unsigned int val = 0;
+
+	if (enable & MXC_INT_TE)
+		val |= MX27_INTREG_TEEN;
+	if (enable & MXC_INT_RR)
+		val |= MX27_INTREG_RREN;
+
+	writel(val, mxc_spi->base + MXC_CSPIINT);
+}
+
+static void mx27_trigger(struct mxc_spi_data *mxc_spi)
+{
+	unsigned int reg;
+
+	reg = readl(mxc_spi->base + MXC_CSPICTRL);
+	reg |= MX27_CSPICTRL_XCH;
+	writel(reg, mxc_spi->base + MXC_CSPICTRL);
+}
+
+static int mx27_config(struct mxc_spi_data *mxc_spi,
+		struct mxc_spi_config *config)
+{
+	unsigned int reg = MX27_CSPICTRL_ENABLE | MX27_CSPICTRL_MASTER;
+
+	reg |= mxc_spi_clkdiv_1(mxc_spi->spi_clk, config->speed_hz) <<
+		MX27_CSPICTRL_DR_SHIFT;
+	reg |= config->bpw - 1;
+
+	if (config->mode & SPI_CPHA)
+		reg |= MX27_CSPICTRL_PHA;
+	if (config->mode & SPI_CPOL)
+		reg |= MX27_CSPICTRL_POL;
+	if (config->mode & SPI_CS_HIGH)
+		reg |= MX27_CSPICTRL_SSPOL;
+	if (config->cs < 0)
+		reg |= (config->cs + 32) << MX27_CSPICTRL_CS_SHIFT;
+
+	writel(reg, mxc_spi->base + MXC_CSPICTRL);
+
+	return 0;
+}
+
+static int mx27_rx_available(struct mxc_spi_data *mxc_spi)
+{
+	return readl(mxc_spi->base + MXC_CSPIINT) & MX27_INTREG_RR;
+}
+
+#define MX1_INTREG_RR		(1 << 3)
+#define MX1_INTREG_TEEN		(1 << 8)
+#define MX1_INTREG_RREN		(1 << 11)
+
+#define MX1_CSPICTRL_POL	(1 << 4)
+#define MX1_CSPICTRL_PHA	(1 << 5)
+#define MX1_CSPICTRL_XCH	(1 << 8)
+#define MX1_CSPICTRL_ENABLE	(1 << 9)
+#define MX1_CSPICTRL_MASTER	(1 << 10)
+#define MX1_CSPICTRL_DR_SHIFT	13
+
+static void mx1_intctrl(struct mxc_spi_data *mxc_spi, int enable)
+{
+	unsigned int val = 0;
+
+	if (enable & MXC_INT_TE)
+		val |= MX1_INTREG_TEEN;
+	if (enable & MXC_INT_RR)
+		val |= MX1_INTREG_RREN;
+
+	writel(val, mxc_spi->base + MXC_CSPIINT);
+}
+
+static void mx1_trigger(struct mxc_spi_data *mxc_spi)
+{
+	unsigned int reg;
+
+	reg = readl(mxc_spi->base + MXC_CSPICTRL);
+	reg |= MX1_CSPICTRL_XCH;
+	writel(reg, mxc_spi->base + MXC_CSPICTRL);
+}
+
+static int mx1_config(struct mxc_spi_data *mxc_spi,
+		struct mxc_spi_config *config)
+{
+	unsigned int reg = MX1_CSPICTRL_ENABLE | MX1_CSPICTRL_MASTER;
+
+	reg |= mxc_spi_clkdiv_2(mxc_spi->spi_clk, config->speed_hz) <<
+		MX1_CSPICTRL_DR_SHIFT;
+	reg |= config->bpw - 1;
+
+	if (config->mode & SPI_CPHA)
+		reg |= MX1_CSPICTRL_PHA;
+	if (config->mode & SPI_CPOL)
+		reg |= MX1_CSPICTRL_POL;
+
+	writel(reg, mxc_spi->base + MXC_CSPICTRL);
+
+	return 0;
+}
+
+static int mx1_rx_available(struct mxc_spi_data *mxc_spi)
+{
+	return readl(mxc_spi->base + MXC_CSPIINT) & MX1_INTREG_RR;
+}
+
+static void mxc_spi_chipselect(struct spi_device *spi, int is_active)
+{
+	struct mxc_spi_data *mxc_spi = spi_master_get_devdata(spi->master);
+	unsigned int cs = 0;
+	int gpio = mxc_spi->chipselect[spi->chip_select];
+	struct mxc_spi_config config;
+
+	if (spi->mode & SPI_CS_HIGH)
+		cs = 1;
+
+	if (is_active == BITBANG_CS_INACTIVE) {
+		if (gpio >= 0)
+			gpio_set_value(gpio, !cs);
+		return;
+	}
+
+	config.bpw = spi->bits_per_word;
+	config.speed_hz = spi->max_speed_hz;
+	config.mode = spi->mode;
+	config.cs = mxc_spi->chipselect[spi->chip_select];
+
+	mxc_spi->config(mxc_spi, &config);
+
+	/* Initialize the functions for transfer */
+	if (config.bpw <= 8) {
+		mxc_spi->rx = mxc_spi_buf_rx_u8;
+		mxc_spi->tx = mxc_spi_buf_tx_u8;
+	} else if (config.bpw <= 16) {
+		mxc_spi->rx = mxc_spi_buf_rx_u16;
+		mxc_spi->tx = mxc_spi_buf_tx_u16;
+	} else if (config.bpw <= 32) {
+		mxc_spi->rx = mxc_spi_buf_rx_u32;
+		mxc_spi->tx = mxc_spi_buf_tx_u32;
+	} else
+		BUG();
+
+	if (gpio >= 0)
+		gpio_set_value(gpio, cs);
+
+	return;
+}
+
+static void mxc_spi_push(struct mxc_spi_data *mxc_spi)
+{
+	while (mxc_spi->txfifo < 8) {
+		if (!mxc_spi->count)
+			break;
+		mxc_spi->tx(mxc_spi);
+		mxc_spi->txfifo++;
+	}
+
+	mxc_spi->trigger(mxc_spi);
+}
+
+static irqreturn_t mxc_spi_isr(int irq, void *dev_id)
+{
+	struct mxc_spi_data *mxc_spi = dev_id;
+
+	while (mxc_spi->rx_available(mxc_spi)) {
+		mxc_spi->rx(mxc_spi);
+		mxc_spi->txfifo--;
+	}
+
+	if (mxc_spi->count) {
+		mxc_spi_push(mxc_spi);
+		return IRQ_HANDLED;
+	}
+
+	if (mxc_spi->txfifo) {
+		/* No data left to push, but still waiting for rx data,
+		 * enable receive data available interrupt.
+		 */
+		mxc_spi->intctrl(mxc_spi, MXC_INT_RR);
+		return IRQ_HANDLED;
+	}
+
+	mxc_spi->intctrl(mxc_spi, 0);
+	complete(&mxc_spi->xfer_done);
+
+	return IRQ_HANDLED;
+}
+
+static int mxc_spi_setupxfer(struct spi_device *spi,
+				 struct spi_transfer *t)
+{
+	struct mxc_spi_data *mxc_spi = spi_master_get_devdata(spi->master);
+	struct mxc_spi_config config;
+
+	config.bpw = t ? t->bits_per_word : spi->bits_per_word;
+	config.speed_hz  = t ? t->speed_hz : spi->max_speed_hz;
+	config.mode = spi->mode;
+
+	mxc_spi->config(mxc_spi, &config);
+
+	return 0;
+}
+
+static int mxc_spi_transfer(struct spi_device *spi,
+				struct spi_transfer *transfer)
+{
+	struct mxc_spi_data *mxc_spi = spi_master_get_devdata(spi->master);
+
+	mxc_spi->tx_buf = transfer->tx_buf;
+	mxc_spi->rx_buf = transfer->rx_buf;
+	mxc_spi->count = transfer->len;
+	mxc_spi->txfifo = 0;
+
+	init_completion(&mxc_spi->xfer_done);
+
+	mxc_spi_push(mxc_spi);
+
+	mxc_spi->intctrl(mxc_spi, MXC_INT_TE);
+
+	wait_for_completion(&mxc_spi->xfer_done);
+
+	return transfer->len;
+}
+
+static int mxc_spi_setup(struct spi_device *spi)
+{
+	if (!spi->bits_per_word)
+		spi->bits_per_word = 8;
+
+	pr_debug("%s: mode %d, %u bpw, %d hz\n", __func__,
+		 spi->mode, spi->bits_per_word, spi->max_speed_hz);
+
+	mxc_spi_chipselect(spi, BITBANG_CS_INACTIVE);
+
+	return 0;
+}
+
+static void mxc_spi_cleanup(struct spi_device *spi)
+{
+}
+
+static int __init mxc_spi_probe(struct platform_device *pdev)
+{
+	struct spi_imx_master *mxc_platform_info;
+	struct spi_master *master;
+	struct mxc_spi_data *mxc_spi;
+	struct resource *res;
+	int i, ret;
+
+	mxc_platform_info = (struct spi_imx_master *)pdev->dev.platform_data;
+	if (!mxc_platform_info) {
+		dev_err(&pdev->dev, "can't get the platform data\n");
+		return -EINVAL;
+	}
+
+	master = spi_alloc_master(&pdev->dev, sizeof(struct mxc_spi_data));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	master->bus_num = pdev->id;
+	master->num_chipselect = mxc_platform_info->num_chipselect;
+
+	mxc_spi = spi_master_get_devdata(master);
+	mxc_spi->bitbang.master = spi_master_get(master);
+	mxc_spi->chipselect = mxc_platform_info->chipselect;
+
+	for (i = 0; i < master->num_chipselect; i++) {
+		if (mxc_spi->chipselect[i] < 0)
+			continue;
+		ret = gpio_request(mxc_spi->chipselect[i], DRIVER_NAME);
+		if (ret) {
+			i--;
+			while (i > 0)
+				if (mxc_spi->chipselect[i] >= 0)
+					gpio_free(mxc_spi->chipselect[i--]);
+			dev_err(&pdev->dev, "can't get cs gpios");
+			goto out_master_put;
+		}
+		gpio_direction_output(mxc_spi->chipselect[i], 1);
+	}
+
+	mxc_spi->bitbang.chipselect = mxc_spi_chipselect;
+	mxc_spi->bitbang.setup_transfer = mxc_spi_setupxfer;
+	mxc_spi->bitbang.txrx_bufs = mxc_spi_transfer;
+	mxc_spi->bitbang.master->setup = mxc_spi_setup;
+	mxc_spi->bitbang.master->cleanup = mxc_spi_cleanup;
+
+	init_completion(&mxc_spi->xfer_done);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "can't get platform resource\n");
+		ret = -ENOMEM;
+		goto out_gpio_free;
+	}
+
+	if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
+		dev_err(&pdev->dev, "request_mem_region failed\n");
+		ret = -EBUSY;
+		goto out_gpio_free;
+	}
+
+	mxc_spi->base = ioremap(res->start, resource_size(res));
+	if (!mxc_spi->base) {
+		ret = -EINVAL;
+		goto out_release_mem;
+	}
+
+	mxc_spi->irq = platform_get_irq(pdev, 0);
+	if (!mxc_spi->irq) {
+		ret = -EINVAL;
+		goto out_iounmap;
+	}
+
+	ret = request_irq(mxc_spi->irq, mxc_spi_isr, 0, DRIVER_NAME, mxc_spi);
+	if (ret) {
+		dev_err(&pdev->dev, "can't get irq%d: %d\n", mxc_spi->irq, ret);
+		goto out_iounmap;
+	}
+
+	if (cpu_is_mx31() || cpu_is_mx35()) {
+		mxc_spi->intctrl = mx31_intctrl;
+		mxc_spi->config = mx31_config;
+		mxc_spi->trigger = mx31_trigger;
+		mxc_spi->rx_available = mx31_rx_available;
+	} else  if (cpu_is_mx27() || cpu_is_mx21()) {
+		mxc_spi->intctrl = mx27_intctrl;
+		mxc_spi->config = mx27_config;
+		mxc_spi->trigger = mx27_trigger;
+		mxc_spi->rx_available = mx27_rx_available;
+	} else if (cpu_is_mx1()) {
+		mxc_spi->intctrl = mx1_intctrl;
+		mxc_spi->config = mx1_config;
+		mxc_spi->trigger = mx1_trigger;
+		mxc_spi->rx_available = mx1_rx_available;
+	} else
+		BUG();
+
+	mxc_spi->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(mxc_spi->clk)) {
+		dev_err(&pdev->dev, "unable to get clock\n");
+		ret = PTR_ERR(mxc_spi->clk);
+		goto out_free_irq;
+	}
+
+	clk_enable(mxc_spi->clk);
+	mxc_spi->spi_clk = clk_get_rate(mxc_spi->clk);
+
+	if (!cpu_is_mx31() || !cpu_is_mx35())
+		writel(1, mxc_spi->base + MXC_RESET);
+
+	mxc_spi->intctrl(mxc_spi, 0);
+
+	ret = spi_bitbang_start(&mxc_spi->bitbang);
+	if (ret) {
+		dev_err(&pdev->dev, "bitbang start failed with %d\n", ret);
+		goto out_clk_put;
+	}
+
+	dev_info(&pdev->dev, "probed\n");
+
+	return ret;
+
+out_clk_put:
+	clk_disable(mxc_spi->clk);
+	clk_put(mxc_spi->clk);
+out_free_irq:
+	free_irq(mxc_spi->irq, mxc_spi);
+out_iounmap:
+	iounmap(mxc_spi->base);
+out_release_mem:
+	release_mem_region(res->start, resource_size(res));
+out_gpio_free:
+	for (i = 0; i < master->num_chipselect; i++)
+		if (mxc_spi->chipselect[i] >= 0)
+			gpio_free(mxc_spi->chipselect[i]);
+out_master_put:
+	spi_master_put(master);
+	kfree(master);
+	platform_set_drvdata(pdev, NULL);
+	return ret;
+}
+
+static int __exit mxc_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	struct mxc_spi_data *mxc_spi = spi_master_get_devdata(master);
+	int i;
+
+	spi_bitbang_stop(&mxc_spi->bitbang);
+
+	writel(0, mxc_spi->base + MXC_CSPICTRL);
+	clk_disable(mxc_spi->clk);
+	clk_put(mxc_spi->clk);
+	free_irq(mxc_spi->irq, mxc_spi);
+	iounmap(mxc_spi->base);
+
+	for (i = 0; i < master->num_chipselect; i++)
+		if (mxc_spi->chipselect[i] >= 0)
+			gpio_free(mxc_spi->chipselect[i]);
+
+	spi_master_put(master);
+
+	release_mem_region(res->start, resource_size(res));
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver mxc_spi_driver = {
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .owner = THIS_MODULE,
+		   },
+	.probe = mxc_spi_probe,
+	.remove = __exit_p(mxc_spi_remove),
+};
+
+static int __init mxc_spi_init(void)
+{
+	return platform_driver_register(&mxc_spi_driver);
+}
+
+static void __exit mxc_spi_exit(void)
+{
+	platform_driver_unregister(&mxc_spi_driver);
+}
+
+module_init(mxc_spi_init);
+module_exit(mxc_spi_exit);
+
+MODULE_DESCRIPTION("SPI Master Controller driver");
+MODULE_AUTHOR("Sascha Hauer, Pengutronix");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 75368bf6c2876d8f33abfe77aa3864869a3893eb Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:46:04 -0700
Subject: spi: add support for device table matching

With this patch spi drivers can use standard spi_driver.id_table and
MODULE_DEVICE_TABLE() mechanisms to bind against the devices.  Just like
we do with I2C drivers.

This is useful when a single driver supports several variants of devices
but it is not possible to detect them in run-time (like non-JEDEC chips
probing in drivers/mtd/devices/m25p80.c), and when platform_data usage is
overkill.

This patch also makes life a lot easier on OpenFirmware platforms, since
with OF we extensively use proper device IDs in modaliases.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 70845cc..8518a6e 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -59,9 +59,32 @@ static struct device_attribute spi_dev_attrs[] = {
  * and the sysfs version makes coldplug work too.
  */
 
+static const struct spi_device_id *spi_match_id(const struct spi_device_id *id,
+						const struct spi_device *sdev)
+{
+	while (id->name[0]) {
+		if (!strcmp(sdev->modalias, id->name))
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
+const struct spi_device_id *spi_get_device_id(const struct spi_device *sdev)
+{
+	const struct spi_driver *sdrv = to_spi_driver(sdev->dev.driver);
+
+	return spi_match_id(sdrv->id_table, sdev);
+}
+EXPORT_SYMBOL_GPL(spi_get_device_id);
+
 static int spi_match_device(struct device *dev, struct device_driver *drv)
 {
 	const struct spi_device	*spi = to_spi_device(dev);
+	const struct spi_driver	*sdrv = to_spi_driver(drv);
+
+	if (sdrv->id_table)
+		return !!spi_match_id(sdrv->id_table, spi);
 
 	return strcmp(spi->modalias, drv->name) == 0;
 }
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 1bf5900..b34f1ef 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -399,6 +399,16 @@ struct i2c_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
+/* spi */
+
+#define SPI_NAME_SIZE	32
+
+struct spi_device_id {
+	char name[SPI_NAME_SIZE];
+	kernel_ulong_t driver_data	/* Data private to the driver */
+			__attribute__((aligned(sizeof(kernel_ulong_t))));
+};
+
 /* dmi */
 enum dmi_field {
 	DMI_NONE,
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index eb25ced..e2051f3 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -20,6 +20,7 @@
 #define __LINUX_SPI_H
 
 #include <linux/device.h>
+#include <linux/mod_devicetable.h>
 
 /*
  * INTERFACES between SPI master-side drivers and SPI infrastructure.
@@ -86,7 +87,7 @@ struct spi_device {
 	int			irq;
 	void			*controller_state;
 	void			*controller_data;
-	char			modalias[32];
+	char			modalias[SPI_NAME_SIZE];
 
 	/*
 	 * likely need more hooks for more protocol options affecting how
@@ -145,6 +146,7 @@ struct spi_message;
 
 /**
  * struct spi_driver - Host side "protocol" driver
+ * @id_table: List of SPI devices supported by this driver
  * @probe: Binds this driver to the spi device.  Drivers can verify
  *	that the device is actually present, and may need to configure
  *	characteristics (such as bits_per_word) which weren't needed for
@@ -170,6 +172,7 @@ struct spi_message;
  * MMC, RTC, filesystem character device nodes, and hardware monitoring.
  */
 struct spi_driver {
+	const struct spi_device_id *id_table;
 	int			(*probe)(struct spi_device *spi);
 	int			(*remove)(struct spi_device *spi);
 	void			(*shutdown)(struct spi_device *spi);
@@ -734,7 +737,7 @@ struct spi_board_info {
 	 * controller_data goes to spi_device.controller_data,
 	 * irq is copied too
 	 */
-	char		modalias[32];
+	char		modalias[SPI_NAME_SIZE];
 	const void	*platform_data;
 	void		*controller_data;
 	int		irq;
@@ -802,4 +805,7 @@ spi_unregister_device(struct spi_device *spi)
 		device_unregister(&spi->dev);
 }
 
+extern const struct spi_device_id *
+spi_get_device_id(const struct spi_device *sdev);
+
 #endif /* __LINUX_SPI_H */
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 40e0045..9d446e3 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -657,6 +657,15 @@ static int do_i2c_entry(const char *filename, struct i2c_device_id *id,
 	return 1;
 }
 
+/* Looks like: S */
+static int do_spi_entry(const char *filename, struct spi_device_id *id,
+			char *alias)
+{
+	sprintf(alias, "%s", id->name);
+
+	return 1;
+}
+
 static const struct dmifield {
 	const char *prefix;
 	int field;
@@ -853,6 +862,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 		do_table(symval, sym->st_size,
 			 sizeof(struct i2c_device_id), "i2c",
 			 do_i2c_entry, mod);
+	else if (sym_is(symname, "__mod_spi_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct spi_device_id), "spi",
+			 do_spi_entry, mod);
 	else if (sym_is(symname, "__mod_dmi_device_table"))
 		do_table(symval, sym->st_size,
 			 sizeof(struct dmi_system_id), "dmi",
-- 
cgit v0.10.2


From 225d8b248843d1b3fbcf616d5e7d9544463aca3e Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:46:04 -0700
Subject: of: remove "stm,m25p40" alias

The alias isn't needed any longer since the m25p80 driver converted to the
module device table matching.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 69f85c0..ddf224d 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -447,7 +447,6 @@ struct of_modalias_table {
 static struct of_modalias_table of_modalias_table[] = {
 	{ "fsl,mcu-mpc8349emitx", "mcu-mpc8349emitx" },
 	{ "mmc-spi-slot", "mmc_spi" },
-	{ "stm,m25p40", "m25p80" },
 };
 
 /**
-- 
cgit v0.10.2


From d2a5c10f806b089a6e6f10deefd01dc4ce67940d Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:46:05 -0700
Subject: hwmon: adxx: convert to device table matching

Make the code a little bit nicer, and shorter.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Marc Pignat <marc.pignat@hevs.ch>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Marc Pignat <marc.pignat@hevs.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/adcxx.c b/drivers/hwmon/adcxx.c
index 242294d..5e9e095 100644
--- a/drivers/hwmon/adcxx.c
+++ b/drivers/hwmon/adcxx.c
@@ -43,6 +43,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/mutex.h>
+#include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
 
 #define DRVNAME		"adcxx"
@@ -157,8 +158,9 @@ static struct sensor_device_attribute ad_input[] = {
 
 /*----------------------------------------------------------------------*/
 
-static int __devinit adcxx_probe(struct spi_device *spi, int channels)
+static int __devinit adcxx_probe(struct spi_device *spi)
 {
+	int channels = spi_get_device_id(spi)->driver_data;
 	struct adcxx *adc;
 	int status;
 	int i;
@@ -204,26 +206,6 @@ out_err:
 	return status;
 }
 
-static int __devinit adcxx1s_probe(struct spi_device *spi)
-{
-	return adcxx_probe(spi, 1);
-}
-
-static int __devinit adcxx2s_probe(struct spi_device *spi)
-{
-	return adcxx_probe(spi, 2);
-}
-
-static int __devinit adcxx4s_probe(struct spi_device *spi)
-{
-	return adcxx_probe(spi, 4);
-}
-
-static int __devinit adcxx8s_probe(struct spi_device *spi)
-{
-	return adcxx_probe(spi, 8);
-}
-
 static int __devexit adcxx_remove(struct spi_device *spi)
 {
 	struct adcxx *adc = dev_get_drvdata(&spi->dev);
@@ -241,79 +223,33 @@ static int __devexit adcxx_remove(struct spi_device *spi)
 	return 0;
 }
 
-static struct spi_driver adcxx1s_driver = {
-	.driver = {
-		.name	= "adcxx1s",
-		.owner	= THIS_MODULE,
-	},
-	.probe	= adcxx1s_probe,
-	.remove	= __devexit_p(adcxx_remove),
+static const struct spi_device_id adcxx_ids[] = {
+	{ "adcxx1s", 1 },
+	{ "adcxx2s", 2 },
+	{ "adcxx4s", 4 },
+	{ "adcxx8s", 8 },
+	{ },
 };
+MODULE_DEVICE_TABLE(spi, adcxx_ids);
 
-static struct spi_driver adcxx2s_driver = {
+static struct spi_driver adcxx_driver = {
 	.driver = {
-		.name	= "adcxx2s",
+		.name	= "adcxx",
 		.owner	= THIS_MODULE,
 	},
-	.probe	= adcxx2s_probe,
-	.remove	= __devexit_p(adcxx_remove),
-};
-
-static struct spi_driver adcxx4s_driver = {
-	.driver = {
-		.name	= "adcxx4s",
-		.owner	= THIS_MODULE,
-	},
-	.probe	= adcxx4s_probe,
-	.remove	= __devexit_p(adcxx_remove),
-};
-
-static struct spi_driver adcxx8s_driver = {
-	.driver = {
-		.name	= "adcxx8s",
-		.owner	= THIS_MODULE,
-	},
-	.probe	= adcxx8s_probe,
+	.id_table = adcxx_ids,
+	.probe	= adcxx_probe,
 	.remove	= __devexit_p(adcxx_remove),
 };
 
 static int __init init_adcxx(void)
 {
-	int status;
-	status = spi_register_driver(&adcxx1s_driver);
-	if (status)
-		goto reg_1_failed;
-
-	status = spi_register_driver(&adcxx2s_driver);
-	if (status)
-		goto reg_2_failed;
-
-	status = spi_register_driver(&adcxx4s_driver);
-	if (status)
-		goto reg_4_failed;
-
-	status = spi_register_driver(&adcxx8s_driver);
-	if (status)
-		goto reg_8_failed;
-
-	return status;
-
-reg_8_failed:
-	spi_unregister_driver(&adcxx4s_driver);
-reg_4_failed:
-	spi_unregister_driver(&adcxx2s_driver);
-reg_2_failed:
-	spi_unregister_driver(&adcxx1s_driver);
-reg_1_failed:
-	return status;
+	return spi_register_driver(&adcxx_driver);
 }
 
 static void __exit exit_adcxx(void)
 {
-	spi_unregister_driver(&adcxx1s_driver);
-	spi_unregister_driver(&adcxx2s_driver);
-	spi_unregister_driver(&adcxx4s_driver);
-	spi_unregister_driver(&adcxx8s_driver);
+	spi_unregister_driver(&adcxx_driver);
 }
 
 module_init(init_adcxx);
@@ -322,8 +258,3 @@ module_exit(exit_adcxx);
 MODULE_AUTHOR("Marc Pignat");
 MODULE_DESCRIPTION("National Semiconductor adcxx8sxxx Linux driver");
 MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("adcxx1s");
-MODULE_ALIAS("adcxx2s");
-MODULE_ALIAS("adcxx4s");
-MODULE_ALIAS("adcxx8s");
-- 
cgit v0.10.2


From 8cec03eee4a771f949c70cff07775c9bb21d4642 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:46:07 -0700
Subject: hwmon: lm70: convert to device table matching

Make the code a little bit nicer, and shorter.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Kaiwan N Billimoria <kaiwan@designergraphix.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c
index ae6204f..ab8a5d3 100644
--- a/drivers/hwmon/lm70.c
+++ b/drivers/hwmon/lm70.c
@@ -32,6 +32,7 @@
 #include <linux/sysfs.h>
 #include <linux/hwmon.h>
 #include <linux/mutex.h>
+#include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
 
 
@@ -130,11 +131,20 @@ static DEVICE_ATTR(name, S_IRUGO, lm70_show_name, NULL);
 
 /*----------------------------------------------------------------------*/
 
-static int __devinit common_probe(struct spi_device *spi, int chip)
+static int __devinit lm70_probe(struct spi_device *spi)
 {
+	int chip = spi_get_device_id(spi)->driver_data;
 	struct lm70 *p_lm70;
 	int status;
 
+	/* signaling is SPI_MODE_0 for both LM70 and TMP121 */
+	if (spi->mode & (SPI_CPOL | SPI_CPHA))
+		return -EINVAL;
+
+	/* 3-wire link (shared SI/SO) for LM70 */
+	if (chip == LM70_CHIP_LM70 && !(spi->mode & SPI_3WIRE))
+		return -EINVAL;
+
 	/* NOTE:  we assume 8-bit words, and convert to 16 bits manually */
 
 	p_lm70 = kzalloc(sizeof *p_lm70, GFP_KERNEL);
@@ -170,24 +180,6 @@ out_dev_reg_failed:
 	return status;
 }
 
-static int __devinit lm70_probe(struct spi_device *spi)
-{
-	/* signaling is SPI_MODE_0 on a 3-wire link (shared SI/SO) */
-	if ((spi->mode & (SPI_CPOL | SPI_CPHA)) || !(spi->mode & SPI_3WIRE))
-		return -EINVAL;
-
-	return common_probe(spi, LM70_CHIP_LM70);
-}
-
-static int __devinit tmp121_probe(struct spi_device *spi)
-{
-	/* signaling is SPI_MODE_0 with only MISO connected */
-	if (spi->mode & (SPI_CPOL | SPI_CPHA))
-		return -EINVAL;
-
-	return common_probe(spi, LM70_CHIP_TMP121);
-}
-
 static int __devexit lm70_remove(struct spi_device *spi)
 {
 	struct lm70 *p_lm70 = dev_get_drvdata(&spi->dev);
@@ -201,41 +193,32 @@ static int __devexit lm70_remove(struct spi_device *spi)
 	return 0;
 }
 
-static struct spi_driver tmp121_driver = {
-	.driver = {
-		.name	= "tmp121",
-		.owner	= THIS_MODULE,
-	},
-	.probe	= tmp121_probe,
-	.remove	= __devexit_p(lm70_remove),
+
+static const struct spi_device_id lm70_ids[] = {
+	{ "lm70",   LM70_CHIP_LM70 },
+	{ "tmp121", LM70_CHIP_TMP121 },
+	{ },
 };
+MODULE_DEVICE_TABLE(spi, lm70_ids);
 
 static struct spi_driver lm70_driver = {
 	.driver = {
 		.name	= "lm70",
 		.owner	= THIS_MODULE,
 	},
+	.id_table = lm70_ids,
 	.probe	= lm70_probe,
 	.remove	= __devexit_p(lm70_remove),
 };
 
 static int __init init_lm70(void)
 {
-	int ret = spi_register_driver(&lm70_driver);
-	if (ret)
-		return ret;
-
-	ret = spi_register_driver(&tmp121_driver);
-	if (ret)
-		spi_unregister_driver(&lm70_driver);
-
-	return ret;
+	return spi_register_driver(&lm70_driver);
 }
 
 static void __exit cleanup_lm70(void)
 {
 	spi_unregister_driver(&lm70_driver);
-	spi_unregister_driver(&tmp121_driver);
 }
 
 module_init(init_lm70);
-- 
cgit v0.10.2


From e0626e3844e8f430fc1a4417f523a00797df7ca6 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 22 Sep 2009 16:46:08 -0700
Subject: spi: prefix modalias with "spi:"

This makes it consistent with other buses (platform, i2c, vio, ...).  I'm
not sure why we use the prefixes, but there must be a reason.

This was easy enough to do it, and I did it.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: Samuel Ortiz <sameo@openedhand.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Acked-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpio/max7301.c b/drivers/gpio/max7301.c
index 7b82eaa..480956f 100644
--- a/drivers/gpio/max7301.c
+++ b/drivers/gpio/max7301.c
@@ -339,3 +339,4 @@ module_exit(max7301_exit);
 MODULE_AUTHOR("Juergen Beisert");
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("MAX7301 SPI based GPIO-Expander");
+MODULE_ALIAS("spi:" DRIVER_NAME);
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index f6fae0e..c6c7aa1 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -433,3 +433,4 @@ static void __exit mcp23s08_exit(void)
 module_exit(mcp23s08_exit);
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:mcp23s08");
diff --git a/drivers/hwmon/lis3lv02d_spi.c b/drivers/hwmon/lis3lv02d_spi.c
index 82ebca5..ecd7395 100644
--- a/drivers/hwmon/lis3lv02d_spi.c
+++ b/drivers/hwmon/lis3lv02d_spi.c
@@ -139,4 +139,4 @@ module_exit(lis302dl_exit);
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
 MODULE_DESCRIPTION("lis3lv02d SPI glue layer");
 MODULE_LICENSE("GPL");
-
+MODULE_ALIAS("spi:" DRV_NAME);
diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c
index bfaa665..9ac4972 100644
--- a/drivers/hwmon/max1111.c
+++ b/drivers/hwmon/max1111.c
@@ -242,3 +242,4 @@ module_exit(max1111_exit);
 MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
 MODULE_DESCRIPTION("MAX1111 ADC Driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:max1111");
diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index ecaeb7e..eb83939c 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c
@@ -842,3 +842,4 @@ module_exit(ad7877_exit);
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("AD7877 touchscreen Driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:ad7877");
diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index 5d8a703..19b4db7e 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -779,3 +779,4 @@ module_exit(ad7879_exit);
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("AD7879(-1) touchscreen Driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:ad7879");
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index ba9d38c..09c8109 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -1256,3 +1256,4 @@ module_exit(ads7846_exit);
 
 MODULE_DESCRIPTION("ADS7846 TouchScreen Driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:ads7846");
diff --git a/drivers/leds/leds-dac124s085.c b/drivers/leds/leds-dac124s085.c
index 098d9aa..2913d76 100644
--- a/drivers/leds/leds-dac124s085.c
+++ b/drivers/leds/leds-dac124s085.c
@@ -148,3 +148,4 @@ module_exit(dac124s085_leds_exit);
 MODULE_AUTHOR("Guennadi Liakhovetski <lg@denx.de>");
 MODULE_DESCRIPTION("DAC124S085 LED driver");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("spi:dac124s085");
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 016be49..87628891 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -548,3 +548,4 @@ module_exit(ezx_pcap_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Daniel Ribeiro / Harald Welte");
 MODULE_DESCRIPTION("Motorola PCAP2 ASIC Driver");
+MODULE_ALIAS("spi:ezx-pcap");
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 2e535a0..d902d81 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -417,4 +417,4 @@ module_exit(at25_exit);
 MODULE_DESCRIPTION("Driver for most SPI EEPROMs");
 MODULE_AUTHOR("David Brownell");
 MODULE_LICENSE("GPL");
-
+MODULE_ALIAS("spi:at25");
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index a461017..d55fe4f 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1562,3 +1562,4 @@ MODULE_AUTHOR("Mike Lavender, David Brownell, "
 		"Hans-Peter Nilsson, Jan Nikitenko");
 MODULE_DESCRIPTION("SPI SD/MMC host driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:mmc_spi");
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 43976aa..211c27ac 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -966,3 +966,4 @@ module_exit(dataflash_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andrew Victor, David Brownell");
 MODULE_DESCRIPTION("MTD DataFlash driver");
+MODULE_ALIAS("spi:mtd_dataflash");
diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c
index 117fc6c..66813c9 100644
--- a/drivers/net/enc28j60.c
+++ b/drivers/net/enc28j60.c
@@ -1666,3 +1666,4 @@ MODULE_AUTHOR("Claudio Lanconelli <lanconelli.claudio@eptar.com>");
 MODULE_LICENSE("GPL");
 module_param_named(debug, debug.msg_enable, int, 0);
 MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., ffff=all)");
+MODULE_ALIAS("spi:" DRV_NAME);
diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c
index 547ac7c..2378358 100644
--- a/drivers/net/ks8851.c
+++ b/drivers/net/ks8851.c
@@ -1321,3 +1321,4 @@ MODULE_LICENSE("GPL");
 
 module_param_named(message, msg_enable, int, 0);
 MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)");
+MODULE_ALIAS("spi:ks8851");
diff --git a/drivers/net/wireless/libertas/if_spi.c b/drivers/net/wireless/libertas/if_spi.c
index 446e327..cb8be8d 100644
--- a/drivers/net/wireless/libertas/if_spi.c
+++ b/drivers/net/wireless/libertas/if_spi.c
@@ -1222,3 +1222,4 @@ MODULE_DESCRIPTION("Libertas SPI WLAN Driver");
 MODULE_AUTHOR("Andrey Yurovsky <andrey@cozybit.com>, "
 	      "Colin McCabe <colin@cozybit.com>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:libertas_spi");
diff --git a/drivers/net/wireless/p54/p54spi.c b/drivers/net/wireless/p54/p54spi.c
index 05458d9..afd26bf 100644
--- a/drivers/net/wireless/p54/p54spi.c
+++ b/drivers/net/wireless/p54/p54spi.c
@@ -731,3 +731,4 @@ module_exit(p54spi_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Lamparter <chunkeey@web.de>");
+MODULE_ALIAS("spi:cx3110x");
diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c
index 5809ef5..1103256 100644
--- a/drivers/net/wireless/wl12xx/wl1251_main.c
+++ b/drivers/net/wireless/wl12xx/wl1251_main.c
@@ -1426,3 +1426,4 @@ EXPORT_SYMBOL_GPL(wl1251_free_hw);
 MODULE_DESCRIPTION("TI wl1251 Wireles LAN Driver Core");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Kalle Valo <kalle.valo@nokia.com>");
+MODULE_ALIAS("spi:wl12xx");
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index 8f410e5..2736b11 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -841,3 +841,4 @@ module_exit(ds1305_exit);
 
 MODULE_DESCRIPTION("RTC driver for DS1305 and DS1306 chips");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:rtc-ds1305");
diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c
index e01b955..cdb7050 100644
--- a/drivers/rtc/rtc-ds1390.c
+++ b/drivers/rtc/rtc-ds1390.c
@@ -189,3 +189,4 @@ module_exit(ds1390_exit);
 MODULE_DESCRIPTION("Dallas/Maxim DS1390/93/94 SPI RTC driver");
 MODULE_AUTHOR("Mark Jackson <mpfj@mimc.co.uk>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:rtc-ds1390");
diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c
index c51589e..a774ca3 100644
--- a/drivers/rtc/rtc-ds3234.c
+++ b/drivers/rtc/rtc-ds3234.c
@@ -188,3 +188,4 @@ module_exit(ds3234_exit);
 MODULE_DESCRIPTION("DS3234 SPI RTC driver");
 MODULE_AUTHOR("Dennis Aberilla <denzzzhome@yahoo.com>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:ds3234");
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index c3a18c5..c8c97a4 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -171,3 +171,4 @@ module_exit(m41t94_exit);
 MODULE_AUTHOR("Kim B. Heino <Kim.Heino@bluegiga.com>");
 MODULE_DESCRIPTION("Driver for ST M41T94 SPI RTC");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:rtc-m41t94");
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 36a8ea9..657403e 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -175,3 +175,4 @@ module_exit(max6902_exit);
 MODULE_DESCRIPTION ("max6902 spi RTC driver");
 MODULE_AUTHOR ("Raphael Assenat");
 MODULE_LICENSE ("GPL");
+MODULE_ALIAS("spi:rtc-max6902");
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index 42028f2..9beba49c 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -174,3 +174,4 @@ module_exit(r9701_exit);
 MODULE_DESCRIPTION("r9701 spi RTC driver");
 MODULE_AUTHOR("Magnus Damm <damm@opensource.se>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:rtc-r9701");
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index dd1e2bc..2099037 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -251,3 +251,4 @@ MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
 MODULE_DESCRIPTION("Ricoh RS5C348 RTC driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
+MODULE_ALIAS("spi:rtc-rs5c348");
diff --git a/drivers/serial/max3100.c b/drivers/serial/max3100.c
index 75ab006..3c30c56 100644
--- a/drivers/serial/max3100.c
+++ b/drivers/serial/max3100.c
@@ -925,3 +925,4 @@ module_exit(max3100_exit);
 MODULE_DESCRIPTION("MAX3100 driver");
 MODULE_AUTHOR("Christian Pellegrin <chripell@evolware.org>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:max3100");
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8518a6e..49e8486 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/cache.h>
 #include <linux/mutex.h>
+#include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
 
 
@@ -93,7 +94,7 @@ static int spi_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	const struct spi_device		*spi = to_spi_device(dev);
 
-	add_uevent_var(env, "MODALIAS=%s", spi->modalias);
+	add_uevent_var(env, "MODALIAS=%s%s", SPI_MODULE_PREFIX, spi->modalias);
 	return 0;
 }
 
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 606e7a4..f921bd1 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -688,3 +688,4 @@ module_exit(spidev_exit);
 MODULE_AUTHOR("Andrea Paterniani, <a.paterniani@swapp-eng.it>");
 MODULE_DESCRIPTION("User mode SPI device interface");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:spidev");
diff --git a/drivers/spi/tle62x0.c b/drivers/spi/tle62x0.c
index 455991f..bf9540f 100644
--- a/drivers/spi/tle62x0.c
+++ b/drivers/spi/tle62x0.c
@@ -329,3 +329,4 @@ module_exit(tle62x0_exit);
 MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
 MODULE_DESCRIPTION("TLE62x0 SPI driver");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("spi:tle62x0");
diff --git a/drivers/staging/stlc45xx/stlc45xx.c b/drivers/staging/stlc45xx/stlc45xx.c
index 12d414d..be99eb3 100644
--- a/drivers/staging/stlc45xx/stlc45xx.c
+++ b/drivers/staging/stlc45xx/stlc45xx.c
@@ -2591,3 +2591,4 @@ module_exit(stlc45xx_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Kalle Valo <kalle.valo@nokia.com>");
+MODULE_ALIAS("spi:cx3110x");
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index f8a4bb2..2211a85 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -639,3 +639,4 @@ module_exit(corgi_lcd_exit);
 MODULE_DESCRIPTION("LCD and backlight driver for SHARP C7x0/Cxx00");
 MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:corgi-lcd");
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index 2eb206b..4631ca8 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -328,3 +328,4 @@ module_exit(ltv350qv_exit);
 MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>");
 MODULE_DESCRIPTION("Samsung LTV350QV LCD Driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:ltv350qv");
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index 51422fc..bbfb502 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -472,3 +472,4 @@ module_exit(tdo24m_exit);
 MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
 MODULE_DESCRIPTION("Driver for Toppoly TDO24M LCD Panel");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:tdo24m");
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index b7fbc75..50ec17d 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -300,4 +300,4 @@ module_exit(tosa_lcd_exit);
 MODULE_AUTHOR("Dmitry Baryshkov");
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("LCD/Backlight control for Sharp SL-6000 PDA");
-
+MODULE_ALIAS("spi:tosa-lcd");
diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c
index 8e653b8..b49063c 100644
--- a/drivers/video/backlight/vgg2432a4.c
+++ b/drivers/video/backlight/vgg2432a4.c
@@ -280,5 +280,4 @@ module_exit(vgg2432a4_exit);
 MODULE_AUTHOR("Ben Dooks <ben-linux@fluff.org>");
 MODULE_DESCRIPTION("VGG2432A4 LCD Driver");
 MODULE_LICENSE("GPL v2");
-
-
+MODULE_ALIAS("spi:VGG2432A4");
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b34f1ef..f58e9d83 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -402,6 +402,7 @@ struct i2c_device_id {
 /* spi */
 
 #define SPI_NAME_SIZE	32
+#define SPI_MODULE_PREFIX "spi:"
 
 struct spi_device_id {
 	char name[SPI_NAME_SIZE];
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 9d446e3..62a9025 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -657,11 +657,11 @@ static int do_i2c_entry(const char *filename, struct i2c_device_id *id,
 	return 1;
 }
 
-/* Looks like: S */
+/* Looks like: spi:S */
 static int do_spi_entry(const char *filename, struct spi_device_id *id,
 			char *alias)
 {
-	sprintf(alias, "%s", id->name);
+	sprintf(alias, SPI_MODULE_PREFIX "%s", id->name);
 
 	return 1;
 }
-- 
cgit v0.10.2


From 5b61a749e8fd0a45a5e37c267d20a43ef0590d68 Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Tue, 22 Sep 2009 16:46:10 -0700
Subject: pxa2xx_spi: register earlier

Register pxa2xx_spi earlier so it can be used with cpufreq

Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Daniel Ribeiro <drwyrm@gmail.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index d949dbf..31dd56f 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1729,7 +1729,7 @@ static int __init pxa2xx_spi_init(void)
 {
 	return platform_driver_probe(&driver, pxa2xx_spi_probe);
 }
-module_init(pxa2xx_spi_init);
+subsys_initcall(pxa2xx_spi_init);
 
 static void __exit pxa2xx_spi_exit(void)
 {
-- 
cgit v0.10.2


From 9ed7ef526ade622cdc22ac365a95197ddd1e09fb Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 22 Sep 2009 16:46:11 -0700
Subject: spi: fix spelling of `automatically' in documentation

Fix spelling of `automatically' in Documentation/spi/spi-summary.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index 4a02d25..deab51d 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -350,7 +350,7 @@ SPI protocol drivers somewhat resemble platform device drivers:
 		.resume		= CHIP_resume,
 	};
 
-The driver core will autmatically attempt to bind this driver to any SPI
+The driver core will automatically attempt to bind this driver to any SPI
 device whose board_info gave a modalias of "CHIP".  Your probe() code
 might look like this unless you're creating a device which is managing
 a bus (appearing under /sys/class/spi_master).
-- 
cgit v0.10.2


From 1a0c220f791be9e15fd897adee257e72ed4134f8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 22 Sep 2009 16:46:12 -0700
Subject: spi_s3c24xx: fix header includes

The driver includes <asm/io.h> where it should be including <linux/io.h>
and also includes <mach/hardware.h> and <asm/dma.h> without using anything
from these.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 6ba8aec..0be4da0 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -20,14 +20,11 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
+#include <linux/io.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <mach/hardware.h>
-
 #include <plat/regs-spi.h>
 #include <mach/spi.h>
 
-- 
cgit v0.10.2


From b5e3afb5e32c9acf69fcc17961c3fddc47e9cc06 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 22 Sep 2009 16:46:13 -0700
Subject: spi_s3c24xx: use resource_size() to get resource size

Change the use of (res->end - res->start) to use resource_size() to
get the size of the resource.

Signed-off-by; Ben Dooks <ben@simtec.co.uk>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 0be4da0..705841f 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -299,7 +299,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 		goto err_no_iores;
 	}
 
-	hw->ioarea = request_mem_region(res->start, (res->end - res->start)+1,
+	hw->ioarea = request_mem_region(res->start, resource_size(res),
 					pdev->name);
 
 	if (hw->ioarea == NULL) {
@@ -308,7 +308,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 		goto err_no_iores;
 	}
 
-	hw->regs = ioremap(res->start, (res->end - res->start)+1);
+	hw->regs = ioremap(res->start, resource_size(res));
 	if (hw->regs == NULL) {
 		dev_err(&pdev->dev, "Cannot map IO\n");
 		err = -ENXIO;
-- 
cgit v0.10.2


From 6d61320707ac2960bc820616bd5921885b874780 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 22 Sep 2009 16:46:13 -0700
Subject: spi_s3c24xx: use dev_pm_ops

Change the spi_s3c24xx driver to use dev_pm_ops to avoid the following
warning during probe:

Platform driver 's3c2410-spi' needs updating - please use dev_pm_ops

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 705841f..03695b6 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -418,9 +418,9 @@ static int __exit s3c24xx_spi_remove(struct platform_device *dev)
 
 #ifdef CONFIG_PM
 
-static int s3c24xx_spi_suspend(struct platform_device *pdev, pm_message_t msg)
+static int s3c24xx_spi_suspend(struct device *dev)
 {
-	struct s3c24xx_spi *hw = platform_get_drvdata(pdev);
+	struct s3c24xx_spi *hw = platform_get_drvdata(to_platform_device(dev));
 
 	if (hw->pdata && hw->pdata->gpio_setup)
 		hw->pdata->gpio_setup(hw->pdata, 0);
@@ -429,27 +429,31 @@ static int s3c24xx_spi_suspend(struct platform_device *pdev, pm_message_t msg)
 	return 0;
 }
 
-static int s3c24xx_spi_resume(struct platform_device *pdev)
+static int s3c24xx_spi_resume(struct device *dev)
 {
-	struct s3c24xx_spi *hw = platform_get_drvdata(pdev);
+	struct s3c24xx_spi *hw = platform_get_drvdata(to_platform_device(dev));
 
 	s3c24xx_spi_initialsetup(hw);
 	return 0;
 }
 
+static struct dev_pm_ops s3c24xx_spi_pmops = {
+	.suspend	= s3c24xx_spi_suspend,
+	.resume		= s3c24xx_spi_resume,
+};
+
+#define S3C24XX_SPI_PMOPS &s3c24xx_spi_pmops
 #else
-#define s3c24xx_spi_suspend NULL
-#define s3c24xx_spi_resume  NULL
-#endif
+#define S3C24XX_SPI_PMOPS NULL
+#endif /* CONFIG_PM */
 
 MODULE_ALIAS("platform:s3c2410-spi");
 static struct platform_driver s3c24xx_spi_driver = {
 	.remove		= __exit_p(s3c24xx_spi_remove),
-	.suspend	= s3c24xx_spi_suspend,
-	.resume		= s3c24xx_spi_resume,
 	.driver		= {
 		.name	= "s3c2410-spi",
 		.owner	= THIS_MODULE,
+		.pm	= S3C24XX_SPI_PMOPS,
 	},
 };
 
-- 
cgit v0.10.2


From 570327d9f48b0be5ca626bcfd80266b7ee2001aa Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 22 Sep 2009 16:46:14 -0700
Subject: spi_s3c24xx: cache device setup data

With the update to the spi_bitbang driver, the transfer setup code is
being called more often, and thus is often re-doing calculations that have
been done before.  The SPI layer allows our driver to add its own data to
each device so add a result cache to each device.

This should also remove the problem where we where directly setting up
registers in the setup call which meant we might overwrite the state of an
extant transfer.,

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 03695b6..33d94f7 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -28,6 +28,20 @@
 #include <plat/regs-spi.h>
 #include <mach/spi.h>
 
+/**
+ * s3c24xx_spi_devstate - per device data
+ * @hz: Last frequency calculated for @sppre field.
+ * @mode: Last mode setting for the @spcon field.
+ * @spcon: Value to write to the SPCON register.
+ * @sppre: Value to write to the SPPRE register.
+ */
+struct s3c24xx_spi_devstate {
+	unsigned int	hz;
+	unsigned int	mode;
+	u8		spcon;
+	u8		sppre;
+};
+
 struct s3c24xx_spi {
 	/* bitbang has to be first */
 	struct spi_bitbang	 bitbang;
@@ -68,43 +82,31 @@ static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol)
 
 static void s3c24xx_spi_chipsel(struct spi_device *spi, int value)
 {
+	struct s3c24xx_spi_devstate *cs = spi->controller_state;
 	struct s3c24xx_spi *hw = to_hw(spi);
 	unsigned int cspol = spi->mode & SPI_CS_HIGH ? 1 : 0;
-	unsigned int spcon;
+
+	/* change the chipselect state and the state of the spi engine clock */
 
 	switch (value) {
 	case BITBANG_CS_INACTIVE:
 		hw->set_cs(hw->pdata, spi->chip_select, cspol^1);
+		writeb(cs->spcon, hw->regs + S3C2410_SPCON);
 		break;
 
 	case BITBANG_CS_ACTIVE:
-		spcon = readb(hw->regs + S3C2410_SPCON);
-
-		if (spi->mode & SPI_CPHA)
-			spcon |= S3C2410_SPCON_CPHA_FMTB;
-		else
-			spcon &= ~S3C2410_SPCON_CPHA_FMTB;
-
-		if (spi->mode & SPI_CPOL)
-			spcon |= S3C2410_SPCON_CPOL_HIGH;
-		else
-			spcon &= ~S3C2410_SPCON_CPOL_HIGH;
-
-		spcon |= S3C2410_SPCON_ENSCK;
-
-		/* write new configration */
-
-		writeb(spcon, hw->regs + S3C2410_SPCON);
+		writeb(cs->spcon | S3C2410_SPCON_ENSCK,
+		       hw->regs + S3C2410_SPCON);
 		hw->set_cs(hw->pdata, spi->chip_select, cspol);
-
 		break;
 	}
 }
 
-static int s3c24xx_spi_setupxfer(struct spi_device *spi,
-				 struct spi_transfer *t)
+static int s3c24xx_spi_update_state(struct spi_device *spi,
+				    struct spi_transfer *t)
 {
 	struct s3c24xx_spi *hw = to_hw(spi);
+	struct s3c24xx_spi_devstate *cs = spi->controller_state;
 	unsigned int bpw;
 	unsigned int hz;
 	unsigned int div;
@@ -124,41 +126,89 @@ static int s3c24xx_spi_setupxfer(struct spi_device *spi,
 		return -EINVAL;
 	}
 
-	clk = clk_get_rate(hw->clk);
-	div = DIV_ROUND_UP(clk, hz * 2) - 1;
+	if (spi->mode != cs->mode) {
+		u8 spcon = SPCON_DEFAULT;
 
-	if (div > 255)
-		div = 255;
+		if (spi->mode & SPI_CPHA)
+			spcon |= S3C2410_SPCON_CPHA_FMTB;
 
-	dev_dbg(&spi->dev, "setting pre-scaler to %d (wanted %d, got %ld)\n",
-		div, hz, clk / (2 * (div + 1)));
+		if (spi->mode & SPI_CPOL)
+			spcon |= S3C2410_SPCON_CPOL_HIGH;
 
+		cs->mode = spi->mode;
+		cs->spcon = spcon;
+	}
 
-	writeb(div, hw->regs + S3C2410_SPPRE);
+	if (cs->hz != hz) {
+		clk = clk_get_rate(hw->clk);
+		div = DIV_ROUND_UP(clk, hz * 2) - 1;
 
-	spin_lock(&hw->bitbang.lock);
-	if (!hw->bitbang.busy) {
-		hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE);
-		/* need to ndelay for 0.5 clocktick ? */
+		if (div > 255)
+			div = 255;
+
+		dev_dbg(&spi->dev, "pre-scaler=%d (wanted %d, got %ld)\n",
+			div, hz, clk / (2 * (div + 1)));
+
+		cs->hz = hz;
+		cs->sppre = div;
 	}
-	spin_unlock(&hw->bitbang.lock);
 
 	return 0;
 }
 
+static int s3c24xx_spi_setupxfer(struct spi_device *spi,
+				 struct spi_transfer *t)
+{
+	struct s3c24xx_spi_devstate *cs = spi->controller_state;
+	struct s3c24xx_spi *hw = to_hw(spi);
+	int ret;
+
+	ret = s3c24xx_spi_update_state(spi, t);
+	if (!ret)
+		writeb(cs->sppre, hw->regs + S3C2410_SPPRE);
+
+	return ret;
+}
+
 static int s3c24xx_spi_setup(struct spi_device *spi)
 {
+	struct s3c24xx_spi_devstate *cs = spi->controller_state;
+	struct s3c24xx_spi *hw = to_hw(spi);
 	int ret;
 
-	ret = s3c24xx_spi_setupxfer(spi, NULL);
-	if (ret < 0) {
-		dev_err(&spi->dev, "setupxfer returned %d\n", ret);
+	/* allocate settings on the first call */
+	if (!cs) {
+		cs = kzalloc(sizeof(struct s3c24xx_spi_devstate), GFP_KERNEL);
+		if (!cs) {
+			dev_err(&spi->dev, "no memory for controller state\n");
+			return -ENOMEM;
+		}
+
+		cs->spcon = SPCON_DEFAULT;
+		cs->hz = -1;
+		spi->controller_state = cs;
+	}
+
+	/* initialise the state from the device */
+	ret = s3c24xx_spi_update_state(spi, NULL);
+	if (ret)
 		return ret;
+
+	spin_lock(&hw->bitbang.lock);
+	if (!hw->bitbang.busy) {
+		hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE);
+		/* need to ndelay for 0.5 clocktick ? */
 	}
+	spin_unlock(&hw->bitbang.lock);
 
 	return 0;
 }
 
+static void s3c24xx_spi_cleanup(struct spi_device *spi)
+{
+	kfree(spi->controller_state);
+}
+
 static inline unsigned int hw_txbyte(struct s3c24xx_spi *hw, int count)
 {
 	return hw->tx ? hw->tx[count] : 0;
@@ -286,7 +336,9 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 	hw->bitbang.setup_transfer = s3c24xx_spi_setupxfer;
 	hw->bitbang.chipselect     = s3c24xx_spi_chipsel;
 	hw->bitbang.txrx_bufs      = s3c24xx_spi_txrx;
-	hw->bitbang.master->setup  = s3c24xx_spi_setup;
+
+	hw->master->setup  = s3c24xx_spi_setup;
+	hw->master->cleanup = s3c24xx_spi_cleanup;
 
 	dev_dbg(hw->dev, "bitbang at %p\n", &hw->bitbang);
 
-- 
cgit v0.10.2


From 0644c48672a3146c01bf7314d440edecf8742d62 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Tue, 22 Sep 2009 16:46:15 -0700
Subject: spi: Freescale STMP driver

Add SPI driver for Freescale STMP 3xxx-based boards

[dbrownell@users.sourceforge.net: cleanup sequence, spi_unregister_master]
Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Kumar Gala <galak@gate.crashing.org>
Cc: Timur Tabi <timur@freescale.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index cd3acfe..4b6f7cb 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -222,6 +222,12 @@ config SPI_SH_SCI
 	help
 	  SPI driver for SuperH SCI blocks.
 
+config SPI_STMP3XXX
+	tristate "Freescale STMP37xx/378x SPI/SSP controller"
+	depends on ARCH_STMP3XXX && SPI_MASTER
+	help
+	  SPI driver for Freescale STMP37xx/378x SoC SSP interface
+
 config SPI_TXX9
 	tristate "Toshiba TXx9 SPI controller"
 	depends on GENERIC_GPIO && CPU_TX49XX
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index fbfc5f2..6d7a3f8 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_SPI_S3C24XX)		+= spi_s3c24xx.o
 obj-$(CONFIG_SPI_TXX9)			+= spi_txx9.o
 obj-$(CONFIG_SPI_XILINX)		+= xilinx_spi.o
 obj-$(CONFIG_SPI_SH_SCI)		+= spi_sh_sci.o
+obj-$(CONFIG_SPI_STMP3XXX)		+= spi_stmp.o
 # 	... add above this line ...
 
 # SPI protocol drivers (device/link on bus)
diff --git a/drivers/spi/spi_stmp.c b/drivers/spi/spi_stmp.c
new file mode 100644
index 0000000..d871dc2
--- /dev/null
+++ b/drivers/spi/spi_stmp.c
@@ -0,0 +1,679 @@
+/*
+ * Freescale STMP378X SPI master driver
+ *
+ * Author: dmitry pervushin <dimka@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+
+#include <mach/platform.h>
+#include <mach/stmp3xxx.h>
+#include <mach/dma.h>
+#include <mach/regs-ssp.h>
+#include <mach/regs-apbh.h>
+
+
+/* 0 means DMA mode(recommended, default), !0 - PIO mode */
+static int pio;
+static int clock;
+
+/* default timeout for busy waits is 2 seconds */
+#define STMP_SPI_TIMEOUT	(2 * HZ)
+
+struct stmp_spi {
+	int		id;
+
+	void *  __iomem regs;	/* vaddr of the control registers */
+
+	int		irq, err_irq;
+	u32		dma;
+	struct stmp3xxx_dma_descriptor d;
+
+	u32		speed_khz;
+	u32		saved_timings;
+	u32		divider;
+
+	struct clk	*clk;
+	struct device	*master_dev;
+
+	struct work_struct work;
+	struct workqueue_struct *workqueue;
+
+	/* lock protects queue access */
+	spinlock_t lock;
+	struct list_head queue;
+
+	struct completion done;
+};
+
+#define busy_wait(cond)							\
+	({								\
+	unsigned long end_jiffies = jiffies + STMP_SPI_TIMEOUT;		\
+	bool succeeded = false;						\
+	do {								\
+		if (cond) {						\
+			succeeded = true;				\
+			break;						\
+		}							\
+		cpu_relax();						\
+	} while (time_before(end_jiffies, jiffies));			\
+	succeeded;							\
+	})
+
+/**
+ * stmp_spi_init_hw
+ * Initialize the SSP port
+ */
+static int stmp_spi_init_hw(struct stmp_spi *ss)
+{
+	int err = 0;
+	void *pins = ss->master_dev->platform_data;
+
+	err = stmp3xxx_request_pin_group(pins, dev_name(ss->master_dev));
+	if (err)
+		goto out;
+
+	ss->clk = clk_get(NULL, "ssp");
+	if (IS_ERR(ss->clk)) {
+		err = PTR_ERR(ss->clk);
+		goto out_free_pins;
+	}
+	clk_enable(ss->clk);
+
+	stmp3xxx_reset_block(ss->regs, false);
+	stmp3xxx_dma_reset_channel(ss->dma);
+
+	return 0;
+
+out_free_pins:
+	stmp3xxx_release_pin_group(pins, dev_name(ss->master_dev));
+out:
+	return err;
+}
+
+static void stmp_spi_release_hw(struct stmp_spi *ss)
+{
+	void *pins = ss->master_dev->platform_data;
+
+	if (ss->clk && !IS_ERR(ss->clk)) {
+		clk_disable(ss->clk);
+		clk_put(ss->clk);
+	}
+	stmp3xxx_release_pin_group(pins, dev_name(ss->master_dev));
+}
+
+static int stmp_spi_setup_transfer(struct spi_device *spi,
+		struct spi_transfer *t)
+{
+	u8 bits_per_word;
+	u32 hz;
+	struct stmp_spi *ss = spi_master_get_devdata(spi->master);
+	u16 rate;
+
+	bits_per_word = spi->bits_per_word;
+	if (t && t->bits_per_word)
+		bits_per_word = t->bits_per_word;
+
+	/*
+	 * Calculate speed:
+	 *	- by default, use maximum speed from ssp clk
+	 *	- if device overrides it, use it
+	 *	- if transfer specifies other speed, use transfer's one
+	 */
+	hz = 1000 * ss->speed_khz / ss->divider;
+	if (spi->max_speed_hz)
+		hz = min(hz, spi->max_speed_hz);
+	if (t && t->speed_hz)
+		hz = min(hz, t->speed_hz);
+
+	if (hz == 0) {
+		dev_err(&spi->dev, "Cannot continue with zero clock\n");
+		return -EINVAL;
+	}
+
+	if (bits_per_word != 8) {
+		dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n",
+			__func__, bits_per_word);
+		return -EINVAL;
+	}
+
+	dev_dbg(&spi->dev, "Requested clk rate = %uHz, max = %uHz/%d = %uHz\n",
+		hz, ss->speed_khz, ss->divider,
+		ss->speed_khz * 1000 / ss->divider);
+
+	if (ss->speed_khz * 1000 / ss->divider < hz) {
+		dev_err(&spi->dev, "%s, unsupported clock rate %uHz\n",
+			__func__, hz);
+		return -EINVAL;
+	}
+
+	rate = 1000 * ss->speed_khz/ss->divider/hz;
+
+	writel(BF(ss->divider, SSP_TIMING_CLOCK_DIVIDE)		|
+	       BF(rate - 1, SSP_TIMING_CLOCK_RATE),
+	       HW_SSP_TIMING + ss->regs);
+
+	writel(BF(1 /* mode SPI */, SSP_CTRL1_SSP_MODE)		|
+	       BF(4 /* 8 bits   */, SSP_CTRL1_WORD_LENGTH)	|
+	       ((spi->mode & SPI_CPOL) ? BM_SSP_CTRL1_POLARITY : 0) |
+	       ((spi->mode & SPI_CPHA) ? BM_SSP_CTRL1_PHASE : 0) |
+	       (pio ? 0 : BM_SSP_CTRL1_DMA_ENABLE),
+	       ss->regs + HW_SSP_CTRL1);
+
+	return 0;
+}
+
+static int stmp_spi_setup(struct spi_device *spi)
+{
+	/* spi_setup() does basic checks,
+	 * stmp_spi_setup_transfer() does more later
+	 */
+	if (spi->bits_per_word != 8) {
+		dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n",
+			__func__, spi->bits_per_word);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static inline u32 stmp_spi_cs(unsigned cs)
+{
+	return  ((cs & 1) ? BM_SSP_CTRL0_WAIT_FOR_CMD : 0) |
+		((cs & 2) ? BM_SSP_CTRL0_WAIT_FOR_IRQ : 0);
+}
+
+static int stmp_spi_txrx_dma(struct stmp_spi *ss, int cs,
+		unsigned char *buf, dma_addr_t dma_buf, int len,
+		int first, int last, bool write)
+{
+	u32 c0 = 0;
+	dma_addr_t spi_buf_dma = dma_buf;
+	int status = 0;
+	enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+	c0 |= (first ? BM_SSP_CTRL0_LOCK_CS : 0);
+	c0 |= (last ? BM_SSP_CTRL0_IGNORE_CRC : 0);
+	c0 |= (write ? 0 : BM_SSP_CTRL0_READ);
+	c0 |= BM_SSP_CTRL0_DATA_XFER;
+
+	c0 |= stmp_spi_cs(cs);
+
+	c0 |= BF(len, SSP_CTRL0_XFER_COUNT);
+
+	if (!dma_buf)
+		spi_buf_dma = dma_map_single(ss->master_dev, buf, len, dir);
+
+	ss->d.command->cmd =
+		BF(len, APBH_CHn_CMD_XFER_COUNT)	|
+		BF(1, APBH_CHn_CMD_CMDWORDS)		|
+		BM_APBH_CHn_CMD_WAIT4ENDCMD		|
+		BM_APBH_CHn_CMD_IRQONCMPLT		|
+		BF(write ? BV_APBH_CHn_CMD_COMMAND__DMA_READ :
+			   BV_APBH_CHn_CMD_COMMAND__DMA_WRITE,
+		   APBH_CHn_CMD_COMMAND);
+	ss->d.command->pio_words[0] = c0;
+	ss->d.command->buf_ptr = spi_buf_dma;
+
+	stmp3xxx_dma_reset_channel(ss->dma);
+	stmp3xxx_dma_clear_interrupt(ss->dma);
+	stmp3xxx_dma_enable_interrupt(ss->dma);
+	init_completion(&ss->done);
+	stmp3xxx_dma_go(ss->dma, &ss->d, 1);
+	wait_for_completion(&ss->done);
+
+	if (!busy_wait(readl(ss->regs + HW_SSP_CTRL0) & BM_SSP_CTRL0_RUN))
+		status = ETIMEDOUT;
+
+	if (!dma_buf)
+		dma_unmap_single(ss->master_dev, spi_buf_dma, len, dir);
+
+	return status;
+}
+
+static inline void stmp_spi_enable(struct stmp_spi *ss)
+{
+	stmp3xxx_setl(BM_SSP_CTRL0_LOCK_CS, ss->regs + HW_SSP_CTRL0);
+	stmp3xxx_clearl(BM_SSP_CTRL0_IGNORE_CRC, ss->regs + HW_SSP_CTRL0);
+}
+
+static inline void stmp_spi_disable(struct stmp_spi *ss)
+{
+	stmp3xxx_clearl(BM_SSP_CTRL0_LOCK_CS, ss->regs + HW_SSP_CTRL0);
+	stmp3xxx_setl(BM_SSP_CTRL0_IGNORE_CRC, ss->regs + HW_SSP_CTRL0);
+}
+
+static int stmp_spi_txrx_pio(struct stmp_spi *ss, int cs,
+		unsigned char *buf, int len,
+		bool first, bool last, bool write)
+{
+	if (first)
+		stmp_spi_enable(ss);
+
+	stmp3xxx_setl(stmp_spi_cs(cs), ss->regs + HW_SSP_CTRL0);
+
+	while (len--) {
+		if (last && len <= 0)
+			stmp_spi_disable(ss);
+
+		stmp3xxx_clearl(BM_SSP_CTRL0_XFER_COUNT,
+				ss->regs + HW_SSP_CTRL0);
+		stmp3xxx_setl(1, ss->regs + HW_SSP_CTRL0);
+
+		if (write)
+			stmp3xxx_clearl(BM_SSP_CTRL0_READ,
+					ss->regs + HW_SSP_CTRL0);
+		else
+			stmp3xxx_setl(BM_SSP_CTRL0_READ,
+					ss->regs + HW_SSP_CTRL0);
+
+		/* Run! */
+		stmp3xxx_setl(BM_SSP_CTRL0_RUN, ss->regs + HW_SSP_CTRL0);
+
+		if (!busy_wait(readl(ss->regs + HW_SSP_CTRL0) &
+				BM_SSP_CTRL0_RUN))
+			break;
+
+		if (write)
+			writel(*buf, ss->regs + HW_SSP_DATA);
+
+		/* Set TRANSFER */
+		stmp3xxx_setl(BM_SSP_CTRL0_DATA_XFER, ss->regs + HW_SSP_CTRL0);
+
+		if (!write) {
+			if (busy_wait((readl(ss->regs + HW_SSP_STATUS) &
+					BM_SSP_STATUS_FIFO_EMPTY)))
+				break;
+			*buf = readl(ss->regs + HW_SSP_DATA) & 0xFF;
+		}
+
+		if (!busy_wait(readl(ss->regs + HW_SSP_CTRL0) &
+					BM_SSP_CTRL0_RUN))
+			break;
+
+		/* advance to the next byte */
+		buf++;
+	}
+
+	return len < 0 ? 0 : -ETIMEDOUT;
+}
+
+static int stmp_spi_handle_message(struct stmp_spi *ss, struct spi_message *m)
+{
+	bool first, last;
+	struct spi_transfer *t, *tmp_t;
+	int status = 0;
+	int cs;
+
+	cs = m->spi->chip_select;
+
+	list_for_each_entry_safe(t, tmp_t, &m->transfers, transfer_list) {
+
+		first = (&t->transfer_list == m->transfers.next);
+		last = (&t->transfer_list == m->transfers.prev);
+
+		if (first || t->speed_hz || t->bits_per_word)
+			stmp_spi_setup_transfer(m->spi, t);
+
+		/* reject "not last" transfers which request to change cs */
+		if (t->cs_change && !last) {
+			dev_err(&m->spi->dev,
+				"Message with t->cs_change has been skipped\n");
+			continue;
+		}
+
+		if (t->tx_buf) {
+			status = pio ?
+			   stmp_spi_txrx_pio(ss, cs, (void *)t->tx_buf,
+				   t->len, first, last, true) :
+			   stmp_spi_txrx_dma(ss, cs, (void *)t->tx_buf,
+				   t->tx_dma, t->len, first, last, true);
+#ifdef DEBUG
+			if (t->len < 0x10)
+				print_hex_dump_bytes("Tx ",
+					DUMP_PREFIX_OFFSET,
+					t->tx_buf, t->len);
+			else
+				pr_debug("Tx: %d bytes\n", t->len);
+#endif
+		}
+		if (t->rx_buf) {
+			status = pio ?
+			   stmp_spi_txrx_pio(ss, cs, t->rx_buf,
+				   t->len, first, last, false) :
+			   stmp_spi_txrx_dma(ss, cs, t->rx_buf,
+				   t->rx_dma, t->len, first, last, false);
+#ifdef DEBUG
+			if (t->len < 0x10)
+				print_hex_dump_bytes("Rx ",
+					DUMP_PREFIX_OFFSET,
+					t->rx_buf, t->len);
+			else
+				pr_debug("Rx: %d bytes\n", t->len);
+#endif
+		}
+
+		if (t->delay_usecs)
+			udelay(t->delay_usecs);
+
+		if (status)
+			break;
+
+	}
+	return status;
+}
+
+/**
+ * stmp_spi_handle - handle messages from the queue
+ */
+static void stmp_spi_handle(struct work_struct *w)
+{
+	struct stmp_spi *ss = container_of(w, struct stmp_spi, work);
+	unsigned long flags;
+	struct spi_message *m;
+
+	spin_lock_irqsave(&ss->lock, flags);
+	while (!list_empty(&ss->queue)) {
+		m = list_entry(ss->queue.next, struct spi_message, queue);
+		list_del_init(&m->queue);
+		spin_unlock_irqrestore(&ss->lock, flags);
+
+		m->status = stmp_spi_handle_message(ss, m);
+		m->complete(m->context);
+
+		spin_lock_irqsave(&ss->lock, flags);
+	}
+	spin_unlock_irqrestore(&ss->lock, flags);
+
+	return;
+}
+
+/**
+ * stmp_spi_transfer - perform message transfer.
+ * Called indirectly from spi_async, queues all the messages to
+ * spi_handle_message.
+ * @spi: spi device
+ * @m: message to be queued
+ */
+static int stmp_spi_transfer(struct spi_device *spi, struct spi_message *m)
+{
+	struct stmp_spi *ss = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	m->status = -EINPROGRESS;
+	spin_lock_irqsave(&ss->lock, flags);
+	list_add_tail(&m->queue, &ss->queue);
+	queue_work(ss->workqueue, &ss->work);
+	spin_unlock_irqrestore(&ss->lock, flags);
+	return 0;
+}
+
+static irqreturn_t stmp_spi_irq(int irq, void *dev_id)
+{
+	struct stmp_spi *ss = dev_id;
+
+	stmp3xxx_dma_clear_interrupt(ss->dma);
+	complete(&ss->done);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t stmp_spi_irq_err(int irq, void *dev_id)
+{
+	struct stmp_spi *ss = dev_id;
+	u32 c1, st;
+
+	c1 = readl(ss->regs + HW_SSP_CTRL1);
+	st = readl(ss->regs + HW_SSP_STATUS);
+	dev_err(ss->master_dev, "%s: status = 0x%08X, c1 = 0x%08X\n",
+		__func__, st, c1);
+	stmp3xxx_clearl(c1 & 0xCCCC0000, ss->regs + HW_SSP_CTRL1);
+
+	return IRQ_HANDLED;
+}
+
+static int __devinit stmp_spi_probe(struct platform_device *dev)
+{
+	int err = 0;
+	struct spi_master *master;
+	struct stmp_spi *ss;
+	struct resource *r;
+
+	master = spi_alloc_master(&dev->dev, sizeof(struct stmp_spi));
+	if (master == NULL) {
+		err = -ENOMEM;
+		goto out0;
+	}
+	master->flags = SPI_MASTER_HALF_DUPLEX;
+
+	ss = spi_master_get_devdata(master);
+	platform_set_drvdata(dev, master);
+
+	/* Get resources(memory, IRQ) associated with the device */
+	r = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (r == NULL) {
+		err = -ENODEV;
+		goto out_put_master;
+	}
+	ss->regs = ioremap(r->start, resource_size(r));
+	if (!ss->regs) {
+		err = -EINVAL;
+		goto out_put_master;
+	}
+
+	ss->master_dev = &dev->dev;
+	ss->id = dev->id;
+
+	INIT_WORK(&ss->work, stmp_spi_handle);
+	INIT_LIST_HEAD(&ss->queue);
+	spin_lock_init(&ss->lock);
+
+	ss->workqueue = create_singlethread_workqueue(dev_name(&dev->dev));
+	if (!ss->workqueue) {
+		err = -ENXIO;
+		goto out_put_master;
+	}
+	master->transfer = stmp_spi_transfer;
+	master->setup = stmp_spi_setup;
+
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA;
+
+	ss->irq = platform_get_irq(dev, 0);
+	if (ss->irq < 0) {
+		err = ss->irq;
+		goto out_put_master;
+	}
+	ss->err_irq = platform_get_irq(dev, 1);
+	if (ss->err_irq < 0) {
+		err = ss->err_irq;
+		goto out_put_master;
+	}
+
+	r = platform_get_resource(dev, IORESOURCE_DMA, 0);
+	if (r == NULL) {
+		err = -ENODEV;
+		goto out_put_master;
+	}
+
+	ss->dma = r->start;
+	err = stmp3xxx_dma_request(ss->dma, &dev->dev, dev_name(&dev->dev));
+	if (err)
+		goto out_put_master;
+
+	err = stmp3xxx_dma_allocate_command(ss->dma, &ss->d);
+	if (err)
+		goto out_free_dma;
+
+	master->bus_num = dev->id;
+	master->num_chipselect = 1;
+
+	/* SPI controller initializations */
+	err = stmp_spi_init_hw(ss);
+	if (err) {
+		dev_dbg(&dev->dev, "cannot initialize hardware\n");
+		goto out_free_dma_desc;
+	}
+
+	if (clock) {
+		dev_info(&dev->dev, "clock rate forced to %d\n", clock);
+		clk_set_rate(ss->clk, clock);
+	}
+	ss->speed_khz = clk_get_rate(ss->clk);
+	ss->divider = 2;
+	dev_info(&dev->dev, "max possible speed %d = %ld/%d kHz\n",
+		ss->speed_khz, clk_get_rate(ss->clk), ss->divider);
+
+	/* Register for SPI interrupt */
+	err = request_irq(ss->irq, stmp_spi_irq, 0,
+			  dev_name(&dev->dev), ss);
+	if (err) {
+		dev_dbg(&dev->dev, "request_irq failed, %d\n", err);
+		goto out_release_hw;
+	}
+
+	/* ..and shared interrupt for all SSP controllers */
+	err = request_irq(ss->err_irq, stmp_spi_irq_err, IRQF_SHARED,
+			  dev_name(&dev->dev), ss);
+	if (err) {
+		dev_dbg(&dev->dev, "request_irq(error) failed, %d\n", err);
+		goto out_free_irq;
+	}
+
+	err = spi_register_master(master);
+	if (err) {
+		dev_dbg(&dev->dev, "cannot register spi master, %d\n", err);
+		goto out_free_irq_2;
+	}
+	dev_info(&dev->dev, "at (mapped) 0x%08X, irq=%d, bus %d, %s mode\n",
+			(u32)ss->regs, ss->irq, master->bus_num,
+			pio ? "PIO" : "DMA");
+	return 0;
+
+out_free_irq_2:
+	free_irq(ss->err_irq, ss);
+out_free_irq:
+	free_irq(ss->irq, ss);
+out_free_dma_desc:
+	stmp3xxx_dma_free_command(ss->dma, &ss->d);
+out_free_dma:
+	stmp3xxx_dma_release(ss->dma);
+out_release_hw:
+	stmp_spi_release_hw(ss);
+out_put_master:
+	if (ss->workqueue)
+		destroy_workqueue(ss->workqueue);
+	if (ss->regs)
+		iounmap(ss->regs);
+	platform_set_drvdata(dev, NULL);
+	spi_master_put(master);
+out0:
+	return err;
+}
+
+static int __devexit stmp_spi_remove(struct platform_device *dev)
+{
+	struct stmp_spi *ss;
+	struct spi_master *master;
+
+	master = platform_get_drvdata(dev);
+	if (master == NULL)
+		goto out0;
+	ss = spi_master_get_devdata(master);
+
+	spi_unregister_master(master);
+
+	free_irq(ss->err_irq, ss);
+	free_irq(ss->irq, ss);
+	stmp3xxx_dma_free_command(ss->dma, &ss->d);
+	stmp3xxx_dma_release(ss->dma);
+	stmp_spi_release_hw(ss);
+	destroy_workqueue(ss->workqueue);
+	iounmap(ss->regs);
+	spi_master_put(master);
+	platform_set_drvdata(dev, NULL);
+out0:
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int stmp_spi_suspend(struct platform_device *pdev, pm_message_t pmsg)
+{
+	struct stmp_spi *ss;
+	struct spi_master *master;
+
+	master = platform_get_drvdata(pdev);
+	ss = spi_master_get_devdata(master);
+
+	ss->saved_timings = readl(HW_SSP_TIMING + ss->regs);
+	clk_disable(ss->clk);
+
+	return 0;
+}
+
+static int stmp_spi_resume(struct platform_device *pdev)
+{
+	struct stmp_spi *ss;
+	struct spi_master *master;
+
+	master = platform_get_drvdata(pdev);
+	ss = spi_master_get_devdata(master);
+
+	clk_enable(ss->clk);
+	stmp3xxx_reset_block(ss->regs, false);
+	writel(ss->saved_timings, ss->regs + HW_SSP_TIMING);
+
+	return 0;
+}
+
+#else
+#define stmp_spi_suspend NULL
+#define stmp_spi_resume  NULL
+#endif
+
+static struct platform_driver stmp_spi_driver = {
+	.probe	= stmp_spi_probe,
+	.remove	= __devexit_p(stmp_spi_remove),
+	.driver = {
+		.name = "stmp3xxx_ssp",
+		.owner = THIS_MODULE,
+	},
+	.suspend = stmp_spi_suspend,
+	.resume  = stmp_spi_resume,
+};
+
+static int __init stmp_spi_init(void)
+{
+	return platform_driver_register(&stmp_spi_driver);
+}
+
+static void __exit stmp_spi_exit(void)
+{
+	platform_driver_unregister(&stmp_spi_driver);
+}
+
+module_init(stmp_spi_init);
+module_exit(stmp_spi_exit);
+module_param(pio, int, S_IRUGO);
+module_param(clock, int, S_IRUGO);
+MODULE_AUTHOR("dmitry pervushin <dpervushin@embeddedalley.com>");
+MODULE_DESCRIPTION("STMP3xxx SPI/SSP driver");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From a41ae1ad907655b2efbb9b1a97736ab1451e1649 Mon Sep 17 00:00:00 2001
From: Hemanth V <hemanthv@ti.com>
Date: Tue, 22 Sep 2009 16:46:16 -0700
Subject: spi: McSPI off-mode support

Add context save/restore feature to McSPI driver.

Signed-off-by: Hemanth V <hemanthv@ti.com>
Reviewed-by: Aaro Koskinen <Aaro.Koskinen@nokia.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index ae785cc..e96ad27 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -41,6 +41,9 @@
 
 #define OMAP2_MCSPI_MAX_FREQ		48000000
 
+/* OMAP2 has 3 SPI controllers, while OMAP3 has 4 */
+#define OMAP2_MCSPI_MAX_CTRL 		4
+
 #define OMAP2_MCSPI_REVISION		0x00
 #define OMAP2_MCSPI_SYSCONFIG		0x10
 #define OMAP2_MCSPI_SYSSTATUS		0x14
@@ -131,8 +134,21 @@ struct omap2_mcspi_cs {
 	void __iomem		*base;
 	unsigned long		phys;
 	int			word_len;
+	/* Context save and restore shadow register */
+	u32			chconf0;
+};
+
+/* used for context save and restore, structure members to be updated whenever
+ * corresponding registers are modified.
+ */
+struct omap2_mcspi_regs {
+	u32 sysconfig;
+	u32 modulctrl;
+	u32 wakeupenable;
 };
 
+static struct omap2_mcspi_regs omap2_mcspi_ctx[OMAP2_MCSPI_MAX_CTRL];
+
 static struct workqueue_struct *omap2_mcspi_wq;
 
 #define MOD_REG_BIT(val, mask, set) do { \
@@ -172,12 +188,27 @@ static inline u32 mcspi_read_cs_reg(const struct spi_device *spi, int idx)
 	return __raw_readl(cs->base + idx);
 }
 
+static inline u32 mcspi_cached_chconf0(const struct spi_device *spi)
+{
+	struct omap2_mcspi_cs *cs = spi->controller_state;
+
+	return cs->chconf0;
+}
+
+static inline void mcspi_write_chconf0(const struct spi_device *spi, u32 val)
+{
+	struct omap2_mcspi_cs *cs = spi->controller_state;
+
+	cs->chconf0 = val;
+	mcspi_write_cs_reg(spi, OMAP2_MCSPI_CHCONF0, val);
+}
+
 static void omap2_mcspi_set_dma_req(const struct spi_device *spi,
 		int is_read, int enable)
 {
 	u32 l, rw;
 
-	l = mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHCONF0);
+	l = mcspi_cached_chconf0(spi);
 
 	if (is_read) /* 1 is read, 0 write */
 		rw = OMAP2_MCSPI_CHCONF_DMAR;
@@ -185,7 +216,7 @@ static void omap2_mcspi_set_dma_req(const struct spi_device *spi,
 		rw = OMAP2_MCSPI_CHCONF_DMAW;
 
 	MOD_REG_BIT(l, rw, enable);
-	mcspi_write_cs_reg(spi, OMAP2_MCSPI_CHCONF0, l);
+	mcspi_write_chconf0(spi, l);
 }
 
 static void omap2_mcspi_set_enable(const struct spi_device *spi, int enable)
@@ -200,9 +231,9 @@ static void omap2_mcspi_force_cs(struct spi_device *spi, int cs_active)
 {
 	u32 l;
 
-	l = mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHCONF0);
+	l = mcspi_cached_chconf0(spi);
 	MOD_REG_BIT(l, OMAP2_MCSPI_CHCONF_FORCE, cs_active);
-	mcspi_write_cs_reg(spi, OMAP2_MCSPI_CHCONF0, l);
+	mcspi_write_chconf0(spi, l);
 }
 
 static void omap2_mcspi_set_master_mode(struct spi_master *master)
@@ -217,6 +248,41 @@ static void omap2_mcspi_set_master_mode(struct spi_master *master)
 	MOD_REG_BIT(l, OMAP2_MCSPI_MODULCTRL_MS, 0);
 	MOD_REG_BIT(l, OMAP2_MCSPI_MODULCTRL_SINGLE, 1);
 	mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, l);
+
+	omap2_mcspi_ctx[master->bus_num - 1].modulctrl = l;
+}
+
+static void omap2_mcspi_restore_ctx(struct omap2_mcspi *mcspi)
+{
+	struct spi_master *spi_cntrl;
+	spi_cntrl = mcspi->master;
+
+	/* McSPI: context restore */
+	mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_MODULCTRL,
+			omap2_mcspi_ctx[spi_cntrl->bus_num - 1].modulctrl);
+
+	mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_SYSCONFIG,
+			omap2_mcspi_ctx[spi_cntrl->bus_num - 1].sysconfig);
+
+	mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_WAKEUPENABLE,
+			omap2_mcspi_ctx[spi_cntrl->bus_num - 1].wakeupenable);
+}
+static void omap2_mcspi_disable_clocks(struct omap2_mcspi *mcspi)
+{
+	clk_disable(mcspi->ick);
+	clk_disable(mcspi->fck);
+}
+
+static int omap2_mcspi_enable_clocks(struct omap2_mcspi *mcspi)
+{
+	if (clk_enable(mcspi->ick))
+		return -ENODEV;
+	if (clk_enable(mcspi->fck))
+		return -ENODEV;
+
+	omap2_mcspi_restore_ctx(mcspi);
+
+	return 0;
 }
 
 static unsigned
@@ -357,7 +423,7 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer)
 	c = count;
 	word_len = cs->word_len;
 
-	l = mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHCONF0);
+	l = mcspi_cached_chconf0(spi);
 	l &= ~OMAP2_MCSPI_CHCONF_TRM_MASK;
 
 	/* We store the pre-calculated register addresses on stack to speed
@@ -397,8 +463,7 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer)
 				 * more word i/o: switch to rx+tx
 				 */
 				if (c == 0 && tx == NULL)
-					mcspi_write_cs_reg(spi,
-							OMAP2_MCSPI_CHCONF0, l);
+					mcspi_write_chconf0(spi, l);
 				*rx++ = __raw_readl(rx_reg);
 #ifdef VERBOSE
 				dev_dbg(&spi->dev, "read-%d %02x\n",
@@ -436,8 +501,7 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer)
 				 * more word i/o: switch to rx+tx
 				 */
 				if (c == 0 && tx == NULL)
-					mcspi_write_cs_reg(spi,
-							OMAP2_MCSPI_CHCONF0, l);
+					mcspi_write_chconf0(spi, l);
 				*rx++ = __raw_readl(rx_reg);
 #ifdef VERBOSE
 				dev_dbg(&spi->dev, "read-%d %04x\n",
@@ -475,8 +539,7 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer)
 				 * more word i/o: switch to rx+tx
 				 */
 				if (c == 0 && tx == NULL)
-					mcspi_write_cs_reg(spi,
-							OMAP2_MCSPI_CHCONF0, l);
+					mcspi_write_chconf0(spi, l);
 				*rx++ = __raw_readl(rx_reg);
 #ifdef VERBOSE
 				dev_dbg(&spi->dev, "read-%d %04x\n",
@@ -505,10 +568,12 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 {
 	struct omap2_mcspi_cs *cs = spi->controller_state;
 	struct omap2_mcspi *mcspi;
+	struct spi_master *spi_cntrl;
 	u32 l = 0, div = 0;
 	u8 word_len = spi->bits_per_word;
 
 	mcspi = spi_master_get_devdata(spi->master);
+	spi_cntrl = mcspi->master;
 
 	if (t != NULL && t->bits_per_word)
 		word_len = t->bits_per_word;
@@ -522,7 +587,7 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 	} else
 		div = 15;
 
-	l = mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHCONF0);
+	l = mcspi_cached_chconf0(spi);
 
 	/* standard 4-wire master mode:  SCK, MOSI/out, MISO/in, nCS
 	 * REVISIT: this controller could support SPI_3WIRE mode.
@@ -554,7 +619,7 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 	else
 		l &= ~OMAP2_MCSPI_CHCONF_PHA;
 
-	mcspi_write_cs_reg(spi, OMAP2_MCSPI_CHCONF0, l);
+	mcspi_write_chconf0(spi, l);
 
 	dev_dbg(&spi->dev, "setup: speed %d, sample %s edge, clk %s\n",
 			OMAP2_MCSPI_MAX_FREQ / (1 << div),
@@ -647,6 +712,7 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 			return -ENOMEM;
 		cs->base = mcspi->base + spi->chip_select * 0x14;
 		cs->phys = mcspi->phys + spi->chip_select * 0x14;
+		cs->chconf0 = 0;
 		spi->controller_state = cs;
 	}
 
@@ -657,11 +723,11 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 			return ret;
 	}
 
-	clk_enable(mcspi->ick);
-	clk_enable(mcspi->fck);
+	if (omap2_mcspi_enable_clocks(mcspi))
+		return -ENODEV;
+
 	ret = omap2_mcspi_setup_transfer(spi, NULL);
-	clk_disable(mcspi->fck);
-	clk_disable(mcspi->ick);
+	omap2_mcspi_disable_clocks(mcspi);
 
 	return ret;
 }
@@ -693,8 +759,8 @@ static void omap2_mcspi_work(struct work_struct *work)
 	mcspi = container_of(work, struct omap2_mcspi, work);
 	spin_lock_irq(&mcspi->lock);
 
-	clk_enable(mcspi->ick);
-	clk_enable(mcspi->fck);
+	if (omap2_mcspi_enable_clocks(mcspi))
+		goto out;
 
 	/* We only enable one channel at a time -- the one whose message is
 	 * at the head of the queue -- although this controller would gladly
@@ -741,13 +807,13 @@ static void omap2_mcspi_work(struct work_struct *work)
 				cs_active = 1;
 			}
 
-			chconf = mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHCONF0);
+			chconf = mcspi_cached_chconf0(spi);
 			chconf &= ~OMAP2_MCSPI_CHCONF_TRM_MASK;
 			if (t->tx_buf == NULL)
 				chconf |= OMAP2_MCSPI_CHCONF_TRM_RX_ONLY;
 			else if (t->rx_buf == NULL)
 				chconf |= OMAP2_MCSPI_CHCONF_TRM_TX_ONLY;
-			mcspi_write_cs_reg(spi, OMAP2_MCSPI_CHCONF0, chconf);
+			mcspi_write_chconf0(spi, chconf);
 
 			if (t->len) {
 				unsigned	count;
@@ -796,9 +862,9 @@ static void omap2_mcspi_work(struct work_struct *work)
 		spin_lock_irq(&mcspi->lock);
 	}
 
-	clk_disable(mcspi->fck);
-	clk_disable(mcspi->ick);
+	omap2_mcspi_disable_clocks(mcspi);
 
+out:
 	spin_unlock_irq(&mcspi->lock);
 }
 
@@ -885,8 +951,8 @@ static int __init omap2_mcspi_reset(struct omap2_mcspi *mcspi)
 	struct spi_master	*master = mcspi->master;
 	u32			tmp;
 
-	clk_enable(mcspi->ick);
-	clk_enable(mcspi->fck);
+	if (omap2_mcspi_enable_clocks(mcspi))
+		return -1;
 
 	mcspi_write_reg(master, OMAP2_MCSPI_SYSCONFIG,
 			OMAP2_MCSPI_SYSCONFIG_SOFTRESET);
@@ -894,18 +960,18 @@ static int __init omap2_mcspi_reset(struct omap2_mcspi *mcspi)
 		tmp = mcspi_read_reg(master, OMAP2_MCSPI_SYSSTATUS);
 	} while (!(tmp & OMAP2_MCSPI_SYSSTATUS_RESETDONE));
 
-	mcspi_write_reg(master, OMAP2_MCSPI_SYSCONFIG,
-			OMAP2_MCSPI_SYSCONFIG_AUTOIDLE |
-			OMAP2_MCSPI_SYSCONFIG_ENAWAKEUP |
-			OMAP2_MCSPI_SYSCONFIG_SMARTIDLE);
+	tmp = OMAP2_MCSPI_SYSCONFIG_AUTOIDLE |
+		OMAP2_MCSPI_SYSCONFIG_ENAWAKEUP |
+		OMAP2_MCSPI_SYSCONFIG_SMARTIDLE;
+	mcspi_write_reg(master, OMAP2_MCSPI_SYSCONFIG, tmp);
+	omap2_mcspi_ctx[master->bus_num - 1].sysconfig = tmp;
 
-	mcspi_write_reg(master, OMAP2_MCSPI_WAKEUPENABLE,
-			OMAP2_MCSPI_WAKEUPENABLE_WKEN);
+	tmp = OMAP2_MCSPI_WAKEUPENABLE_WKEN;
+	mcspi_write_reg(master, OMAP2_MCSPI_WAKEUPENABLE, tmp);
+	omap2_mcspi_ctx[master->bus_num - 1].wakeupenable = tmp;
 
 	omap2_mcspi_set_master_mode(master);
-
-	clk_disable(mcspi->fck);
-	clk_disable(mcspi->ick);
+	omap2_mcspi_disable_clocks(mcspi);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 89c05372d08f3982eeb94d1ea22a60a5eaa8cd6d Mon Sep 17 00:00:00 2001
From: Tero Kristo <tero.kristo@nokia.com>
Date: Tue, 22 Sep 2009 16:46:17 -0700
Subject: spi: McSPI saves CHCONFx too

Previous restore was lazy and only restored CHxCONF when it was needed by
a specific chip select.  This could cause occasional errors on an SPI bus
where multiple chip selects are in use.

Signed-off-by: Tero Kristo <tero.kristo@nokia.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index e96ad27..85b7aea 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -134,6 +134,7 @@ struct omap2_mcspi_cs {
 	void __iomem		*base;
 	unsigned long		phys;
 	int			word_len;
+	struct list_head	node;
 	/* Context save and restore shadow register */
 	u32			chconf0;
 };
@@ -145,6 +146,7 @@ struct omap2_mcspi_regs {
 	u32 sysconfig;
 	u32 modulctrl;
 	u32 wakeupenable;
+	struct list_head cs;
 };
 
 static struct omap2_mcspi_regs omap2_mcspi_ctx[OMAP2_MCSPI_MAX_CTRL];
@@ -255,6 +257,7 @@ static void omap2_mcspi_set_master_mode(struct spi_master *master)
 static void omap2_mcspi_restore_ctx(struct omap2_mcspi *mcspi)
 {
 	struct spi_master *spi_cntrl;
+	struct omap2_mcspi_cs *cs;
 	spi_cntrl = mcspi->master;
 
 	/* McSPI: context restore */
@@ -266,6 +269,10 @@ static void omap2_mcspi_restore_ctx(struct omap2_mcspi *mcspi)
 
 	mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_WAKEUPENABLE,
 			omap2_mcspi_ctx[spi_cntrl->bus_num - 1].wakeupenable);
+
+	list_for_each_entry(cs, &omap2_mcspi_ctx[spi_cntrl->bus_num - 1].cs,
+			node)
+		__raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
 }
 static void omap2_mcspi_disable_clocks(struct omap2_mcspi *mcspi)
 {
@@ -714,6 +721,9 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 		cs->phys = mcspi->phys + spi->chip_select * 0x14;
 		cs->chconf0 = 0;
 		spi->controller_state = cs;
+		/* Link this to context save list */
+		list_add_tail(&cs->node,
+			&omap2_mcspi_ctx[mcspi->master->bus_num - 1].cs);
 	}
 
 	if (mcspi_dma->dma_rx_channel == -1
@@ -736,10 +746,15 @@ static void omap2_mcspi_cleanup(struct spi_device *spi)
 {
 	struct omap2_mcspi	*mcspi;
 	struct omap2_mcspi_dma	*mcspi_dma;
+	struct omap2_mcspi_cs	*cs;
 
 	mcspi = spi_master_get_devdata(spi->master);
 	mcspi_dma = &mcspi->dma_channels[spi->chip_select];
 
+	/* Unlink controller state from context save list */
+	cs = spi->controller_state;
+	list_del(&cs->node);
+
 	kfree(spi->controller_state);
 
 	if (mcspi_dma->dma_rx_channel != -1) {
@@ -1104,6 +1119,7 @@ static int __init omap2_mcspi_probe(struct platform_device *pdev)
 
 	spin_lock_init(&mcspi->lock);
 	INIT_LIST_HEAD(&mcspi->msg_queue);
+	INIT_LIST_HEAD(&omap2_mcspi_ctx[master->bus_num - 1].cs);
 
 	mcspi->ick = clk_get(&pdev->dev, "ick");
 	if (IS_ERR(mcspi->ick)) {
-- 
cgit v0.10.2


From 7869c0b9ed44404bbc675ef76f8ccb3be5496f39 Mon Sep 17 00:00:00 2001
From: Syed Rafiuddin <rafiuddin.syed@ti.com>
Date: Tue, 22 Sep 2009 16:46:18 -0700
Subject: spi: McSPI support for OMAP4

tAdd adds McSPI support for OMAP4430 SDP platform.  All the base addresses
are changed between OMAP1/2/3 and OMAP4.  The fields of the resource
structures are filled at runtime to have McSPI support on OMAP4.

Signed-off-by: Syed Rafiuddin <rafiuddin.syed@ti.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 92009a4..bcfcfc7 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -257,6 +257,11 @@ static inline void omap_init_sti(void) {}
 #define OMAP2_MCSPI3_BASE		0x480b8000
 #define OMAP2_MCSPI4_BASE		0x480ba000
 
+#define OMAP4_MCSPI1_BASE		0x48098100
+#define OMAP4_MCSPI2_BASE		0x4809a100
+#define OMAP4_MCSPI3_BASE		0x480b8100
+#define OMAP4_MCSPI4_BASE		0x480ba100
+
 static struct omap2_mcspi_platform_config omap2_mcspi1_config = {
 	.num_cs		= 4,
 };
@@ -301,7 +306,8 @@ static struct platform_device omap2_mcspi2 = {
 	},
 };
 
-#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3) || \
+	defined(CONFIG_ARCH_OMAP4)
 static struct omap2_mcspi_platform_config omap2_mcspi3_config = {
 	.num_cs		= 2,
 };
@@ -325,7 +331,7 @@ static struct platform_device omap2_mcspi3 = {
 };
 #endif
 
-#ifdef CONFIG_ARCH_OMAP3
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
 static struct omap2_mcspi_platform_config omap2_mcspi4_config = {
 	.num_cs		= 1,
 };
@@ -351,14 +357,25 @@ static struct platform_device omap2_mcspi4 = {
 
 static void omap_init_mcspi(void)
 {
+	if (cpu_is_omap44xx()) {
+		omap2_mcspi1_resources[0].start	= OMAP4_MCSPI1_BASE;
+		omap2_mcspi1_resources[0].end	= OMAP4_MCSPI1_BASE + 0xff;
+		omap2_mcspi2_resources[0].start	= OMAP4_MCSPI2_BASE;
+		omap2_mcspi2_resources[0].end	= OMAP4_MCSPI2_BASE + 0xff;
+		omap2_mcspi3_resources[0].start	= OMAP4_MCSPI3_BASE;
+		omap2_mcspi3_resources[0].end	= OMAP4_MCSPI3_BASE + 0xff;
+		omap2_mcspi4_resources[0].start	= OMAP4_MCSPI4_BASE;
+		omap2_mcspi4_resources[0].end	= OMAP4_MCSPI4_BASE + 0xff;
+	}
 	platform_device_register(&omap2_mcspi1);
 	platform_device_register(&omap2_mcspi2);
-#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3)
-	if (cpu_is_omap2430() || cpu_is_omap343x())
+#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3) || \
+	defined(CONFIG_ARCH_OMAP4)
+	if (cpu_is_omap2430() || cpu_is_omap343x() || cpu_is_omap44xx())
 		platform_device_register(&omap2_mcspi3);
 #endif
-#ifdef CONFIG_ARCH_OMAP3
-	if (cpu_is_omap343x())
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
+	if (cpu_is_omap343x() || cpu_is_omap44xx())
 		platform_device_register(&omap2_mcspi4);
 #endif
 }
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index 85b7aea..ba1a872 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -1014,7 +1014,8 @@ static u8 __initdata spi2_txdma_id[] = {
 	OMAP24XX_DMA_SPI2_TX1,
 };
 
-#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP34XX) \
+	|| defined(CONFIG_ARCH_OMAP4)
 static u8 __initdata spi3_rxdma_id[] = {
 	OMAP24XX_DMA_SPI3_RX0,
 	OMAP24XX_DMA_SPI3_RX1,
@@ -1026,7 +1027,7 @@ static u8 __initdata spi3_txdma_id[] = {
 };
 #endif
 
-#ifdef CONFIG_ARCH_OMAP3
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
 static u8 __initdata spi4_rxdma_id[] = {
 	OMAP34XX_DMA_SPI4_RX0,
 };
@@ -1056,14 +1057,15 @@ static int __init omap2_mcspi_probe(struct platform_device *pdev)
 		txdma_id = spi2_txdma_id;
 		num_chipselect = 2;
 		break;
-#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3) \
+	|| defined(CONFIG_ARCH_OMAP4)
 	case 3:
 		rxdma_id = spi3_rxdma_id;
 		txdma_id = spi3_txdma_id;
 		num_chipselect = 2;
 		break;
 #endif
-#ifdef CONFIG_ARCH_OMAP3
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
 	case 4:
 		rxdma_id = spi4_rxdma_id;
 		txdma_id = spi4_txdma_id;
-- 
cgit v0.10.2


From 568d0697f42771425ae9f1e9a3db769fef7e10b6 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Tue, 22 Sep 2009 16:46:18 -0700
Subject: spi: handle TX-only/RX-only

Support two new half-duplex SPI implementation restrictions, for links
that talk to TX-only or RX-only devices.  (Existing half-duplex flavors
support both transfer directions, just not at the same time.)

Move spi_async() into the spi.c core, and stop inlining it.  Then make
that function perform error checks and reject messages that demand more
than the underlying controller can support.

Based on a patch from Marek Szyprowski which did this only for the
bitbanged GPIO driver.

Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 49e8486..b76f246 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -663,6 +663,65 @@ int spi_setup(struct spi_device *spi)
 }
 EXPORT_SYMBOL_GPL(spi_setup);
 
+/**
+ * spi_async - asynchronous SPI transfer
+ * @spi: device with which data will be exchanged
+ * @message: describes the data transfers, including completion callback
+ * Context: any (irqs may be blocked, etc)
+ *
+ * This call may be used in_irq and other contexts which can't sleep,
+ * as well as from task contexts which can sleep.
+ *
+ * The completion callback is invoked in a context which can't sleep.
+ * Before that invocation, the value of message->status is undefined.
+ * When the callback is issued, message->status holds either zero (to
+ * indicate complete success) or a negative error code.  After that
+ * callback returns, the driver which issued the transfer request may
+ * deallocate the associated memory; it's no longer in use by any SPI
+ * core or controller driver code.
+ *
+ * Note that although all messages to a spi_device are handled in
+ * FIFO order, messages may go to different devices in other orders.
+ * Some device might be higher priority, or have various "hard" access
+ * time requirements, for example.
+ *
+ * On detection of any fault during the transfer, processing of
+ * the entire message is aborted, and the device is deselected.
+ * Until returning from the associated message completion callback,
+ * no other spi_message queued to that device will be processed.
+ * (This rule applies equally to all the synchronous transfer calls,
+ * which are wrappers around this core asynchronous primitive.)
+ */
+int spi_async(struct spi_device *spi, struct spi_message *message)
+{
+	struct spi_master *master = spi->master;
+
+	/* Half-duplex links include original MicroWire, and ones with
+	 * only one data pin like SPI_3WIRE (switches direction) or where
+	 * either MOSI or MISO is missing.  They can also be caused by
+	 * software limitations.
+	 */
+	if ((master->flags & SPI_MASTER_HALF_DUPLEX)
+			|| (spi->mode & SPI_3WIRE)) {
+		struct spi_transfer *xfer;
+		unsigned flags = master->flags;
+
+		list_for_each_entry(xfer, &message->transfers, transfer_list) {
+			if (xfer->rx_buf && xfer->tx_buf)
+				return -EINVAL;
+			if ((flags & SPI_MASTER_NO_TX) && xfer->tx_buf)
+				return -EINVAL;
+			if ((flags & SPI_MASTER_NO_RX) && xfer->rx_buf)
+				return -EINVAL;
+		}
+	}
+
+	message->spi = spi;
+	message->status = -EINPROGRESS;
+	return master->transfer(spi, message);
+}
+EXPORT_SYMBOL_GPL(spi_async);
+
 
 /*-------------------------------------------------------------------------*/
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e2051f3..97b60b3 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -258,6 +258,8 @@ struct spi_master {
 	/* other constraints relevant to this driver */
 	u16			flags;
 #define SPI_MASTER_HALF_DUPLEX	BIT(0)		/* can't do full duplex */
+#define SPI_MASTER_NO_RX	BIT(1)		/* can't do buffer read */
+#define SPI_MASTER_NO_TX	BIT(2)		/* can't do buffer write */
 
 	/* Setup mode and clock, etc (spi driver may call many times).
 	 *
@@ -538,42 +540,7 @@ static inline void spi_message_free(struct spi_message *m)
 }
 
 extern int spi_setup(struct spi_device *spi);
-
-/**
- * spi_async - asynchronous SPI transfer
- * @spi: device with which data will be exchanged
- * @message: describes the data transfers, including completion callback
- * Context: any (irqs may be blocked, etc)
- *
- * This call may be used in_irq and other contexts which can't sleep,
- * as well as from task contexts which can sleep.
- *
- * The completion callback is invoked in a context which can't sleep.
- * Before that invocation, the value of message->status is undefined.
- * When the callback is issued, message->status holds either zero (to
- * indicate complete success) or a negative error code.  After that
- * callback returns, the driver which issued the transfer request may
- * deallocate the associated memory; it's no longer in use by any SPI
- * core or controller driver code.
- *
- * Note that although all messages to a spi_device are handled in
- * FIFO order, messages may go to different devices in other orders.
- * Some device might be higher priority, or have various "hard" access
- * time requirements, for example.
- *
- * On detection of any fault during the transfer, processing of
- * the entire message is aborted, and the device is deselected.
- * Until returning from the associated message completion callback,
- * no other spi_message queued to that device will be processed.
- * (This rule applies equally to all the synchronous transfer calls,
- * which are wrappers around this core asynchronous primitive.)
- */
-static inline int
-spi_async(struct spi_device *spi, struct spi_message *message)
-{
-	message->spi = spi;
-	return spi->master->transfer(spi, message);
-}
+extern int spi_async(struct spi_device *spi, struct spi_message *message);
 
 /*---------------------------------------------------------------------------*/
 
-- 
cgit v0.10.2


From 7f3923a184bb8e7ede5e5f58f1114bf7b8c611ea Mon Sep 17 00:00:00 2001
From: Chris Verges <chrisv@cyberswitching.com>
Date: Tue, 22 Sep 2009 16:46:20 -0700
Subject: rtc: Philips PCF2123 RTC SPI driver

Add support for the Philips/NXP PCF2123 RTC.

Signed-off: Chris Verges <chrisv@cyberswitching.com>
Tested-by: Chris Verges <chrisv@cyberswitching.com>
Signed-off: Christian Pellegrin <chripell@fsfe.org>
Tested-by: Christian Pellegrin <chripell@fsfe.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 73771b0..2ccd351 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -378,6 +378,15 @@ config RTC_DRV_DS3234
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ds3234.
 
+config RTC_DRV_PCF2123
+	tristate "NXP PCF2123"
+	help
+	  If you say yes here you get support for the NXP PCF2123
+	  RTC chip.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-pcf2123.
+
 endif # SPI_MASTER
 
 comment "Platform RTC drivers"
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 5e152ff..0e94149 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_RTC_DRV_MV)	+= rtc-mv.o
 obj-$(CONFIG_RTC_DRV_OMAP)	+= rtc-omap.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
+obj-$(CONFIG_RTC_DRV_PCF2123)	+= rtc-pcf2123.o
 obj-$(CONFIG_RTC_DRV_PL030)	+= rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)	+= rtc-pl031.o
 obj-$(CONFIG_RTC_DRV_GENERIC)	+= rtc-generic.o
diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
new file mode 100644
index 0000000..d07b199
--- /dev/null
+++ b/drivers/rtc/rtc-pcf2123.c
@@ -0,0 +1,344 @@
+/*
+ * An SPI driver for the Philips PCF2123 RTC
+ * Copyright 2009 Cyber Switching, Inc.
+ *
+ * Author: Chris Verges <chrisv@cyberswitching.com>
+ * Maintainers: http://www.cyberswitching.com
+ *
+ * based on the RS5C348 driver in this same directory.
+ *
+ * Thanks to Christian Pellegrin <chripell@fsfe.org> for
+ * the sysfs contributions to this driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Please note that the CS is active high, so platform data
+ * should look something like:
+ *
+ * static struct spi_board_info ek_spi_devices[] = {
+ * 	...
+ * 	{
+ * 		.modalias		= "rtc-pcf2123",
+ * 		.chip_select		= 1,
+ * 		.controller_data	= (void *)AT91_PIN_PA10,
+ *		.max_speed_hz		= 1000 * 1000,
+ *		.mode			= SPI_CS_HIGH,
+ *		.bus_num		= 0,
+ *	},
+ *	...
+ *};
+ *
+ */
+
+#include <linux/bcd.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/rtc.h>
+#include <linux/spi/spi.h>
+
+#define DRV_VERSION "0.5"
+
+#define PCF2123_REG_CTRL1	(0x00)	/* Control Register 1 */
+#define PCF2123_REG_CTRL2	(0x01)	/* Control Register 2 */
+#define PCF2123_REG_SC		(0x02)	/* datetime */
+#define PCF2123_REG_MN		(0x03)
+#define PCF2123_REG_HR		(0x04)
+#define PCF2123_REG_DM		(0x05)
+#define PCF2123_REG_DW		(0x06)
+#define PCF2123_REG_MO		(0x07)
+#define PCF2123_REG_YR		(0x08)
+
+#define PCF2123_SUBADDR		(1 << 4)
+#define PCF2123_WRITE		((0 << 7) | PCF2123_SUBADDR)
+#define PCF2123_READ		((1 << 7) | PCF2123_SUBADDR)
+
+static struct spi_driver pcf2123_driver;
+
+struct pcf2123_sysfs_reg {
+	struct device_attribute attr; /* must be first */
+	char name[2];
+};
+
+struct pcf2123_plat_data {
+	struct rtc_device *rtc;
+	struct pcf2123_sysfs_reg regs[16];
+};
+
+/*
+ * Causes a 30 nanosecond delay to ensure that the PCF2123 chip select
+ * is released properly after an SPI write.  This function should be
+ * called after EVERY read/write call over SPI.
+ */
+static inline void pcf2123_delay_trec(void)
+{
+	ndelay(30);
+}
+
+static ssize_t pcf2123_show(struct device *dev, struct device_attribute *attr,
+			    char *buffer)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct pcf2123_sysfs_reg *r = (struct pcf2123_sysfs_reg *) attr;
+	u8 txbuf[1], rxbuf[1];
+	int ret;
+
+	txbuf[0] = PCF2123_READ | simple_strtoul(r->name, NULL, 16);
+	ret = spi_write_then_read(spi, txbuf, 1, rxbuf, 1);
+	if (ret < 0)
+		return sprintf(buffer, "error: %d", ret);
+	pcf2123_delay_trec();
+	return sprintf(buffer, "0x%x", rxbuf[0]);
+}
+
+static ssize_t pcf2123_store(struct device *dev, struct device_attribute *attr,
+			     const char *buffer, size_t count) {
+	struct spi_device *spi = to_spi_device(dev);
+	struct pcf2123_sysfs_reg *r = (struct pcf2123_sysfs_reg *) attr;
+	u8 txbuf[2];
+	int ret;
+
+	txbuf[0] = PCF2123_WRITE | simple_strtoul(r->name, NULL, 16);
+	txbuf[1] = simple_strtoul(buffer, NULL, 0);
+	ret = spi_write(spi, txbuf, sizeof(txbuf));
+	if (ret < 0)
+		return -EIO;
+	pcf2123_delay_trec();
+	return count;
+}
+
+static int pcf2123_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	u8 txbuf[1], rxbuf[7];
+	int ret;
+
+	txbuf[0] = PCF2123_READ | PCF2123_REG_SC;
+	ret = spi_write_then_read(spi, txbuf, sizeof(txbuf),
+			rxbuf, sizeof(rxbuf));
+	if (ret < 0)
+		return ret;
+	pcf2123_delay_trec();
+
+	tm->tm_sec = bcd2bin(rxbuf[0] & 0x7F);
+	tm->tm_min = bcd2bin(rxbuf[1] & 0x7F);
+	tm->tm_hour = bcd2bin(rxbuf[2] & 0x3F); /* rtc hr 0-23 */
+	tm->tm_mday = bcd2bin(rxbuf[3] & 0x3F);
+	tm->tm_wday = rxbuf[4] & 0x07;
+	tm->tm_mon = bcd2bin(rxbuf[5] & 0x1F) - 1; /* rtc mn 1-12 */
+	tm->tm_year = bcd2bin(rxbuf[6]);
+	if (tm->tm_year < 70)
+		tm->tm_year += 100;	/* assume we are in 1970...2069 */
+
+	dev_dbg(dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
+			"mday=%d, mon=%d, year=%d, wday=%d\n",
+			__func__,
+			tm->tm_sec, tm->tm_min, tm->tm_hour,
+			tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	/* the clock can give out invalid datetime, but we cannot return
+	 * -EINVAL otherwise hwclock will refuse to set the time on bootup.
+	 */
+	if (rtc_valid_tm(tm) < 0)
+		dev_err(dev, "retrieved date/time is not valid.\n");
+
+	return 0;
+}
+
+static int pcf2123_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	u8 txbuf[8];
+	int ret;
+
+	dev_dbg(dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
+			"mday=%d, mon=%d, year=%d, wday=%d\n",
+			__func__,
+			tm->tm_sec, tm->tm_min, tm->tm_hour,
+			tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+
+	/* Stop the counter first */
+	txbuf[0] = PCF2123_WRITE | PCF2123_REG_CTRL1;
+	txbuf[1] = 0x20;
+	ret = spi_write(spi, txbuf, 2);
+	if (ret < 0)
+		return ret;
+	pcf2123_delay_trec();
+
+	/* Set the new time */
+	txbuf[0] = PCF2123_WRITE | PCF2123_REG_SC;
+	txbuf[1] = bin2bcd(tm->tm_sec & 0x7F);
+	txbuf[2] = bin2bcd(tm->tm_min & 0x7F);
+	txbuf[3] = bin2bcd(tm->tm_hour & 0x3F);
+	txbuf[4] = bin2bcd(tm->tm_mday & 0x3F);
+	txbuf[5] = tm->tm_wday & 0x07;
+	txbuf[6] = bin2bcd((tm->tm_mon + 1) & 0x1F); /* rtc mn 1-12 */
+	txbuf[7] = bin2bcd(tm->tm_year < 100 ? tm->tm_year : tm->tm_year - 100);
+
+	ret = spi_write(spi, txbuf, sizeof(txbuf));
+	if (ret < 0)
+		return ret;
+	pcf2123_delay_trec();
+
+	/* Start the counter */
+	txbuf[0] = PCF2123_WRITE | PCF2123_REG_CTRL1;
+	txbuf[1] = 0x00;
+	ret = spi_write(spi, txbuf, 2);
+	if (ret < 0)
+		return ret;
+	pcf2123_delay_trec();
+
+	return 0;
+}
+
+static const struct rtc_class_ops pcf2123_rtc_ops = {
+	.read_time	= pcf2123_rtc_read_time,
+	.set_time	= pcf2123_rtc_set_time,
+};
+
+static int __devinit pcf2123_probe(struct spi_device *spi)
+{
+	struct rtc_device *rtc;
+	struct pcf2123_plat_data *pdata;
+	u8 txbuf[2], rxbuf[2];
+	int ret, i;
+
+	pdata = kzalloc(sizeof(struct pcf2123_plat_data), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+	spi->dev.platform_data = pdata;
+
+	/* Send a software reset command */
+	txbuf[0] = PCF2123_WRITE | PCF2123_REG_CTRL1;
+	txbuf[1] = 0x58;
+	dev_dbg(&spi->dev, "resetting RTC (0x%02X 0x%02X)\n",
+			txbuf[0], txbuf[1]);
+	ret = spi_write(spi, txbuf, 2 * sizeof(u8));
+	if (ret < 0)
+		return ret;
+	pcf2123_delay_trec();
+
+	/* Stop the counter */
+	txbuf[0] = PCF2123_WRITE | PCF2123_REG_CTRL1;
+	txbuf[1] = 0x20;
+	dev_dbg(&spi->dev, "stopping RTC (0x%02X 0x%02X)\n",
+			txbuf[0], txbuf[1]);
+	ret = spi_write(spi, txbuf, 2 * sizeof(u8));
+	if (ret < 0)
+		return ret;
+	pcf2123_delay_trec();
+
+	/* See if the counter was actually stopped */
+	txbuf[0] = PCF2123_READ | PCF2123_REG_CTRL1;
+	dev_dbg(&spi->dev, "checking for presence of RTC (0x%02X)\n",
+			txbuf[0]);
+	ret = spi_write_then_read(spi, txbuf, 1 * sizeof(u8),
+					rxbuf, 2 * sizeof(u8));
+	dev_dbg(&spi->dev, "received data from RTC (0x%02X 0x%02X)\n",
+			rxbuf[0], rxbuf[1]);
+	if (ret < 0)
+		goto kfree_exit;
+	pcf2123_delay_trec();
+
+	if (!(rxbuf[0] & 0x20)) {
+		dev_err(&spi->dev, "chip not found\n");
+		goto kfree_exit;
+	}
+
+	dev_info(&spi->dev, "chip found, driver version " DRV_VERSION "\n");
+	dev_info(&spi->dev, "spiclk %u KHz.\n",
+			(spi->max_speed_hz + 500) / 1000);
+
+	/* Start the counter */
+	txbuf[0] = PCF2123_WRITE | PCF2123_REG_CTRL1;
+	txbuf[1] = 0x00;
+	ret = spi_write(spi, txbuf, sizeof(txbuf));
+	if (ret < 0)
+		goto kfree_exit;
+	pcf2123_delay_trec();
+
+	/* Finalize the initialization */
+	rtc = rtc_device_register(pcf2123_driver.driver.name, &spi->dev,
+			&pcf2123_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(rtc)) {
+		dev_err(&spi->dev, "failed to register.\n");
+		ret = PTR_ERR(rtc);
+		goto kfree_exit;
+	}
+
+	pdata->rtc = rtc;
+
+	for (i = 0; i < 16; i++) {
+		sprintf(pdata->regs[i].name, "%1x", i);
+		pdata->regs[i].attr.attr.mode = S_IRUGO | S_IWUSR;
+		pdata->regs[i].attr.attr.name = pdata->regs[i].name;
+		pdata->regs[i].attr.show = pcf2123_show;
+		pdata->regs[i].attr.store = pcf2123_store;
+		ret = device_create_file(&spi->dev, &pdata->regs[i].attr);
+		if (ret) {
+			dev_err(&spi->dev, "Unable to create sysfs %s\n",
+				pdata->regs[i].name);
+			pdata->regs[i].name[0] = '\0';
+		}
+	}
+
+	return 0;
+kfree_exit:
+	kfree(pdata);
+	spi->dev.platform_data = NULL;
+	return ret;
+}
+
+static int pcf2123_remove(struct spi_device *spi)
+{
+	struct pcf2123_plat_data *pdata = spi->dev.platform_data;
+	int i;
+
+	if (pdata) {
+		struct rtc_device *rtc = pdata->rtc;
+
+		if (rtc)
+			rtc_device_unregister(rtc);
+		for (i = 0; i < 16; i++)
+			if (pdata->regs[i].name[0])
+				device_remove_file(&spi->dev,
+						   &pdata->regs[i].attr);
+		kfree(pdata);
+	}
+
+	return 0;
+}
+
+static struct spi_driver pcf2123_driver = {
+	.driver	= {
+			.name	= "rtc-pcf2123",
+			.bus	= &spi_bus_type,
+			.owner	= THIS_MODULE,
+	},
+	.probe	= pcf2123_probe,
+	.remove	= __devexit_p(pcf2123_remove),
+};
+
+static int __init pcf2123_init(void)
+{
+	return spi_register_driver(&pcf2123_driver);
+}
+
+static void __exit pcf2123_exit(void)
+{
+	spi_unregister_driver(&pcf2123_driver);
+}
+
+MODULE_AUTHOR("Chris Verges <chrisv@cyberswitching.com>");
+MODULE_DESCRIPTION("NXP PCF2123 RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(pcf2123_init);
+module_exit(pcf2123_exit);
-- 
cgit v0.10.2


From f3d2570a1482a6d897ba29276964965f6fe970d8 Mon Sep 17 00:00:00 2001
From: Chris Verges <chrisv@cyberswitching.com>
Date: Tue, 22 Sep 2009 16:46:22 -0700
Subject: rtc-philips-pcf2123-rtc-spi-driver-updates

Signed-off: Chris Verges <chrisv@cyberswitching.com>
Cc: Christian Pellegrin <chripell@fsfe.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
index d07b199..e75df9d 100644
--- a/drivers/rtc/rtc-pcf2123.c
+++ b/drivers/rtc/rtc-pcf2123.c
@@ -42,7 +42,7 @@
 #include <linux/rtc.h>
 #include <linux/spi/spi.h>
 
-#define DRV_VERSION "0.5"
+#define DRV_VERSION "0.6"
 
 #define PCF2123_REG_CTRL1	(0x00)	/* Control Register 1 */
 #define PCF2123_REG_CTRL2	(0x01)	/* Control Register 2 */
@@ -61,7 +61,7 @@
 static struct spi_driver pcf2123_driver;
 
 struct pcf2123_sysfs_reg {
-	struct device_attribute attr; /* must be first */
+	struct device_attribute attr;
 	char name[2];
 };
 
@@ -84,27 +84,42 @@ static ssize_t pcf2123_show(struct device *dev, struct device_attribute *attr,
 			    char *buffer)
 {
 	struct spi_device *spi = to_spi_device(dev);
-	struct pcf2123_sysfs_reg *r = (struct pcf2123_sysfs_reg *) attr;
+	struct pcf2123_sysfs_reg *r;
 	u8 txbuf[1], rxbuf[1];
+	unsigned long reg;
 	int ret;
 
-	txbuf[0] = PCF2123_READ | simple_strtoul(r->name, NULL, 16);
+	r = container_of(attr, struct pcf2123_sysfs_reg, attr);
+
+	if (strict_strtoul(r->name, 16, &reg))
+		return -EINVAL;
+
+	txbuf[0] = PCF2123_READ | reg;
 	ret = spi_write_then_read(spi, txbuf, 1, rxbuf, 1);
 	if (ret < 0)
-		return sprintf(buffer, "error: %d", ret);
+		return -EIO;
 	pcf2123_delay_trec();
-	return sprintf(buffer, "0x%x", rxbuf[0]);
+	return sprintf(buffer, "0x%x\n", rxbuf[0]);
 }
 
 static ssize_t pcf2123_store(struct device *dev, struct device_attribute *attr,
 			     const char *buffer, size_t count) {
 	struct spi_device *spi = to_spi_device(dev);
-	struct pcf2123_sysfs_reg *r = (struct pcf2123_sysfs_reg *) attr;
+	struct pcf2123_sysfs_reg *r;
 	u8 txbuf[2];
+	unsigned long reg;
+	unsigned long val;
+
 	int ret;
 
-	txbuf[0] = PCF2123_WRITE | simple_strtoul(r->name, NULL, 16);
-	txbuf[1] = simple_strtoul(buffer, NULL, 0);
+	r = container_of(attr, struct pcf2123_sysfs_reg, attr);
+
+	if (strict_strtoul(r->name, 16, &reg)
+		|| strict_strtoul(buffer, 10, &val))
+		return -EINVAL;
+
+	txbuf[0] = PCF2123_WRITE | reg;
+	txbuf[1] = val;
 	ret = spi_write(spi, txbuf, sizeof(txbuf));
 	if (ret < 0)
 		return -EIO;
@@ -220,7 +235,7 @@ static int __devinit pcf2123_probe(struct spi_device *spi)
 			txbuf[0], txbuf[1]);
 	ret = spi_write(spi, txbuf, 2 * sizeof(u8));
 	if (ret < 0)
-		return ret;
+		goto kfree_exit;
 	pcf2123_delay_trec();
 
 	/* Stop the counter */
@@ -230,7 +245,7 @@ static int __devinit pcf2123_probe(struct spi_device *spi)
 			txbuf[0], txbuf[1]);
 	ret = spi_write(spi, txbuf, 2 * sizeof(u8));
 	if (ret < 0)
-		return ret;
+		goto kfree_exit;
 	pcf2123_delay_trec();
 
 	/* See if the counter was actually stopped */
@@ -284,11 +299,16 @@ static int __devinit pcf2123_probe(struct spi_device *spi)
 		if (ret) {
 			dev_err(&spi->dev, "Unable to create sysfs %s\n",
 				pdata->regs[i].name);
-			pdata->regs[i].name[0] = '\0';
+			goto sysfs_exit;
 		}
 	}
 
 	return 0;
+
+sysfs_exit:
+	for (i--; i >= 0; i--)
+		device_remove_file(&spi->dev, &pdata->regs[i].attr);
+
 kfree_exit:
 	kfree(pdata);
 	spi->dev.platform_data = NULL;
-- 
cgit v0.10.2


From d00ed3cf6e54312fb59cd1fd6300d787d22373c7 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 22 Sep 2009 16:46:23 -0700
Subject: rtc: add driver for MXC's internal RTC module

This adds a driver for Freescale's MXC internal real time clock modules.

The code is taken from Freescale's BSPs, but modified to fit the current
kernel coding mechanisms.  Also, the PMIC external clock function was
removed for now to not add dead bits and keep the code as simple as
possible.

[akpm@linux-foundation.org: make PIE_BIT_DEF[] static]
Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>

Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Daniel Mack <daniel@caiaq.de>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 2ccd351..18c5be1 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -509,6 +509,17 @@ config RTC_DRV_M48T59
 	  This driver can also be built as a module, if so, the module
 	  will be called "rtc-m48t59".
 
+config RTC_MXC
+	tristate "Freescale MXC Real Time Clock"
+	depends on ARCH_MXC
+	depends on RTC_CLASS
+	help
+	   If you say yes here you get support for the Freescale MXC
+	   RTC module.
+
+	   This driver can also be built as a module, if so, the module
+	   will be called "rtc-mxc".
+
 config RTC_DRV_BQ4802
 	tristate "TI BQ4802"
 	help
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 0e94149..5c16333 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_RTC_DRV_M41T94)	+= rtc-m41t94.o
 obj-$(CONFIG_RTC_DRV_M48T35)	+= rtc-m48t35.o
 obj-$(CONFIG_RTC_DRV_M48T59)	+= rtc-m48t59.o
 obj-$(CONFIG_RTC_DRV_M48T86)	+= rtc-m48t86.o
+obj-$(CONFIG_RTC_MXC)		+= rtc-mxc.o
 obj-$(CONFIG_RTC_DRV_BQ4802)	+= rtc-bq4802.o
 obj-$(CONFIG_RTC_DRV_SUN4V)	+= rtc-sun4v.o
 obj-$(CONFIG_RTC_DRV_STARFIRE)	+= rtc-starfire.o
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
new file mode 100644
index 0000000..6bd5072
--- /dev/null
+++ b/drivers/rtc/rtc-mxc.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright 2004-2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/io.h>
+#include <linux/rtc.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#include <mach/hardware.h>
+
+#define RTC_INPUT_CLK_32768HZ	(0x00 << 5)
+#define RTC_INPUT_CLK_32000HZ	(0x01 << 5)
+#define RTC_INPUT_CLK_38400HZ	(0x02 << 5)
+
+#define RTC_SW_BIT      (1 << 0)
+#define RTC_ALM_BIT     (1 << 2)
+#define RTC_1HZ_BIT     (1 << 4)
+#define RTC_2HZ_BIT     (1 << 7)
+#define RTC_SAM0_BIT    (1 << 8)
+#define RTC_SAM1_BIT    (1 << 9)
+#define RTC_SAM2_BIT    (1 << 10)
+#define RTC_SAM3_BIT    (1 << 11)
+#define RTC_SAM4_BIT    (1 << 12)
+#define RTC_SAM5_BIT    (1 << 13)
+#define RTC_SAM6_BIT    (1 << 14)
+#define RTC_SAM7_BIT    (1 << 15)
+#define PIT_ALL_ON      (RTC_2HZ_BIT | RTC_SAM0_BIT | RTC_SAM1_BIT | \
+			 RTC_SAM2_BIT | RTC_SAM3_BIT | RTC_SAM4_BIT | \
+			 RTC_SAM5_BIT | RTC_SAM6_BIT | RTC_SAM7_BIT)
+
+#define RTC_ENABLE_BIT  (1 << 7)
+
+#define MAX_PIE_NUM     9
+#define MAX_PIE_FREQ    512
+static const u32 PIE_BIT_DEF[MAX_PIE_NUM][2] = {
+	{ 2,		RTC_2HZ_BIT },
+	{ 4,		RTC_SAM0_BIT },
+	{ 8,		RTC_SAM1_BIT },
+	{ 16,		RTC_SAM2_BIT },
+	{ 32,		RTC_SAM3_BIT },
+	{ 64,		RTC_SAM4_BIT },
+	{ 128,		RTC_SAM5_BIT },
+	{ 256,		RTC_SAM6_BIT },
+	{ MAX_PIE_FREQ,	RTC_SAM7_BIT },
+};
+
+/* Those are the bits from a classic RTC we want to mimic */
+#define RTC_IRQF	0x80	/* any of the following 3 is active */
+#define RTC_PF		0x40	/* Periodic interrupt */
+#define RTC_AF		0x20	/* Alarm interrupt */
+#define RTC_UF		0x10	/* Update interrupt for 1Hz RTC */
+
+#define MXC_RTC_TIME	0
+#define MXC_RTC_ALARM	1
+
+#define RTC_HOURMIN	0x00	/*  32bit rtc hour/min counter reg */
+#define RTC_SECOND	0x04	/*  32bit rtc seconds counter reg */
+#define RTC_ALRM_HM	0x08	/*  32bit rtc alarm hour/min reg */
+#define RTC_ALRM_SEC	0x0C	/*  32bit rtc alarm seconds reg */
+#define RTC_RTCCTL	0x10	/*  32bit rtc control reg */
+#define RTC_RTCISR	0x14	/*  32bit rtc interrupt status reg */
+#define RTC_RTCIENR	0x18	/*  32bit rtc interrupt enable reg */
+#define RTC_STPWCH	0x1C	/*  32bit rtc stopwatch min reg */
+#define RTC_DAYR	0x20	/*  32bit rtc days counter reg */
+#define RTC_DAYALARM	0x24	/*  32bit rtc day alarm reg */
+#define RTC_TEST1	0x28	/*  32bit rtc test reg 1 */
+#define RTC_TEST2	0x2C	/*  32bit rtc test reg 2 */
+#define RTC_TEST3	0x30	/*  32bit rtc test reg 3 */
+
+struct rtc_plat_data {
+	struct rtc_device *rtc;
+	void __iomem *ioaddr;
+	int irq;
+	struct clk *clk;
+	unsigned int irqen;
+	int alrm_sec;
+	int alrm_min;
+	int alrm_hour;
+	int alrm_mday;
+	struct timespec mxc_rtc_delta;
+	struct rtc_time g_rtc_alarm;
+};
+
+/*
+ * This function is used to obtain the RTC time or the alarm value in
+ * second.
+ */
+static u32 get_alarm_or_time(struct device *dev, int time_alarm)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	void __iomem *ioaddr = pdata->ioaddr;
+	u32 day = 0, hr = 0, min = 0, sec = 0, hr_min = 0;
+
+	switch (time_alarm) {
+	case MXC_RTC_TIME:
+		day = readw(ioaddr + RTC_DAYR);
+		hr_min = readw(ioaddr + RTC_HOURMIN);
+		sec = readw(ioaddr + RTC_SECOND);
+		break;
+	case MXC_RTC_ALARM:
+		day = readw(ioaddr + RTC_DAYALARM);
+		hr_min = readw(ioaddr + RTC_ALRM_HM) & 0xffff;
+		sec = readw(ioaddr + RTC_ALRM_SEC);
+		break;
+	}
+
+	hr = hr_min >> 8;
+	min = hr_min & 0xff;
+
+	return (((day * 24 + hr) * 60) + min) * 60 + sec;
+}
+
+/*
+ * This function sets the RTC alarm value or the time value.
+ */
+static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time)
+{
+	u32 day, hr, min, sec, temp;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	void __iomem *ioaddr = pdata->ioaddr;
+
+	day = time / 86400;
+	time -= day * 86400;
+
+	/* time is within a day now */
+	hr = time / 3600;
+	time -= hr * 3600;
+
+	/* time is within an hour now */
+	min = time / 60;
+	sec = time - min * 60;
+
+	temp = (hr << 8) + min;
+
+	switch (time_alarm) {
+	case MXC_RTC_TIME:
+		writew(day, ioaddr + RTC_DAYR);
+		writew(sec, ioaddr + RTC_SECOND);
+		writew(temp, ioaddr + RTC_HOURMIN);
+		break;
+	case MXC_RTC_ALARM:
+		writew(day, ioaddr + RTC_DAYALARM);
+		writew(sec, ioaddr + RTC_ALRM_SEC);
+		writew(temp, ioaddr + RTC_ALRM_HM);
+		break;
+	}
+}
+
+/*
+ * This function updates the RTC alarm registers and then clears all the
+ * interrupt status bits.
+ */
+static int rtc_update_alarm(struct device *dev, struct rtc_time *alrm)
+{
+	struct rtc_time alarm_tm, now_tm;
+	unsigned long now, time;
+	int ret;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	void __iomem *ioaddr = pdata->ioaddr;
+
+	now = get_alarm_or_time(dev, MXC_RTC_TIME);
+	rtc_time_to_tm(now, &now_tm);
+	alarm_tm.tm_year = now_tm.tm_year;
+	alarm_tm.tm_mon = now_tm.tm_mon;
+	alarm_tm.tm_mday = now_tm.tm_mday;
+	alarm_tm.tm_hour = alrm->tm_hour;
+	alarm_tm.tm_min = alrm->tm_min;
+	alarm_tm.tm_sec = alrm->tm_sec;
+	rtc_tm_to_time(&now_tm, &now);
+	rtc_tm_to_time(&alarm_tm, &time);
+
+	if (time < now) {
+		time += 60 * 60 * 24;
+		rtc_time_to_tm(time, &alarm_tm);
+	}
+
+	ret = rtc_tm_to_time(&alarm_tm, &time);
+
+	/* clear all the interrupt status bits */
+	writew(readw(ioaddr + RTC_RTCISR), ioaddr + RTC_RTCISR);
+	set_alarm_or_time(dev, MXC_RTC_ALARM, time);
+
+	return ret;
+}
+
+/* This function is the RTC interrupt service routine. */
+static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id)
+{
+	struct platform_device *pdev = dev_id;
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	void __iomem *ioaddr = pdata->ioaddr;
+	u32 status;
+	u32 events = 0;
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	status = readw(ioaddr + RTC_RTCISR) & readw(ioaddr + RTC_RTCIENR);
+	/* clear interrupt sources */
+	writew(status, ioaddr + RTC_RTCISR);
+
+	/* clear alarm interrupt if it has occurred */
+	if (status & RTC_ALM_BIT)
+		status &= ~RTC_ALM_BIT;
+
+	/* update irq data & counter */
+	if (status & RTC_ALM_BIT)
+		events |= (RTC_AF | RTC_IRQF);
+
+	if (status & RTC_1HZ_BIT)
+		events |= (RTC_UF | RTC_IRQF);
+
+	if (status & PIT_ALL_ON)
+		events |= (RTC_PF | RTC_IRQF);
+
+	if ((status & RTC_ALM_BIT) && rtc_valid_tm(&pdata->g_rtc_alarm))
+		rtc_update_alarm(&pdev->dev, &pdata->g_rtc_alarm);
+
+	rtc_update_irq(pdata->rtc, 1, events);
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Clear all interrupts and release the IRQ
+ */
+static void mxc_rtc_release(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	void __iomem *ioaddr = pdata->ioaddr;
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+
+	/* Disable all rtc interrupts */
+	writew(0, ioaddr + RTC_RTCIENR);
+
+	/* Clear all interrupt status */
+	writew(0xffffffff, ioaddr + RTC_RTCISR);
+
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+}
+
+static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit,
+				unsigned int enabled)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	void __iomem *ioaddr = pdata->ioaddr;
+	u32 reg;
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	reg = readw(ioaddr + RTC_RTCIENR);
+
+	if (enabled)
+		reg |= bit;
+	else
+		reg &= ~bit;
+
+	writew(reg, ioaddr + RTC_RTCIENR);
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+}
+
+static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	mxc_rtc_irq_enable(dev, RTC_ALM_BIT, enabled);
+	return 0;
+}
+
+static int mxc_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
+{
+	mxc_rtc_irq_enable(dev, RTC_1HZ_BIT, enabled);
+	return 0;
+}
+
+/*
+ * This function reads the current RTC time into tm in Gregorian date.
+ */
+static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	u32 val;
+
+	/* Avoid roll-over from reading the different registers */
+	do {
+		val = get_alarm_or_time(dev, MXC_RTC_TIME);
+	} while (val != get_alarm_or_time(dev, MXC_RTC_TIME));
+
+	rtc_time_to_tm(val, tm);
+
+	return 0;
+}
+
+/*
+ * This function sets the internal RTC time based on tm in Gregorian date.
+ */
+static int mxc_rtc_set_mmss(struct device *dev, unsigned long time)
+{
+	/* Avoid roll-over from reading the different registers */
+	do {
+		set_alarm_or_time(dev, MXC_RTC_TIME, time);
+	} while (time != get_alarm_or_time(dev, MXC_RTC_TIME));
+
+	return 0;
+}
+
+/*
+ * This function reads the current alarm value into the passed in 'alrm'
+ * argument. It updates the alrm's pending field value based on the whether
+ * an alarm interrupt occurs or not.
+ */
+static int mxc_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	void __iomem *ioaddr = pdata->ioaddr;
+
+	rtc_time_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time);
+	alrm->pending = ((readw(ioaddr + RTC_RTCISR) & RTC_ALM_BIT)) ? 1 : 0;
+
+	return 0;
+}
+
+/*
+ * This function sets the RTC alarm based on passed in alrm.
+ */
+static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	int ret;
+
+	if (rtc_valid_tm(&alrm->time)) {
+		if (alrm->time.tm_sec > 59 ||
+		    alrm->time.tm_hour > 23 ||
+		    alrm->time.tm_min > 59)
+			return -EINVAL;
+
+		ret = rtc_update_alarm(dev, &alrm->time);
+	} else {
+		ret = rtc_valid_tm(&alrm->time);
+		if (ret)
+			return ret;
+
+		ret = rtc_update_alarm(dev, &alrm->time);
+	}
+
+	if (ret)
+		return ret;
+
+	memcpy(&pdata->g_rtc_alarm, &alrm->time, sizeof(struct rtc_time));
+	mxc_rtc_irq_enable(dev, RTC_ALM_BIT, alrm->enabled);
+
+	return 0;
+}
+
+/* RTC layer */
+static struct rtc_class_ops mxc_rtc_ops = {
+	.release		= mxc_rtc_release,
+	.read_time		= mxc_rtc_read_time,
+	.set_mmss		= mxc_rtc_set_mmss,
+	.read_alarm		= mxc_rtc_read_alarm,
+	.set_alarm		= mxc_rtc_set_alarm,
+	.alarm_irq_enable	= mxc_rtc_alarm_irq_enable,
+	.update_irq_enable	= mxc_rtc_update_irq_enable,
+};
+
+static int __init mxc_rtc_probe(struct platform_device *pdev)
+{
+	struct clk *clk;
+	struct resource *res;
+	struct rtc_device *rtc;
+	struct rtc_plat_data *pdata = NULL;
+	u32 reg;
+	int ret, rate;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdata->ioaddr = ioremap(res->start, resource_size(res));
+
+	clk = clk_get(&pdev->dev, "ckil");
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	rate = clk_get_rate(clk);
+	clk_put(clk);
+
+	if (rate == 32768)
+		reg = RTC_INPUT_CLK_32768HZ;
+	else if (rate == 32000)
+		reg = RTC_INPUT_CLK_32000HZ;
+	else if (rate == 38400)
+		reg = RTC_INPUT_CLK_38400HZ;
+	else {
+		dev_err(&pdev->dev, "rtc clock is not valid (%lu)\n",
+			clk_get_rate(clk));
+		ret = -EINVAL;
+		goto exit_free_pdata;
+	}
+
+	reg |= RTC_ENABLE_BIT;
+	writew(reg, (pdata->ioaddr + RTC_RTCCTL));
+	if (((readw(pdata->ioaddr + RTC_RTCCTL)) & RTC_ENABLE_BIT) == 0) {
+		dev_err(&pdev->dev, "hardware module can't be enabled!\n");
+		ret = -EIO;
+		goto exit_free_pdata;
+	}
+
+	pdata->clk = clk_get(&pdev->dev, "rtc");
+	if (IS_ERR(pdata->clk)) {
+		dev_err(&pdev->dev, "unable to get clock!\n");
+		ret = PTR_ERR(pdata->clk);
+		goto exit_free_pdata;
+	}
+
+	clk_enable(pdata->clk);
+
+	rtc = rtc_device_register(pdev->name, &pdev->dev, &mxc_rtc_ops,
+				  THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		ret = PTR_ERR(rtc);
+		goto exit_put_clk;
+	}
+
+	pdata->rtc = rtc;
+	platform_set_drvdata(pdev, pdata);
+
+	/* Configure and enable the RTC */
+	pdata->irq = platform_get_irq(pdev, 0);
+
+	if (pdata->irq >= 0 &&
+	    request_irq(pdata->irq, mxc_rtc_interrupt, IRQF_SHARED,
+			pdev->name, pdev) < 0) {
+		dev_warn(&pdev->dev, "interrupt not available.\n");
+		pdata->irq = -1;
+	}
+
+	return 0;
+
+exit_put_clk:
+	clk_put(pdata->clk);
+
+exit_free_pdata:
+	kfree(pdata);
+
+	return ret;
+}
+
+static int __exit mxc_rtc_remove(struct platform_device *pdev)
+{
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+
+	rtc_device_unregister(pdata->rtc);
+
+	if (pdata->irq >= 0)
+		free_irq(pdata->irq, pdev);
+
+	clk_disable(pdata->clk);
+	clk_put(pdata->clk);
+	kfree(pdata);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver mxc_rtc_driver = {
+	.driver = {
+		   .name	= "mxc_rtc",
+		   .owner	= THIS_MODULE,
+	},
+	.remove		= __exit_p(mxc_rtc_remove),
+};
+
+static int __init mxc_rtc_init(void)
+{
+	return platform_driver_probe(&mxc_rtc_driver, mxc_rtc_probe);
+}
+
+static void __exit mxc_rtc_exit(void)
+{
+	platform_driver_unregister(&mxc_rtc_driver);
+}
+
+module_init(mxc_rtc_init);
+module_exit(mxc_rtc_exit);
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("RTC driver for Freescale MXC");
+MODULE_LICENSE("GPL");
+
-- 
cgit v0.10.2


From aa958f571ec9492b8100302ee70ac0ab2598bf19 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.ml.walleij@gmail.com>
Date: Tue, 22 Sep 2009 16:46:24 -0700
Subject: rtc: U300 COH 901 331 RTC driver v3

This adds a driver for the RTC COH 901 331 found in the ST-Ericsson U300
series mobile platforms to the RTC subsystem.  It integrates to the ARM
kernel support recently added to RMKs ARM tree and will be enabled in the
U300 defconfig in due time.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 18c5be1..c5b906b 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -798,4 +798,16 @@ config RTC_DRV_PS3
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ps3.
 
+config RTC_DRV_COH901331
+	tristate "ST-Ericsson COH 901 331 RTC"
+	depends on ARCH_U300
+	help
+	  If you say Y here you will get access to ST-Ericsson
+	  COH 901 331 RTC clock found in some ST-Ericsson Mobile
+	  Platforms.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called "rtc-coh901331".
+
+
 endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 5c16333..40c116b 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -82,3 +82,4 @@ obj-$(CONFIG_RTC_DRV_WM8350)	+= rtc-wm8350.o
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
 obj-$(CONFIG_RTC_DRV_PCF50633)	+= rtc-pcf50633.o
 obj-$(CONFIG_RTC_DRV_PS3)	+= rtc-ps3.o
+obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
new file mode 100644
index 0000000..7fe1fa2
--- /dev/null
+++ b/drivers/rtc/rtc-coh901331.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Real Time Clock interface for ST-Ericsson AB COH 901 331 RTC.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Based on rtc-pl031.c by Deepak Saxena <dsaxena@plexity.net>
+ * Copyright 2006 (c) MontaVista Software, Inc.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+
+/*
+ * Registers in the COH 901 331
+ */
+/* Alarm value 32bit (R/W) */
+#define COH901331_ALARM		0x00U
+/* Used to set current time 32bit (R/W) */
+#define COH901331_SET_TIME	0x04U
+/* Indication if current time is valid 32bit (R/-) */
+#define COH901331_VALID		0x08U
+/* Read the current time 32bit (R/-) */
+#define COH901331_CUR_TIME	0x0cU
+/* Event register for the "alarm" interrupt */
+#define COH901331_IRQ_EVENT	0x10U
+/* Mask register for the "alarm" interrupt */
+#define COH901331_IRQ_MASK	0x14U
+/* Force register for the "alarm" interrupt */
+#define COH901331_IRQ_FORCE	0x18U
+
+/*
+ * Reference to RTC block clock
+ * Notice that the frequent clk_enable()/clk_disable() on this
+ * clock is mainly to be able to turn on/off other clocks in the
+ * hierarchy as needed, the RTC clock is always on anyway.
+ */
+struct coh901331_port {
+	struct rtc_device *rtc;
+	struct clk *clk;
+	u32 phybase;
+	u32 physize;
+	void __iomem *virtbase;
+	int irq;
+#ifdef CONFIG_PM
+	u32 irqmaskstore;
+#endif
+};
+
+static irqreturn_t coh901331_interrupt(int irq, void *data)
+{
+	struct coh901331_port *rtap = data;
+
+	clk_enable(rtap->clk);
+	/* Ack IRQ */
+	writel(1, rtap->virtbase + COH901331_IRQ_EVENT);
+	clk_disable(rtap->clk);
+	/* Set alarm flag */
+	rtc_update_irq(rtap->rtc, 1, RTC_AF);
+
+	return IRQ_HANDLED;
+}
+
+static int coh901331_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(dev);
+
+	clk_enable(rtap->clk);
+	/* Check if the time is valid */
+	if (readl(rtap->virtbase + COH901331_VALID)) {
+		rtc_time_to_tm(readl(rtap->virtbase + COH901331_CUR_TIME), tm);
+		clk_disable(rtap->clk);
+		return rtc_valid_tm(tm);
+	}
+	clk_disable(rtap->clk);
+	return -EINVAL;
+}
+
+static int coh901331_set_mmss(struct device *dev, unsigned long secs)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(dev);
+
+	clk_enable(rtap->clk);
+	writel(secs, rtap->virtbase + COH901331_SET_TIME);
+	clk_disable(rtap->clk);
+
+	return 0;
+}
+
+static int coh901331_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(dev);
+
+	clk_enable(rtap->clk);
+	rtc_time_to_tm(readl(rtap->virtbase + COH901331_ALARM), &alarm->time);
+	alarm->pending = readl(rtap->virtbase + COH901331_IRQ_EVENT) & 1U;
+	alarm->enabled = readl(rtap->virtbase + COH901331_IRQ_MASK) & 1U;
+	clk_disable(rtap->clk);
+
+	return 0;
+}
+
+static int coh901331_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(dev);
+	unsigned long time;
+
+	rtc_tm_to_time(&alarm->time, &time);
+	clk_enable(rtap->clk);
+	writel(time, rtap->virtbase + COH901331_ALARM);
+	writel(alarm->enabled, rtap->virtbase + COH901331_IRQ_MASK);
+	clk_disable(rtap->clk);
+
+	return 0;
+}
+
+static int coh901331_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(dev);
+
+	clk_enable(rtap->clk);
+	if (enabled)
+		writel(1, rtap->virtbase + COH901331_IRQ_MASK);
+	else
+		writel(0, rtap->virtbase + COH901331_IRQ_MASK);
+	clk_disable(rtap->clk);
+}
+
+static struct rtc_class_ops coh901331_ops = {
+	.read_time = coh901331_read_time,
+	.set_mmss = coh901331_set_mmss,
+	.read_alarm = coh901331_read_alarm,
+	.set_alarm = coh901331_set_alarm,
+	.alarm_irq_enable = coh901331_alarm_irq_enable,
+};
+
+static int __exit coh901331_remove(struct platform_device *pdev)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
+
+	if (rtap) {
+		free_irq(rtap->irq, rtap);
+		rtc_device_unregister(rtap->rtc);
+		clk_put(rtap->clk);
+		iounmap(rtap->virtbase);
+		release_mem_region(rtap->phybase, rtap->physize);
+		platform_set_drvdata(pdev, NULL);
+		kfree(rtap);
+	}
+
+	return 0;
+}
+
+
+static int __init coh901331_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct coh901331_port *rtap;
+	struct resource *res;
+
+	rtap = kzalloc(sizeof(struct coh901331_port), GFP_KERNEL);
+	if (!rtap)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		ret = -ENOENT;
+		goto out_no_resource;
+	}
+	rtap->phybase = res->start;
+	rtap->physize = resource_size(res);
+
+	if (request_mem_region(rtap->phybase, rtap->physize,
+			       "rtc-coh901331") == NULL) {
+		ret = -EBUSY;
+		goto out_no_memregion;
+	}
+
+	rtap->virtbase = ioremap(rtap->phybase, rtap->physize);
+	if (!rtap->virtbase) {
+		ret = -ENOMEM;
+		goto out_no_remap;
+	}
+
+	rtap->irq = platform_get_irq(pdev, 0);
+	if (request_irq(rtap->irq, coh901331_interrupt, IRQF_DISABLED,
+			"RTC COH 901 331 Alarm", rtap)) {
+		ret = -EIO;
+		goto out_no_irq;
+	}
+
+	rtap->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(rtap->clk)) {
+		ret = PTR_ERR(rtap->clk);
+		dev_err(&pdev->dev, "could not get clock\n");
+		goto out_no_clk;
+	}
+
+	/* We enable/disable the clock only to assure it works */
+	ret = clk_enable(rtap->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "could not enable clock\n");
+		goto out_no_clk_enable;
+	}
+	clk_disable(rtap->clk);
+
+	rtap->rtc = rtc_device_register("coh901331", &pdev->dev, &coh901331_ops,
+					 THIS_MODULE);
+	if (IS_ERR(rtap->rtc)) {
+		ret = PTR_ERR(rtap->rtc);
+		goto out_no_rtc;
+	}
+
+	platform_set_drvdata(pdev, rtap);
+
+	return 0;
+
+ out_no_rtc:
+ out_no_clk_enable:
+	clk_put(rtap->clk);
+ out_no_clk:
+	free_irq(rtap->irq, rtap);
+ out_no_irq:
+	iounmap(rtap->virtbase);
+ out_no_remap:
+	platform_set_drvdata(pdev, NULL);
+ out_no_memregion:
+	release_mem_region(rtap->phybase, SZ_4K);
+ out_no_resource:
+	kfree(rtap);
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int coh901331_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
+
+	/*
+	 * If this RTC alarm will be used for waking the system up,
+	 * don't disable it of course. Else we just disable the alarm
+	 * and await suspension.
+	 */
+	if (device_may_wakeup(&pdev->dev)) {
+		enable_irq_wake(rtap->irq);
+	} else {
+		clk_enable(rtap->clk);
+		rtap->irqmaskstore = readl(rtap->virtbase + COH901331_IRQ_MASK);
+		writel(0, rtap->virtbase + COH901331_IRQ_MASK);
+		clk_disable(rtap->clk);
+	}
+	return 0;
+}
+
+static int coh901331_resume(struct platform_device *pdev)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
+
+	if (device_may_wakeup(&pdev->dev))
+		disable_irq_wake(rtap->irq);
+	else
+		clk_enable(rtap->clk);
+		writel(rtap->irqmaskstore, rtap->virtbase + COH901331_IRQ_MASK);
+		clk_disable(rtap->clk);
+	return 0;
+}
+#else
+#define coh901331_suspend NULL
+#define coh901331_resume NULL
+#endif
+
+static void coh901331_shutdown(struct platform_device *pdev)
+{
+	struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
+
+	clk_enable(rtap->clk);
+	writel(0, rtap->virtbase + COH901331_IRQ_MASK);
+	clk_disable(rtap->clk);
+}
+
+static struct platform_driver coh901331_driver = {
+	.driver = {
+		.name = "rtc-coh901331",
+		.owner = THIS_MODULE,
+	},
+	.remove = __exit_p(coh901331_remove),
+	.suspend = coh901331_suspend,
+	.resume = coh901331_resume,
+	.shutdown = coh901331_shutdown,
+};
+
+static int __init coh901331_init(void)
+{
+	return platform_driver_probe(&coh901331_driver, coh901331_probe);
+}
+
+static void __exit coh901331_exit(void)
+{
+	platform_driver_unregister(&coh901331_driver);
+}
+
+module_init(coh901331_init);
+module_exit(coh901331_exit);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
+MODULE_DESCRIPTION("ST-Ericsson AB COH 901 331 RTC Driver");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From ad91fd854f08d1795b58addc09752de3351644f3 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 22 Sep 2009 16:46:24 -0700
Subject: rtc: update documentation wrt RTC_PIE/irq_set_state

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 8deffcd..102ce96 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -185,6 +185,8 @@ driver returns ENOIOCTLCMD.  Some common examples:
 	hardware in the irq_set_freq function.  If it isn't, return -EINVAL.  If
 	you cannot actually change the frequency, do not define irq_set_freq.
 
+    *	RTC_PIE_ON, RTC_PIE_OFF: the irq_set_state function will be called.
+
 If all else fails, check out the rtc-test.c driver!
 
 
-- 
cgit v0.10.2


From 6bff5fb80b86b461a233eaea4da769ba1b349cbe Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Tue, 22 Sep 2009 16:46:25 -0700
Subject: rtc-bfin: do not share RTC IRQ

The Blackfin RTC IRQ is an internal interrupt, so it makes no sense to
have it be shared.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index a118eb0..b11485b 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -383,7 +383,7 @@ static int __devinit bfin_rtc_probe(struct platform_device *pdev)
 	}
 
 	/* Grab the IRQ and init the hardware */
-	ret = request_irq(IRQ_RTC, bfin_rtc_interrupt, IRQF_SHARED, pdev->name, dev);
+	ret = request_irq(IRQ_RTC, bfin_rtc_interrupt, 0, pdev->name, dev);
 	if (unlikely(ret))
 		goto err_reg;
 	/* sometimes the bootloader touched things, but the write complete was not
-- 
cgit v0.10.2


From df17f63173bcfcc8b4b90f63bf88f54ca0dd2dd7 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Tue, 22 Sep 2009 16:46:26 -0700
Subject: rtc: add Freescale stmp37xx/378x driver

Add support for RTC on the Freescale STMP37xx/378x platform.

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index c5b906b..8455de8 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -810,4 +810,14 @@ config RTC_DRV_COH901331
 	  will be called "rtc-coh901331".
 
 
+config RTC_DRV_STMP
+	tristate "Freescale STMP3xxx RTC"
+	depends on ARCH_STMP3XXX
+	help
+	  If you say yes here you will get support for the onboard
+	  STMP3xxx RTC.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-stmp3xxx.
+
 endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 40c116b..61d5f10 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -83,3 +83,4 @@ obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
 obj-$(CONFIG_RTC_DRV_PCF50633)	+= rtc-pcf50633.o
 obj-$(CONFIG_RTC_DRV_PS3)	+= rtc-ps3.o
 obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
+obj-$(CONFIG_RTC_DRV_STMP)	+= rtc-stmp3xxx.o
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
new file mode 100644
index 0000000..d7ce1a5
--- /dev/null
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -0,0 +1,304 @@
+/*
+ * Freescale STMP37XX/STMP378X Real Time Clock driver
+ *
+ * Copyright (c) 2007 Sigmatel, Inc.
+ * Peter Hartley, <peter.hartley@sigmatel.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/rtc.h>
+
+#include <mach/platform.h>
+#include <mach/stmp3xxx.h>
+#include <mach/regs-rtc.h>
+
+struct stmp3xxx_rtc_data {
+	struct rtc_device *rtc;
+	unsigned irq_count;
+	void __iomem *io;
+	int irq_alarm, irq_1msec;
+};
+
+static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
+{
+	/*
+	 * The datasheet doesn't say which way round the
+	 * NEW_REGS/STALE_REGS bitfields go. In fact it's 0x1=P0,
+	 * 0x2=P1, .., 0x20=P5, 0x40=ALARM, 0x80=SECONDS
+	 */
+	while (__raw_readl(rtc_data->io + HW_RTC_STAT) &
+			BF(0x80, RTC_STAT_STALE_REGS))
+		cpu_relax();
+}
+
+/* Time read/write */
+static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+	stmp3xxx_wait_time(rtc_data);
+	rtc_time_to_tm(__raw_readl(rtc_data->io + HW_RTC_SECONDS), rtc_tm);
+	return 0;
+}
+
+static int stmp3xxx_rtc_set_mmss(struct device *dev, unsigned long t)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+	__raw_writel(t, rtc_data->io + HW_RTC_SECONDS);
+	stmp3xxx_wait_time(rtc_data);
+	return 0;
+}
+
+/* interrupt(s) handler */
+static irqreturn_t stmp3xxx_rtc_interrupt(int irq, void *dev_id)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev_id);
+	u32 status;
+	u32 events = 0;
+
+	status = __raw_readl(rtc_data->io + HW_RTC_CTRL) &
+			(BM_RTC_CTRL_ALARM_IRQ | BM_RTC_CTRL_ONEMSEC_IRQ);
+
+	if (status & BM_RTC_CTRL_ALARM_IRQ) {
+		stmp3xxx_clearl(BM_RTC_CTRL_ALARM_IRQ,
+				rtc_data->io + HW_RTC_CTRL);
+		events |= RTC_AF | RTC_IRQF;
+	}
+
+	if (status & BM_RTC_CTRL_ONEMSEC_IRQ) {
+		stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ,
+				rtc_data->io + HW_RTC_CTRL);
+		if (++rtc_data->irq_count % 1000 == 0) {
+			events |= RTC_UF | RTC_IRQF;
+			rtc_data->irq_count = 0;
+		}
+	}
+
+	if (events)
+		rtc_update_irq(rtc_data->rtc, 1, events);
+
+	return IRQ_HANDLED;
+}
+
+static int stmp3xxx_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+	void __iomem *p = rtc_data->io + HW_RTC_PERSISTENT0,
+		     *ctl = rtc_data->io + HW_RTC_CTRL;
+
+	if (enabled) {
+		stmp3xxx_setl(BM_RTC_PERSISTENT0_ALARM_EN |
+			      BM_RTC_PERSISTENT0_ALARM_WAKE_EN, p);
+		stmp3xxx_setl(BM_RTC_CTRL_ALARM_IRQ_EN, ctl);
+	} else {
+		stmp3xxx_clearl(BM_RTC_PERSISTENT0_ALARM_EN |
+			      BM_RTC_PERSISTENT0_ALARM_WAKE_EN, p);
+		stmp3xxx_clearl(BM_RTC_CTRL_ALARM_IRQ_EN, ctl);
+	}
+	return 0;
+}
+
+static int stmp3xxx_update_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+	if (enabled)
+		stmp3xxx_setl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
+				rtc_data->io + HW_RTC_CTRL);
+	else
+		stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
+				rtc_data->io + HW_RTC_CTRL);
+	return 0;
+}
+
+static int stmp3xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+	rtc_time_to_tm(__raw_readl(rtc_data->io + HW_RTC_ALARM), &alm->time);
+	return 0;
+}
+
+static int stmp3xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	unsigned long t;
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+	rtc_tm_to_time(&alm->time, &t);
+	__raw_writel(t, rtc_data->io + HW_RTC_ALARM);
+	return 0;
+}
+
+static struct rtc_class_ops stmp3xxx_rtc_ops = {
+	.alarm_irq_enable =
+			  stmp3xxx_alarm_irq_enable,
+	.update_irq_enable =
+			  stmp3xxx_update_irq_enable,
+	.read_time	= stmp3xxx_rtc_gettime,
+	.set_mmss	= stmp3xxx_rtc_set_mmss,
+	.read_alarm	= stmp3xxx_rtc_read_alarm,
+	.set_alarm	= stmp3xxx_rtc_set_alarm,
+};
+
+static int stmp3xxx_rtc_remove(struct platform_device *pdev)
+{
+	struct stmp3xxx_rtc_data *rtc_data = platform_get_drvdata(pdev);
+
+	if (!rtc_data)
+		return 0;
+
+	stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ_EN | BM_RTC_CTRL_ALARM_IRQ_EN,
+			rtc_data->io + HW_RTC_CTRL);
+	free_irq(rtc_data->irq_alarm, &pdev->dev);
+	free_irq(rtc_data->irq_1msec, &pdev->dev);
+	rtc_device_unregister(rtc_data->rtc);
+	iounmap(rtc_data->io);
+	kfree(rtc_data);
+
+	return 0;
+}
+
+static int stmp3xxx_rtc_probe(struct platform_device *pdev)
+{
+	struct stmp3xxx_rtc_data *rtc_data;
+	struct resource *r;
+	int err;
+
+	rtc_data = kzalloc(sizeof *rtc_data, GFP_KERNEL);
+	if (!rtc_data)
+		return -ENOMEM;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "failed to get resource\n");
+		err = -ENXIO;
+		goto out_free;
+	}
+
+	rtc_data->io = ioremap(r->start, resource_size(r));
+	if (!rtc_data->io) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		err = -EIO;
+		goto out_free;
+	}
+
+	rtc_data->irq_alarm = platform_get_irq(pdev, 0);
+	rtc_data->irq_1msec = platform_get_irq(pdev, 1);
+
+	if (!(__raw_readl(HW_RTC_STAT + rtc_data->io) &
+			BM_RTC_STAT_RTC_PRESENT)) {
+		dev_err(&pdev->dev, "no device onboard\n");
+		err = -ENODEV;
+		goto out_remap;
+	}
+
+	stmp3xxx_reset_block(rtc_data->io, true);
+	stmp3xxx_clearl(BM_RTC_PERSISTENT0_ALARM_EN |
+			BM_RTC_PERSISTENT0_ALARM_WAKE_EN |
+			BM_RTC_PERSISTENT0_ALARM_WAKE,
+			rtc_data->io + HW_RTC_PERSISTENT0);
+	rtc_data->rtc = rtc_device_register(pdev->name, &pdev->dev,
+				&stmp3xxx_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc_data->rtc)) {
+		err = PTR_ERR(rtc_data->rtc);
+		goto out_remap;
+	}
+
+	rtc_data->irq_count = 0;
+	err = request_irq(rtc_data->irq_alarm, stmp3xxx_rtc_interrupt,
+			IRQF_DISABLED, "RTC alarm", &pdev->dev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot claim IRQ%d\n",
+			rtc_data->irq_alarm);
+		goto out_irq_alarm;
+	}
+	err = request_irq(rtc_data->irq_1msec, stmp3xxx_rtc_interrupt,
+			IRQF_DISABLED, "RTC tick", &pdev->dev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot claim IRQ%d\n",
+			rtc_data->irq_1msec);
+		goto out_irq1;
+	}
+
+	platform_set_drvdata(pdev, rtc_data);
+
+	return 0;
+
+out_irq1:
+	free_irq(rtc_data->irq_alarm, &pdev->dev);
+out_irq_alarm:
+	stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ_EN | BM_RTC_CTRL_ALARM_IRQ_EN,
+			rtc_data->io + HW_RTC_CTRL);
+	rtc_device_unregister(rtc_data->rtc);
+out_remap:
+	iounmap(rtc_data->io);
+out_free:
+	kfree(rtc_data);
+	return err;
+}
+
+#ifdef CONFIG_PM
+static int stmp3xxx_rtc_suspend(struct platform_device *dev, pm_message_t state)
+{
+	return 0;
+}
+
+static int stmp3xxx_rtc_resume(struct platform_device *dev)
+{
+	struct stmp3xxx_rtc_data *rtc_data = platform_get_drvdata(dev);
+
+	stmp3xxx_reset_block(rtc_data->io, true);
+	stmp3xxx_clearl(BM_RTC_PERSISTENT0_ALARM_EN |
+			BM_RTC_PERSISTENT0_ALARM_WAKE_EN |
+			BM_RTC_PERSISTENT0_ALARM_WAKE,
+			rtc_data->io + HW_RTC_PERSISTENT0);
+	return 0;
+}
+#else
+#define stmp3xxx_rtc_suspend	NULL
+#define stmp3xxx_rtc_resume	NULL
+#endif
+
+static struct platform_driver stmp3xxx_rtcdrv = {
+	.probe		= stmp3xxx_rtc_probe,
+	.remove		= stmp3xxx_rtc_remove,
+	.suspend	= stmp3xxx_rtc_suspend,
+	.resume		= stmp3xxx_rtc_resume,
+	.driver		= {
+		.name	= "stmp3xxx-rtc",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init stmp3xxx_rtc_init(void)
+{
+	return platform_driver_register(&stmp3xxx_rtcdrv);
+}
+
+static void __exit stmp3xxx_rtc_exit(void)
+{
+	platform_driver_unregister(&stmp3xxx_rtcdrv);
+}
+
+module_init(stmp3xxx_rtc_init);
+module_exit(stmp3xxx_rtc_exit);
+
+MODULE_DESCRIPTION("STMP3xxx RTC Driver");
+MODULE_AUTHOR("dmitry pervushin <dpervushin@embeddedalley.com>");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 88413e1eeb2cccc721cd5568544f32f4234700fb Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Tue, 22 Sep 2009 16:46:26 -0700
Subject: rtc: reorder Makefile

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 61d5f10..82c718b 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -23,7 +23,9 @@ obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
 obj-$(CONFIG_RTC_DRV_AT91SAM9)	+= rtc-at91sam9.o
 obj-$(CONFIG_RTC_DRV_AU1XXX)	+= rtc-au1xxx.o
 obj-$(CONFIG_RTC_DRV_BFIN)	+= rtc-bfin.o
+obj-$(CONFIG_RTC_DRV_BQ4802)	+= rtc-bq4802.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
+obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
 obj-$(CONFIG_RTC_DRV_DM355EVM)	+= rtc-dm355evm.o
 obj-$(CONFIG_RTC_DRV_DS1216)	+= rtc-ds1216.o
 obj-$(CONFIG_RTC_DRV_DS1286)	+= rtc-ds1286.o
@@ -40,6 +42,7 @@ obj-$(CONFIG_RTC_DRV_DS3234)	+= rtc-ds3234.o
 obj-$(CONFIG_RTC_DRV_EFI)	+= rtc-efi.o
 obj-$(CONFIG_RTC_DRV_EP93XX)	+= rtc-ep93xx.o
 obj-$(CONFIG_RTC_DRV_FM3130)	+= rtc-fm3130.o
+obj-$(CONFIG_RTC_DRV_GENERIC)	+= rtc-generic.o
 obj-$(CONFIG_RTC_DRV_ISL1208)	+= rtc-isl1208.o
 obj-$(CONFIG_RTC_DRV_M41T80)	+= rtc-m41t80.o
 obj-$(CONFIG_RTC_DRV_M41T94)	+= rtc-m41t94.o
@@ -47,9 +50,6 @@ obj-$(CONFIG_RTC_DRV_M48T35)	+= rtc-m48t35.o
 obj-$(CONFIG_RTC_DRV_M48T59)	+= rtc-m48t59.o
 obj-$(CONFIG_RTC_DRV_M48T86)	+= rtc-m48t86.o
 obj-$(CONFIG_RTC_MXC)		+= rtc-mxc.o
-obj-$(CONFIG_RTC_DRV_BQ4802)	+= rtc-bq4802.o
-obj-$(CONFIG_RTC_DRV_SUN4V)	+= rtc-sun4v.o
-obj-$(CONFIG_RTC_DRV_STARFIRE)	+= rtc-starfire.o
 obj-$(CONFIG_RTC_DRV_MAX6900)	+= rtc-max6900.o
 obj-$(CONFIG_RTC_DRV_MAX6902)	+= rtc-max6902.o
 obj-$(CONFIG_RTC_DRV_MV)	+= rtc-mv.o
@@ -57,9 +57,10 @@ obj-$(CONFIG_RTC_DRV_OMAP)	+= rtc-omap.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PCF2123)	+= rtc-pcf2123.o
+obj-$(CONFIG_RTC_DRV_PCF50633)	+= rtc-pcf50633.o
 obj-$(CONFIG_RTC_DRV_PL030)	+= rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)	+= rtc-pl031.o
-obj-$(CONFIG_RTC_DRV_GENERIC)	+= rtc-generic.o
+obj-$(CONFIG_RTC_DRV_PS3)	+= rtc-ps3.o
 obj-$(CONFIG_RTC_DRV_PXA)	+= rtc-pxa.o
 obj-$(CONFIG_RTC_DRV_R9701)	+= rtc-r9701.o
 obj-$(CONFIG_RTC_DRV_RS5C313)	+= rtc-rs5c313.o
@@ -71,7 +72,10 @@ obj-$(CONFIG_RTC_DRV_S35390A)	+= rtc-s35390a.o
 obj-$(CONFIG_RTC_DRV_S3C)	+= rtc-s3c.o
 obj-$(CONFIG_RTC_DRV_SA1100)	+= rtc-sa1100.o
 obj-$(CONFIG_RTC_DRV_SH)	+= rtc-sh.o
+obj-$(CONFIG_RTC_DRV_STARFIRE)	+= rtc-starfire.o
 obj-$(CONFIG_RTC_DRV_STK17TA8)	+= rtc-stk17ta8.o
+obj-$(CONFIG_RTC_DRV_STMP)	+= rtc-stmp3xxx.o
+obj-$(CONFIG_RTC_DRV_SUN4V)	+= rtc-sun4v.o
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 obj-$(CONFIG_RTC_DRV_TWL4030)	+= rtc-twl4030.o
 obj-$(CONFIG_RTC_DRV_TX4939)	+= rtc-tx4939.o
@@ -80,7 +84,3 @@ obj-$(CONFIG_RTC_DRV_VR41XX)	+= rtc-vr41xx.o
 obj-$(CONFIG_RTC_DRV_WM831X)	+= rtc-wm831x.o
 obj-$(CONFIG_RTC_DRV_WM8350)	+= rtc-wm8350.o
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
-obj-$(CONFIG_RTC_DRV_PCF50633)	+= rtc-pcf50633.o
-obj-$(CONFIG_RTC_DRV_PS3)	+= rtc-ps3.o
-obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
-obj-$(CONFIG_RTC_DRV_STMP)	+= rtc-stmp3xxx.o
-- 
cgit v0.10.2


From d3c7a3f71a103abb7af5bdaf1adf6f693913a4a9 Mon Sep 17 00:00:00 2001
From: Daniel Ribeiro <drwyrm@gmail.com>
Date: Tue, 22 Sep 2009 16:46:27 -0700
Subject: rtc: driver for PCAP2 PMIC

[ospite@studenti.unina.it: get pcap data from the parent device]
Signed-off-by: guiming zhuo <gmzhuo@gmail.com>
Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8455de8..3c20dae 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -820,4 +820,11 @@ config RTC_DRV_STMP
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-stmp3xxx.
 
+config RTC_DRV_PCAP
+	tristate "PCAP RTC"
+	depends on EZX_PCAP
+	help
+	  If you say Y here you will get support for the RTC found on
+	  the PCAP2 ASIC used on some Motorola phones.
+
 endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 82c718b..aa3fbd5 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_RTC_DRV_MAX6900)	+= rtc-max6900.o
 obj-$(CONFIG_RTC_DRV_MAX6902)	+= rtc-max6902.o
 obj-$(CONFIG_RTC_DRV_MV)	+= rtc-mv.o
 obj-$(CONFIG_RTC_DRV_OMAP)	+= rtc-omap.o
+obj-$(CONFIG_RTC_DRV_PCAP)	+= rtc-pcap.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
 obj-$(CONFIG_RTC_DRV_PCF2123)	+= rtc-pcf2123.o
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
new file mode 100644
index 0000000..a99c289
--- /dev/null
+++ b/drivers/rtc/rtc-pcap.c
@@ -0,0 +1,224 @@
+/*
+ *  pcap rtc code for Motorola EZX phones
+ *
+ *  Copyright (c) 2008 guiming zhuo <gmzhuo@gmail.com>
+ *  Copyright (c) 2009 Daniel Ribeiro <drwyrm@gmail.com>
+ *
+ *  Based on Motorola's rtc.c Copyright (c) 2003-2005 Motorola
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mfd/ezx-pcap.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+
+struct pcap_rtc {
+	struct pcap_chip *pcap;
+	struct rtc_device *rtc;
+};
+
+static irqreturn_t pcap_rtc_irq(int irq, void *_pcap_rtc)
+{
+	struct pcap_rtc *pcap_rtc = _pcap_rtc;
+	unsigned long rtc_events;
+
+	if (irq == pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ))
+		rtc_events = RTC_IRQF | RTC_UF;
+	else if (irq == pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA))
+		rtc_events = RTC_IRQF | RTC_AF;
+	else
+		rtc_events = 0;
+
+	rtc_update_irq(pcap_rtc->rtc, 1, rtc_events);
+	return IRQ_HANDLED;
+}
+
+static int pcap_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev);
+	struct rtc_time *tm = &alrm->time;
+	unsigned long secs;
+	u32 tod;	/* time of day, seconds since midnight */
+	u32 days;	/* days since 1/1/1970 */
+
+	ezx_pcap_read(pcap_rtc->pcap, PCAP_REG_RTC_TODA, &tod);
+	secs = tod & PCAP_RTC_TOD_MASK;
+
+	ezx_pcap_read(pcap_rtc->pcap, PCAP_REG_RTC_DAYA, &days);
+	secs += (days & PCAP_RTC_DAY_MASK) * SEC_PER_DAY;
+
+	rtc_time_to_tm(secs, tm);
+
+	return 0;
+}
+
+static int pcap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev);
+	struct rtc_time *tm = &alrm->time;
+	unsigned long secs;
+	u32 tod, days;
+
+	rtc_tm_to_time(tm, &secs);
+
+	tod = secs % SEC_PER_DAY;
+	ezx_pcap_write(pcap_rtc->pcap, PCAP_REG_RTC_TODA, tod);
+
+	days = secs / SEC_PER_DAY;
+	ezx_pcap_write(pcap_rtc->pcap, PCAP_REG_RTC_DAYA, days);
+
+	return 0;
+}
+
+static int pcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev);
+	unsigned long secs;
+	u32 tod, days;
+
+	ezx_pcap_read(pcap_rtc->pcap, PCAP_REG_RTC_TOD, &tod);
+	secs = tod & PCAP_RTC_TOD_MASK;
+
+	ezx_pcap_read(pcap_rtc->pcap, PCAP_REG_RTC_DAY, &days);
+	secs += (days & PCAP_RTC_DAY_MASK) * SEC_PER_DAY;
+
+	rtc_time_to_tm(secs, tm);
+
+	return rtc_valid_tm(tm);
+}
+
+static int pcap_rtc_set_mmss(struct device *dev, unsigned long secs)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev);
+	u32 tod, days;
+
+	tod = secs % SEC_PER_DAY;
+	ezx_pcap_write(pcap_rtc->pcap, PCAP_REG_RTC_TOD, tod);
+
+	days = secs / SEC_PER_DAY;
+	ezx_pcap_write(pcap_rtc->pcap, PCAP_REG_RTC_DAY, days);
+
+	return 0;
+}
+
+static int pcap_rtc_irq_enable(struct device *dev, int pirq, unsigned int en)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev);
+
+	if (en)
+		enable_irq(pcap_to_irq(pcap_rtc->pcap, pirq));
+	else
+		disable_irq(pcap_to_irq(pcap_rtc->pcap, pirq));
+
+	return 0;
+}
+
+static int pcap_rtc_alarm_irq_enable(struct device *dev, unsigned int en)
+{
+	return pcap_rtc_irq_enable(dev, PCAP_IRQ_TODA, en);
+}
+
+static int pcap_rtc_update_irq_enable(struct device *dev, unsigned int en)
+{
+	return pcap_rtc_irq_enable(dev, PCAP_IRQ_1HZ, en);
+}
+
+static const struct rtc_class_ops pcap_rtc_ops = {
+	.read_time = pcap_rtc_read_time,
+	.read_alarm = pcap_rtc_read_alarm,
+	.set_alarm = pcap_rtc_set_alarm,
+	.set_mmss = pcap_rtc_set_mmss,
+	.alarm_irq_enable = pcap_rtc_alarm_irq_enable,
+	.update_irq_enable = pcap_rtc_update_irq_enable,
+};
+
+static int __devinit pcap_rtc_probe(struct platform_device *pdev)
+{
+	struct pcap_rtc *pcap_rtc;
+	int timer_irq, alarm_irq;
+	int err = -ENOMEM;
+
+	pcap_rtc = kmalloc(sizeof(struct pcap_rtc), GFP_KERNEL);
+	if (!pcap_rtc)
+		return err;
+
+	pcap_rtc->pcap = dev_get_drvdata(pdev->dev.parent);
+
+	pcap_rtc->rtc = rtc_device_register("pcap", &pdev->dev,
+				  &pcap_rtc_ops, THIS_MODULE);
+	if (IS_ERR(pcap_rtc->rtc)) {
+		err = PTR_ERR(pcap_rtc->rtc);
+		goto fail_rtc;
+	}
+
+	platform_set_drvdata(pdev, pcap_rtc);
+
+	timer_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ);
+	alarm_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA);
+
+	err = request_irq(timer_irq, pcap_rtc_irq, 0, "RTC Timer", pcap_rtc);
+	if (err)
+		goto fail_timer;
+
+	err = request_irq(alarm_irq, pcap_rtc_irq, 0, "RTC Alarm", pcap_rtc);
+	if (err)
+		goto fail_alarm;
+
+	return 0;
+fail_alarm:
+	free_irq(timer_irq, pcap_rtc);
+fail_timer:
+	rtc_device_unregister(pcap_rtc->rtc);
+fail_rtc:
+	kfree(pcap_rtc);
+	return err;
+}
+
+static int __devexit pcap_rtc_remove(struct platform_device *pdev)
+{
+	struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev);
+
+	free_irq(pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ), pcap_rtc);
+	free_irq(pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA), pcap_rtc);
+	rtc_device_unregister(pcap_rtc->rtc);
+	kfree(pcap_rtc);
+
+	return 0;
+}
+
+static struct platform_driver pcap_rtc_driver = {
+	.remove = __devexit_p(pcap_rtc_remove),
+	.driver = {
+		.name  = "pcap-rtc",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init rtc_pcap_init(void)
+{
+	return platform_driver_probe(&pcap_rtc_driver, pcap_rtc_probe);
+}
+
+static void __exit rtc_pcap_exit(void)
+{
+	platform_driver_unregister(&pcap_rtc_driver);
+}
+
+module_init(rtc_pcap_init);
+module_exit(rtc_pcap_exit);
+
+MODULE_DESCRIPTION("Motorola pcap rtc driver");
+MODULE_AUTHOR("guiming zhuo <gmzhuo@gmail.com>");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 72445af880dbcd41cffd0d7b78a8c74da093e9eb Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 22 Sep 2009 16:46:29 -0700
Subject: drivers/rtc: correct error-handling code

This code is not executed before ds1307->rtc has been successfully
initialized to the result of calling rtc_device_register.  Thus the test
that ds1307->rtc is not NULL is always true.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@match exists@
expression x, E;
statement S1, S2;
@@

x = rtc_device_register(...)
... when != x = E
(
*  if (x == NULL || ...) S1 else S2
|
*  if (x == NULL && ...) S1 else S2
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 47a93c0..eb99ee4 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -896,8 +896,7 @@ read_rtc:
 	return 0;
 
 exit_irq:
-	if (ds1307->rtc)
-		rtc_device_unregister(ds1307->rtc);
+	rtc_device_unregister(ds1307->rtc);
 exit_free:
 	kfree(ds1307);
 	return err;
-- 
cgit v0.10.2


From 971370cc18ae13e87b68ba1769cbad497fa4ab98 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 22 Sep 2009 16:46:30 -0700
Subject: drivers/rtc: introduce missing kfree

Error handling code following a kzalloc should free the allocated data.

The semantic match that finds the problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r exists@
local idexpression x;
statement S;
expression E;
identifier f,f1,l;
position p1,p2;
expression *ptr != NULL;
@@

x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...);
...
if (x == NULL) S
<... when != x
     when != if (...) { <+...x...+> }
(
x->f1 = E
|
 (x->f1 == NULL || ...)
|
 f(...,x->f1,...)
)
...>
(
 return \(0\|<+...x...+>\|ptr\);
|
 return@p2 ...;
)

@script:python@
p1 << r.p1;
p2 << r.p2;
@@

print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: David Brownell <david-b@pacbell.net>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index 551332e..9da02d1 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -128,12 +128,16 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res == NULL)
-		return -ENXIO;
+	if (res == NULL) {
+		err = -ENXIO;
+		goto fail_free;
+	}
 
 	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (res == NULL)
-		return -EBUSY;
+	if (res == NULL) {
+		err = -EBUSY;
+		goto fail_free;
+	}
 
 	ep93xx_rtc->mmio_base = ioremap(res->start, resource_size(res));
 	if (ep93xx_rtc->mmio_base == NULL) {
@@ -169,6 +173,8 @@ fail:
 		pdev->dev.platform_data = NULL;
 	}
 	release_mem_region(res->start, resource_size(res));
+fail_free:
+	kfree(ep93xx_rtc);
 	return err;
 }
 
-- 
cgit v0.10.2


From dac94d9ec98517e8fe3f980e38f29ea3ac712168 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Tue, 22 Sep 2009 16:46:31 -0700
Subject: rtc: at91rm9200 fixes

Fix two new-ish runtime warnings in the at91rm9200 (etc) RTC:

 Platform driver 'at91_rtc' needs updating - please use dev_pm_ops
 	... by just switching

 IRQ 1/at91_rtc: IRQF_DISABLED is not guaranteed on shared IRQs
 	... no longer needed now that rtc_update_irq() changed

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: Andrew Victor <linux@maxim.org.za>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index b5bf937..bc8bbca 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -289,7 +289,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
 					AT91_RTC_CALEV);
 
 	ret = request_irq(AT91_ID_SYS, at91_rtc_interrupt,
-				IRQF_DISABLED | IRQF_SHARED,
+				IRQF_SHARED,
 				"at91_rtc", pdev);
 	if (ret) {
 		printk(KERN_ERR "at91_rtc: IRQ %d already in use.\n",
@@ -340,7 +340,7 @@ static int __exit at91_rtc_remove(struct platform_device *pdev)
 
 static u32 at91_rtc_imr;
 
-static int at91_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+static int at91_rtc_suspend(struct device *dev)
 {
 	/* this IRQ is shared with DBGU and other hardware which isn't
 	 * necessarily doing PM like we are...
@@ -348,7 +348,7 @@ static int at91_rtc_suspend(struct platform_device *pdev, pm_message_t state)
 	at91_rtc_imr = at91_sys_read(AT91_RTC_IMR)
 			& (AT91_RTC_ALARM|AT91_RTC_SECEV);
 	if (at91_rtc_imr) {
-		if (device_may_wakeup(&pdev->dev))
+		if (device_may_wakeup(dev))
 			enable_irq_wake(AT91_ID_SYS);
 		else
 			at91_sys_write(AT91_RTC_IDR, at91_rtc_imr);
@@ -356,28 +356,34 @@ static int at91_rtc_suspend(struct platform_device *pdev, pm_message_t state)
 	return 0;
 }
 
-static int at91_rtc_resume(struct platform_device *pdev)
+static int at91_rtc_resume(struct device *dev)
 {
 	if (at91_rtc_imr) {
-		if (device_may_wakeup(&pdev->dev))
+		if (device_may_wakeup(dev))
 			disable_irq_wake(AT91_ID_SYS);
 		else
 			at91_sys_write(AT91_RTC_IER, at91_rtc_imr);
 	}
 	return 0;
 }
+
+static const struct dev_pm_ops at91_rtc_pm = {
+	.suspend =	at91_rtc_suspend,
+	.resume =	at91_rtc_resume,
+};
+
+#define at91_rtc_pm_ptr	&at91_rtc_pm
+
 #else
-#define at91_rtc_suspend NULL
-#define at91_rtc_resume  NULL
+#define at91_rtc_pm_ptr	NULL
 #endif
 
 static struct platform_driver at91_rtc_driver = {
 	.remove		= __exit_p(at91_rtc_remove),
-	.suspend	= at91_rtc_suspend,
-	.resume		= at91_rtc_resume,
 	.driver		= {
 		.name	= "at91_rtc",
 		.owner	= THIS_MODULE,
+		.pm	= at91_rtc_pm_ptr,
 	},
 };
 
-- 
cgit v0.10.2


From ea3d1606fd32059461309099e8856c6652888a79 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 22 Sep 2009 16:46:31 -0700
Subject: rtc: document the sysfs interface

The sysfs interface to the RTC class drivers is currently undocumented.
Add some basic documentation defining the semantics of the fields.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 102ce96..2745e81 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -135,6 +135,28 @@ a high functionality RTC is integrated into the SOC.  That system might read
 the system clock from the discrete RTC, but use the integrated one for all
 other tasks, because of its greater functionality.
 
+SYSFS INTERFACE
+---------------
+
+The sysfs interface under /sys/class/rtc/rtcN provides access to various
+rtc attributes without requiring the use of ioctls. All dates and times
+are in the RTC's timezone, rather than in system time.
+
+date:  	   	 RTC-provided date
+max_user_freq:	 The maximum interrupt rate an unprivileged user may request
+		 from this RTC.
+name:		 The name of the RTC corresponding to this sysfs directory
+since_epoch:	 The number of seconds since the epoch according to the RTC
+time:		 RTC-provided time
+wakealarm:	 The time at which the clock will generate a system wakeup
+		 event. This is a one shot wakeup event, so must be reset
+		 after wake if a daily wakeup is required. Format is either
+		 seconds since the epoch or, if there's a leading +, seconds
+		 in the future.
+
+IOCTL INTERFACE
+---------------
+
 The ioctl() calls supported by /dev/rtc are also supported by the RTC class
 framework.  However, because the chips and systems are not standardized,
 some PC/AT functionality might not be provided.  And in the same way, some
-- 
cgit v0.10.2


From d8c1acb1664d17dd995e34507533321e986d9215 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Tue, 22 Sep 2009 16:46:32 -0700
Subject: rtc: add boot_timesource sysfs attribute

CONFIG_RTC_HCTOSYS allows the kernel to read the system time from the RTC
at boot and resume, avoiding the need for userspace to do so.
Unfortunately userspace currently has no way to know whether this
configuration option is enabled and thus cannot sensibly choose whether to
run hwclock itself or not.  Add a hctosys sysfs attribute which indicates
whether a given RTC set the system clock.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 2745e81..9104c10 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -143,6 +143,8 @@ rtc attributes without requiring the use of ioctls. All dates and times
 are in the RTC's timezone, rather than in system time.
 
 date:  	   	 RTC-provided date
+hctosys:   	 1 if the RTC provided the system time at boot via the
+		 CONFIG_RTC_HCTOSYS kernel option, 0 otherwise
 max_user_freq:	 The maximum interrupt rate an unprivileged user may request
 		 from this RTC.
 name:		 The name of the RTC corresponding to this sysfs directory
diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
index 2531ce4..7dd23a6 100644
--- a/drivers/rtc/rtc-sysfs.c
+++ b/drivers/rtc/rtc-sysfs.c
@@ -102,6 +102,19 @@ rtc_sysfs_set_max_user_freq(struct device *dev, struct device_attribute *attr,
 	return n;
 }
 
+static ssize_t
+rtc_sysfs_show_hctosys(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+#ifdef CONFIG_RTC_HCTOSYS_DEVICE
+	if (strcmp(dev_name(&to_rtc_device(dev)->dev),
+		   CONFIG_RTC_HCTOSYS_DEVICE) == 0)
+		return sprintf(buf, "1\n");
+	else
+#endif
+		return sprintf(buf, "0\n");
+}
+
 static struct device_attribute rtc_attrs[] = {
 	__ATTR(name, S_IRUGO, rtc_sysfs_show_name, NULL),
 	__ATTR(date, S_IRUGO, rtc_sysfs_show_date, NULL),
@@ -109,6 +122,7 @@ static struct device_attribute rtc_attrs[] = {
 	__ATTR(since_epoch, S_IRUGO, rtc_sysfs_show_since_epoch, NULL),
 	__ATTR(max_user_freq, S_IRUGO | S_IWUSR, rtc_sysfs_show_max_user_freq,
 			rtc_sysfs_set_max_user_freq),
+	__ATTR(hctosys, S_IRUGO, rtc_sysfs_show_hctosys, NULL),
 	{ },
 };
 
-- 
cgit v0.10.2


From a4177ee7f1a83eecb1d75e85d32664b023ef65e9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <ext-jani.1.nikula@nokia.com>
Date: Tue, 22 Sep 2009 16:46:33 -0700
Subject: gpiolib: allow exported GPIO nodes to be named using sysfs links

Commit 926b663ce8215ba448960e1ff6e58b67a2c3b99b (gpiolib: allow GPIOs to
be named) already provides naming on the chip level. This patch provides
more flexibility by allowing multiple names where ever in sysfs on a per
GPIO basis.

Adapted from David Brownell's comments on a similar concept:
http://lkml.org/lkml/2009/4/20/203.

[randy.dunlap@oracle.com: fix build for CONFIG_GENERIC_GPIO=n]
Signed-off-by: Jani Nikula <ext-jani.1.nikula@nokia.com>
Acked-by: David Brownell <david-b@pacbell.net>
Cc: Daniel Silverstone <dsilvers@simtec.co.uk>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index e4b6985..566edaa 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -555,6 +555,11 @@ requested using gpio_request():
 	/* reverse gpio_export() */
 	void gpio_unexport();
 
+	/* create a sysfs link to an exported GPIO node */
+	int gpio_export_link(struct device *dev, const char *name,
+		unsigned gpio)
+
+
 After a kernel driver requests a GPIO, it may only be made available in
 the sysfs interface by gpio_export().  The driver can control whether the
 signal direction may change.  This helps drivers prevent userspace code
@@ -563,3 +568,8 @@ from accidentally clobbering important system state.
 This explicit exporting can help with debugging (by making some kinds
 of experiments easier), or can provide an always-there interface that's
 suitable for documenting as part of a board support package.
+
+After the GPIO has been exported, gpio_export_link() allows creating
+symlinks from elsewhere in sysfs to the GPIO sysfs node.  Drivers can
+use this to provide the interface under their own device in sysfs with
+a descriptive name.
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 51a8d41..aef6b3d 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -505,6 +505,51 @@ static int match_export(struct device *dev, void *data)
 }
 
 /**
+ * gpio_export_link - create a sysfs link to an exported GPIO node
+ * @dev: device under which to create symlink
+ * @name: name of the symlink
+ * @gpio: gpio to create symlink to, already exported
+ *
+ * Set up a symlink from /sys/.../dev/name to /sys/class/gpio/gpioN
+ * node. Caller is responsible for unlinking.
+ *
+ * Returns zero on success, else an error.
+ */
+int gpio_export_link(struct device *dev, const char *name, unsigned gpio)
+{
+	struct gpio_desc	*desc;
+	int			status = -EINVAL;
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	desc = &gpio_desc[gpio];
+
+	if (test_bit(FLAG_EXPORT, &desc->flags)) {
+		struct device *tdev;
+
+		tdev = class_find_device(&gpio_class, NULL, desc, match_export);
+		if (tdev != NULL) {
+			status = sysfs_create_link(&dev->kobj, &tdev->kobj,
+						name);
+		} else {
+			status = -ENODEV;
+		}
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_export_link);
+
+/**
  * gpio_unexport - reverse effect of gpio_export()
  * @gpio: gpio to make unavailable
  *
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index d6c379d..9cca3785 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -141,6 +141,8 @@ extern int __gpio_to_irq(unsigned gpio);
  * but more typically is configured entirely from userspace.
  */
 extern int gpio_export(unsigned gpio, bool direction_may_change);
+extern int gpio_export_link(struct device *dev, const char *name,
+			unsigned gpio);
 extern void gpio_unexport(unsigned gpio);
 
 #endif	/* CONFIG_GPIO_SYSFS */
@@ -185,6 +187,12 @@ static inline int gpio_export(unsigned gpio, bool direction_may_change)
 	return -ENOSYS;
 }
 
+static inline int gpio_export_link(struct device *dev, const char *name,
+				unsigned gpio)
+{
+	return -ENOSYS;
+}
+
 static inline void gpio_unexport(unsigned gpio)
 {
 }
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index e10c49a..059bd18 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -12,6 +12,8 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 
+struct device;
+
 /*
  * Some platforms don't support the GPIO programming interface.
  *
@@ -89,6 +91,15 @@ static inline int gpio_export(unsigned gpio, bool direction_may_change)
 	return -EINVAL;
 }
 
+static inline int gpio_export_link(struct device *dev, const char *name,
+				unsigned gpio)
+{
+	/* GPIO can never have been exported */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+
 static inline void gpio_unexport(unsigned gpio)
 {
 	/* GPIO can never have been exported */
-- 
cgit v0.10.2


From 1e5db00687c1ebd93a902caf1d3694209013cb3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20R=C3=B6jfors?=
 <richard.rojfors.ext@mocean-labs.com>
Date: Tue, 22 Sep 2009 16:46:34 -0700
Subject: gpio: add MC33880 driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A GPIO driver for the Freescale MC33880 High/Low side switch

Signed-off-by: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 6b4c484..223e7c9 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -195,4 +195,11 @@ config GPIO_MCP23S08
 	  SPI driver for Microchip MCP23S08 I/O expander.  This provides
 	  a GPIO interface supporting inputs and outputs.
 
+config GPIO_MC33880
+	tristate "Freescale MC33880 high-side/low-side switch"
+	depends on SPI_MASTER
+	help
+	  SPI driver for Freescale MC33880 high-side/low-side switch.
+	  This provides GPIO interface supporting inputs and outputs.
+
 endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index ea7c745..f35de16 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
+obj-$(CONFIG_GPIO_MC33880)	+= mc33880.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
diff --git a/drivers/gpio/mc33880.c b/drivers/gpio/mc33880.c
new file mode 100644
index 0000000..e7d01bd
--- /dev/null
+++ b/drivers/gpio/mc33880.c
@@ -0,0 +1,196 @@
+/*
+ * mc33880.c MC33880 high-side/low-side switch GPIO driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Freescale MC33880 high-side/low-side switch
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/mc33880.h>
+#include <linux/gpio.h>
+
+#define DRIVER_NAME "mc33880"
+
+/*
+ * Pin configurations, see MAX7301 datasheet page 6
+ */
+#define PIN_CONFIG_MASK 0x03
+#define PIN_CONFIG_IN_PULLUP 0x03
+#define PIN_CONFIG_IN_WO_PULLUP 0x02
+#define PIN_CONFIG_OUT 0x01
+
+#define PIN_NUMBER 8
+
+
+/*
+ * Some registers must be read back to modify.
+ * To save time we cache them here in memory
+ */
+struct mc33880 {
+	struct mutex	lock;	/* protect from simultanous accesses */
+	u8		port_config;
+	struct gpio_chip chip;
+	struct spi_device *spi;
+};
+
+static int mc33880_write_config(struct mc33880 *mc)
+{
+	return spi_write(mc->spi, &mc->port_config, sizeof(mc->port_config));
+}
+
+
+static int __mc33880_set(struct mc33880 *mc, unsigned offset, int value)
+{
+	if (value)
+		mc->port_config |= 1 << offset;
+	else
+		mc->port_config &= ~(1 << offset);
+
+	return mc33880_write_config(mc);
+}
+
+
+static void mc33880_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct mc33880 *mc = container_of(chip, struct mc33880, chip);
+
+	mutex_lock(&mc->lock);
+
+	__mc33880_set(mc, offset, value);
+
+	mutex_unlock(&mc->lock);
+}
+
+static int __devinit mc33880_probe(struct spi_device *spi)
+{
+	struct mc33880 *mc;
+	struct mc33880_platform_data *pdata;
+	int ret;
+
+	pdata = spi->dev.platform_data;
+	if (!pdata || !pdata->base) {
+		dev_dbg(&spi->dev, "incorrect or missing platform data\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * bits_per_word cannot be configured in platform data
+	 */
+	spi->bits_per_word = 8;
+
+	ret = spi_setup(spi);
+	if (ret < 0)
+		return ret;
+
+	mc = kzalloc(sizeof(struct mc33880), GFP_KERNEL);
+	if (!mc)
+		return -ENOMEM;
+
+	mutex_init(&mc->lock);
+
+	dev_set_drvdata(&spi->dev, mc);
+
+	mc->spi = spi;
+
+	mc->chip.label = DRIVER_NAME,
+	mc->chip.set = mc33880_set;
+	mc->chip.base = pdata->base;
+	mc->chip.ngpio = PIN_NUMBER;
+	mc->chip.can_sleep = 1;
+	mc->chip.dev = &spi->dev;
+	mc->chip.owner = THIS_MODULE;
+
+	mc->port_config = 0x00;
+	/* write twice, because during initialisation the first setting
+	 * is just for testing SPI communication, and the second is the
+	 * "real" configuration
+	 */
+	ret = mc33880_write_config(mc);
+	mc->port_config = 0x00;
+	if (!ret)
+		ret = mc33880_write_config(mc);
+
+	if (ret) {
+		printk(KERN_ERR "Failed writing to " DRIVER_NAME ": %d\n", ret);
+		goto exit_destroy;
+	}
+
+	ret = gpiochip_add(&mc->chip);
+	if (ret)
+		goto exit_destroy;
+
+	return ret;
+
+exit_destroy:
+	dev_set_drvdata(&spi->dev, NULL);
+	mutex_destroy(&mc->lock);
+	kfree(mc);
+	return ret;
+}
+
+static int mc33880_remove(struct spi_device *spi)
+{
+	struct mc33880 *mc;
+	int ret;
+
+	mc = dev_get_drvdata(&spi->dev);
+	if (mc == NULL)
+		return -ENODEV;
+
+	dev_set_drvdata(&spi->dev, NULL);
+
+	ret = gpiochip_remove(&mc->chip);
+	if (!ret) {
+		mutex_destroy(&mc->lock);
+		kfree(mc);
+	} else
+		dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n",
+			ret);
+
+	return ret;
+}
+
+static struct spi_driver mc33880_driver = {
+	.driver = {
+		.name		= DRIVER_NAME,
+		.owner		= THIS_MODULE,
+	},
+	.probe		= mc33880_probe,
+	.remove		= __devexit_p(mc33880_remove),
+};
+
+static int __init mc33880_init(void)
+{
+	return spi_register_driver(&mc33880_driver);
+}
+/* register after spi postcore initcall and before
+ * subsys initcalls that may rely on these GPIOs
+ */
+subsys_initcall(mc33880_init);
+
+static void __exit mc33880_exit(void)
+{
+	spi_unregister_driver(&mc33880_driver);
+}
+module_exit(mc33880_exit);
+
+MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/include/linux/spi/mc33880.h b/include/linux/spi/mc33880.h
new file mode 100644
index 0000000..82ffccd
--- /dev/null
+++ b/include/linux/spi/mc33880.h
@@ -0,0 +1,10 @@
+#ifndef LINUX_SPI_MC33880_H
+#define LINUX_SPI_MC33880_H
+
+struct mc33880_platform_data {
+	/* number assigned to the first GPIO */
+	unsigned	base;
+};
+
+#endif
+
-- 
cgit v0.10.2


From 4cf8e53b3b55fa2f9b2a6b9c3e557b649adf7c6a Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Tue, 22 Sep 2009 16:46:35 -0700
Subject: mfd/gpio: add a GPIO interface to the UCB1400 MFD chip driver via
 gpiolib

Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: David Brownell <david-b@pacbell.net>
Cc: Samuel Ortiz <sameo@openedhand.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 223e7c9..ccca08e 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -202,4 +202,16 @@ config GPIO_MC33880
 	  SPI driver for Freescale MC33880 high-side/low-side switch.
 	  This provides GPIO interface supporting inputs and outputs.
 
+comment "AC97 GPIO expanders:"
+
+config GPIO_UCB1400
+	bool "Philips UCB1400 GPIO"
+	depends on UCB1400_CORE
+	help
+	  This enables support for the Philips UCB1400 GPIO pins.
+	  The UCB1400 is an AC97 audio codec.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ucb1400_gpio.
+
 endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index f35de16..c1ac034 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
 obj-$(CONFIG_GPIO_PL061)	+= pl061.o
 obj-$(CONFIG_GPIO_TWL4030)	+= twl4030-gpio.o
+obj-$(CONFIG_GPIO_UCB1400)	+= ucb1400_gpio.o
 obj-$(CONFIG_GPIO_XILINX)	+= xilinx_gpio.o
 obj-$(CONFIG_GPIO_BT8XX)	+= bt8xxgpio.o
 obj-$(CONFIG_GPIO_VR41XX)	+= vr41xx_giu.o
diff --git a/drivers/gpio/ucb1400_gpio.c b/drivers/gpio/ucb1400_gpio.c
new file mode 100644
index 0000000..50e6bd1
--- /dev/null
+++ b/drivers/gpio/ucb1400_gpio.c
@@ -0,0 +1,125 @@
+/*
+ * Philips UCB1400 GPIO driver
+ *
+ * Author: Marek Vasut <marek.vasut@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/ucb1400.h>
+
+struct ucb1400_gpio_data *ucbdata;
+
+static int ucb1400_gpio_dir_in(struct gpio_chip *gc, unsigned off)
+{
+	struct ucb1400_gpio *gpio;
+	gpio = container_of(gc, struct ucb1400_gpio, gc);
+	ucb1400_gpio_set_direction(gpio->ac97, off, 0);
+	return 0;
+}
+
+static int ucb1400_gpio_dir_out(struct gpio_chip *gc, unsigned off, int val)
+{
+	struct ucb1400_gpio *gpio;
+	gpio = container_of(gc, struct ucb1400_gpio, gc);
+	ucb1400_gpio_set_direction(gpio->ac97, off, 1);
+	ucb1400_gpio_set_value(gpio->ac97, off, val);
+	return 0;
+}
+
+static int ucb1400_gpio_get(struct gpio_chip *gc, unsigned off)
+{
+	struct ucb1400_gpio *gpio;
+	gpio = container_of(gc, struct ucb1400_gpio, gc);
+	return ucb1400_gpio_get_value(gpio->ac97, off);
+}
+
+static void ucb1400_gpio_set(struct gpio_chip *gc, unsigned off, int val)
+{
+	struct ucb1400_gpio *gpio;
+	gpio = container_of(gc, struct ucb1400_gpio, gc);
+	ucb1400_gpio_set_value(gpio->ac97, off, val);
+}
+
+static int ucb1400_gpio_probe(struct platform_device *dev)
+{
+	struct ucb1400_gpio *ucb = dev->dev.platform_data;
+	int err = 0;
+
+	if (!(ucbdata && ucbdata->gpio_offset)) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	platform_set_drvdata(dev, ucb);
+
+	ucb->gc.label = "ucb1400_gpio";
+	ucb->gc.base = ucbdata->gpio_offset;
+	ucb->gc.ngpio = 10;
+	ucb->gc.owner = THIS_MODULE;
+
+	ucb->gc.direction_input = ucb1400_gpio_dir_in;
+	ucb->gc.direction_output = ucb1400_gpio_dir_out;
+	ucb->gc.get = ucb1400_gpio_get;
+	ucb->gc.set = ucb1400_gpio_set;
+	ucb->gc.can_sleep = 1;
+
+	err = gpiochip_add(&ucb->gc);
+	if (err)
+		goto err;
+
+	if (ucbdata && ucbdata->gpio_setup)
+		err = ucbdata->gpio_setup(&dev->dev, ucb->gc.ngpio);
+
+err:
+	return err;
+
+}
+
+static int ucb1400_gpio_remove(struct platform_device *dev)
+{
+	int err = 0;
+	struct ucb1400_gpio *ucb = platform_get_drvdata(dev);
+
+	if (ucbdata && ucbdata->gpio_teardown) {
+		err = ucbdata->gpio_teardown(&dev->dev, ucb->gc.ngpio);
+		if (err)
+			return err;
+	}
+
+	err = gpiochip_remove(&ucb->gc);
+	return err;
+}
+
+static struct platform_driver ucb1400_gpio_driver = {
+	.probe	= ucb1400_gpio_probe,
+	.remove	= ucb1400_gpio_remove,
+	.driver	= {
+		.name	= "ucb1400_gpio"
+	},
+};
+
+static int __init ucb1400_gpio_init(void)
+{
+	return platform_driver_register(&ucb1400_gpio_driver);
+}
+
+static void __exit ucb1400_gpio_exit(void)
+{
+	platform_driver_unregister(&ucb1400_gpio_driver);
+}
+
+void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data)
+{
+	ucbdata = data;
+}
+
+module_init(ucb1400_gpio_init);
+module_exit(ucb1400_gpio_exit);
+
+MODULE_DESCRIPTION("Philips UCB1400 GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c
index 78c2135..2afc080 100644
--- a/drivers/mfd/ucb1400_core.c
+++ b/drivers/mfd/ucb1400_core.c
@@ -48,9 +48,11 @@ static int ucb1400_core_probe(struct device *dev)
 	int err;
 	struct ucb1400 *ucb;
 	struct ucb1400_ts ucb_ts;
+	struct ucb1400_gpio ucb_gpio;
 	struct snd_ac97 *ac97;
 
 	memset(&ucb_ts, 0, sizeof(ucb_ts));
+	memset(&ucb_gpio, 0, sizeof(ucb_gpio));
 
 	ucb = kzalloc(sizeof(struct ucb1400), GFP_KERNEL);
 	if (!ucb) {
@@ -68,25 +70,44 @@ static int ucb1400_core_probe(struct device *dev)
 		goto err0;
 	}
 
+	/* GPIO */
+	ucb_gpio.ac97 = ac97;
+	ucb->ucb1400_gpio = platform_device_alloc("ucb1400_gpio", -1);
+	if (!ucb->ucb1400_gpio) {
+		err = -ENOMEM;
+		goto err0;
+	}
+	err = platform_device_add_data(ucb->ucb1400_gpio, &ucb_gpio,
+					sizeof(ucb_gpio));
+	if (err)
+		goto err1;
+	err = platform_device_add(ucb->ucb1400_gpio);
+	if (err)
+		goto err1;
+
 	/* TOUCHSCREEN */
 	ucb_ts.ac97 = ac97;
 	ucb->ucb1400_ts = platform_device_alloc("ucb1400_ts", -1);
 	if (!ucb->ucb1400_ts) {
 		err = -ENOMEM;
-		goto err0;
+		goto err2;
 	}
 	err = platform_device_add_data(ucb->ucb1400_ts, &ucb_ts,
 					sizeof(ucb_ts));
 	if (err)
-		goto err1;
+		goto err3;
 	err = platform_device_add(ucb->ucb1400_ts);
 	if (err)
-		goto err1;
+		goto err3;
 
 	return 0;
 
-err1:
+err3:
 	platform_device_put(ucb->ucb1400_ts);
+err2:
+	platform_device_unregister(ucb->ucb1400_gpio);
+err1:
+	platform_device_put(ucb->ucb1400_gpio);
 err0:
 	kfree(ucb);
 err:
@@ -98,6 +119,8 @@ static int ucb1400_core_remove(struct device *dev)
 	struct ucb1400 *ucb = dev_get_drvdata(dev);
 
 	platform_device_unregister(ucb->ucb1400_ts);
+	platform_device_unregister(ucb->ucb1400_gpio);
+
 	kfree(ucb);
 	return 0;
 }
diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h
index ae779bb..adb4406 100644
--- a/include/linux/ucb1400.h
+++ b/include/linux/ucb1400.h
@@ -26,6 +26,7 @@
 #include <sound/ac97_codec.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
+#include <linux/gpio.h>
 
 /*
  * UCB1400 AC-link registers
@@ -82,6 +83,17 @@
 #define UCB_ID			0x7e
 #define UCB_ID_1400             0x4304
 
+struct ucb1400_gpio_data {
+	int gpio_offset;
+	int (*gpio_setup)(struct device *dev, int ngpio);
+	int (*gpio_teardown)(struct device *dev, int ngpio);
+};
+
+struct ucb1400_gpio {
+	struct gpio_chip	gc;
+	struct snd_ac97		*ac97;
+};
+
 struct ucb1400_ts {
 	struct input_dev	*ts_idev;
 	struct task_struct	*ts_task;
@@ -95,6 +107,7 @@ struct ucb1400_ts {
 
 struct ucb1400 {
 	struct platform_device	*ucb1400_ts;
+	struct platform_device	*ucb1400_gpio;
 };
 
 static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg)
@@ -147,4 +160,10 @@ static inline void ucb1400_adc_disable(struct snd_ac97 *ac97)
 unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel,
 			      int adcsync);
 
+#ifdef CONFIG_GPIO_UCB1400
+void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data);
+#else
+static inline void ucb1400_gpio_set_data(struct ucb1400_gpio_data *data) {}
+#endif
+
 #endif
-- 
cgit v0.10.2


From 8bf026177000c5bb566cafe2528a96f8380f38bd Mon Sep 17 00:00:00 2001
From: Alek Du <alek.du@intel.com>
Date: Tue, 22 Sep 2009 16:46:36 -0700
Subject: gpio: add Intel Moorestown Platform Langwell chip gpio driver

The Langwell chip is the IO hub for Intel Moorestown platform which has a
64-pin gpio block device inside.  It is exposed as a dedicated PCI device.
 We use it to control outside peripheral as well as to do IRQ demuxing.
The gpio block uses MSI to send level type interrupt to IOAPIC.

Signed-off-by: Alek Du <alek.du@intel.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index ccca08e..9ad20ff 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -180,6 +180,12 @@ config GPIO_BT8XX
 
 	  If unsure, say N.
 
+config GPIO_LANGWELL
+	bool "Intel Moorestown Platform Langwell GPIO support"
+	depends on PCI
+	help
+	  Say Y here to support Intel Moorestown platform GPIO.
+
 comment "SPI GPIO expanders:"
 
 config GPIO_MAX7301
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index c1ac034..b8eef76 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
+obj-$(CONFIG_GPIO_LANGWELL)	+= langwell_gpio.o
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
 obj-$(CONFIG_GPIO_MC33880)	+= mc33880.o
diff --git a/drivers/gpio/langwell_gpio.c b/drivers/gpio/langwell_gpio.c
new file mode 100644
index 0000000..5711ce5
--- /dev/null
+++ b/drivers/gpio/langwell_gpio.c
@@ -0,0 +1,297 @@
+/* langwell_gpio.c Moorestown platform Langwell chip GPIO driver
+ * Copyright (c) 2008 - 2009,  Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Moorestown platform Langwell chip.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/stddef.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+
+struct lnw_gpio_register {
+	u32	GPLR[2];
+	u32	GPDR[2];
+	u32	GPSR[2];
+	u32	GPCR[2];
+	u32	GRER[2];
+	u32	GFER[2];
+	u32	GEDR[2];
+};
+
+struct lnw_gpio {
+	struct gpio_chip		chip;
+	struct lnw_gpio_register 	*reg_base;
+	spinlock_t			lock;
+	unsigned			irq_base;
+};
+
+static int lnw_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
+	u8 reg = offset / 32;
+	void __iomem *gplr;
+
+	gplr = (void __iomem *)(&lnw->reg_base->GPLR[reg]);
+	return readl(gplr) & BIT(offset % 32);
+}
+
+static void lnw_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
+	u8 reg = offset / 32;
+	void __iomem *gpsr, *gpcr;
+
+	if (value) {
+		gpsr = (void __iomem *)(&lnw->reg_base->GPSR[reg]);
+		writel(BIT(offset % 32), gpsr);
+	} else {
+		gpcr = (void __iomem *)(&lnw->reg_base->GPCR[reg]);
+		writel(BIT(offset % 32), gpcr);
+	}
+}
+
+static int lnw_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
+	u8 reg = offset / 32;
+	u32 value;
+	unsigned long flags;
+	void __iomem *gpdr;
+
+	gpdr = (void __iomem *)(&lnw->reg_base->GPDR[reg]);
+	spin_lock_irqsave(&lnw->lock, flags);
+	value = readl(gpdr);
+	value &= ~BIT(offset % 32);
+	writel(value, gpdr);
+	spin_unlock_irqrestore(&lnw->lock, flags);
+	return 0;
+}
+
+static int lnw_gpio_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
+	u8 reg = offset / 32;
+	unsigned long flags;
+	void __iomem *gpdr;
+
+	lnw_gpio_set(chip, offset, value);
+	gpdr = (void __iomem *)(&lnw->reg_base->GPDR[reg]);
+	spin_lock_irqsave(&lnw->lock, flags);
+	value = readl(gpdr);
+	value |= BIT(offset % 32);;
+	writel(value, gpdr);
+	spin_unlock_irqrestore(&lnw->lock, flags);
+	return 0;
+}
+
+static int lnw_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
+	return lnw->irq_base + offset;
+}
+
+static int lnw_irq_type(unsigned irq, unsigned type)
+{
+	struct lnw_gpio *lnw = get_irq_chip_data(irq);
+	u32 gpio = irq - lnw->irq_base;
+	u8 reg = gpio / 32;
+	unsigned long flags;
+	u32 value;
+	void __iomem *grer = (void __iomem *)(&lnw->reg_base->GRER[reg]);
+	void __iomem *gfer = (void __iomem *)(&lnw->reg_base->GFER[reg]);
+
+	if (gpio < 0 || gpio > lnw->chip.ngpio)
+		return -EINVAL;
+	spin_lock_irqsave(&lnw->lock, flags);
+	if (type & IRQ_TYPE_EDGE_RISING)
+		value = readl(grer) | BIT(gpio % 32);
+	else
+		value = readl(grer) & (~BIT(gpio % 32));
+	writel(value, grer);
+
+	if (type & IRQ_TYPE_EDGE_FALLING)
+		value = readl(gfer) | BIT(gpio % 32);
+	else
+		value = readl(gfer) & (~BIT(gpio % 32));
+	writel(value, gfer);
+	spin_unlock_irqrestore(&lnw->lock, flags);
+
+	return 0;
+};
+
+static void lnw_irq_unmask(unsigned irq)
+{
+	struct lnw_gpio *lnw = get_irq_chip_data(irq);
+	u32 gpio = irq - lnw->irq_base;
+	u8 reg = gpio / 32;
+	void __iomem *gedr;
+
+	gedr = (void __iomem *)(&lnw->reg_base->GEDR[reg]);
+	writel(BIT(gpio % 32), gedr);
+};
+
+static void lnw_irq_mask(unsigned irq)
+{
+};
+
+static struct irq_chip lnw_irqchip = {
+	.name		= "LNW-GPIO",
+	.mask		= lnw_irq_mask,
+	.unmask		= lnw_irq_unmask,
+	.set_type	= lnw_irq_type,
+};
+
+static struct pci_device_id lnw_gpio_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x080f) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, lnw_gpio_ids);
+
+static void lnw_irq_handler(unsigned irq, struct irq_desc *desc)
+{
+	struct lnw_gpio *lnw = (struct lnw_gpio *)get_irq_data(irq);
+	u32 reg, gpio;
+	void __iomem *gedr;
+	u32 gedr_v;
+
+	/* check GPIO controller to check which pin triggered the interrupt */
+	for (reg = 0; reg < lnw->chip.ngpio / 32; reg++) {
+		gedr = (void __iomem *)(&lnw->reg_base->GEDR[reg]);
+		gedr_v = readl(gedr);
+		if (!gedr_v)
+			continue;
+		for (gpio = reg*32; gpio < reg*32+32; gpio++) {
+			gedr_v = readl(gedr);
+			if (gedr_v & BIT(gpio % 32)) {
+				pr_debug("pin %d triggered\n", gpio);
+				generic_handle_irq(lnw->irq_base + gpio);
+			}
+		}
+		/* clear the edge detect status bit */
+		writel(gedr_v, gedr);
+	}
+	desc->chip->eoi(irq);
+}
+
+static int __devinit lnw_gpio_probe(struct pci_dev *pdev,
+			const struct pci_device_id *id)
+{
+	void *base;
+	int i;
+	resource_size_t start, len;
+	struct lnw_gpio *lnw;
+	u32 irq_base;
+	u32 gpio_base;
+	int retval = 0;
+
+	retval = pci_enable_device(pdev);
+	if (retval)
+		goto done;
+
+	retval = pci_request_regions(pdev, "langwell_gpio");
+	if (retval) {
+		dev_err(&pdev->dev, "error requesting resources\n");
+		goto err2;
+	}
+	/* get the irq_base from bar1 */
+	start = pci_resource_start(pdev, 1);
+	len = pci_resource_len(pdev, 1);
+	base = ioremap_nocache(start, len);
+	if (!base) {
+		dev_err(&pdev->dev, "error mapping bar1\n");
+		goto err3;
+	}
+	irq_base = *(u32 *)base;
+	gpio_base = *((u32 *)base + 1);
+	/* release the IO mapping, since we already get the info from bar1 */
+	iounmap(base);
+	/* get the register base from bar0 */
+	start = pci_resource_start(pdev, 0);
+	len = pci_resource_len(pdev, 0);
+	base = ioremap_nocache(start, len);
+	if (!base) {
+		dev_err(&pdev->dev, "error mapping bar0\n");
+		retval = -EFAULT;
+		goto err3;
+	}
+
+	lnw = kzalloc(sizeof(struct lnw_gpio), GFP_KERNEL);
+	if (!lnw) {
+		dev_err(&pdev->dev, "can't allocate langwell_gpio chip data\n");
+		retval = -ENOMEM;
+		goto err4;
+	}
+	lnw->reg_base = base;
+	lnw->irq_base = irq_base;
+	lnw->chip.label = dev_name(&pdev->dev);
+	lnw->chip.direction_input = lnw_gpio_direction_input;
+	lnw->chip.direction_output = lnw_gpio_direction_output;
+	lnw->chip.get = lnw_gpio_get;
+	lnw->chip.set = lnw_gpio_set;
+	lnw->chip.to_irq = lnw_gpio_to_irq;
+	lnw->chip.base = gpio_base;
+	lnw->chip.ngpio = 64;
+	lnw->chip.can_sleep = 0;
+	pci_set_drvdata(pdev, lnw);
+	retval = gpiochip_add(&lnw->chip);
+	if (retval) {
+		dev_err(&pdev->dev, "langwell gpiochip_add error %d\n", retval);
+		goto err5;
+	}
+	set_irq_data(pdev->irq, lnw);
+	set_irq_chained_handler(pdev->irq, lnw_irq_handler);
+	for (i = 0; i < lnw->chip.ngpio; i++) {
+		set_irq_chip_and_handler_name(i + lnw->irq_base, &lnw_irqchip,
+					handle_simple_irq, "demux");
+		set_irq_chip_data(i + lnw->irq_base, lnw);
+	}
+
+	spin_lock_init(&lnw->lock);
+	goto done;
+err5:
+	kfree(lnw);
+err4:
+	iounmap(base);
+err3:
+	pci_release_regions(pdev);
+err2:
+	pci_disable_device(pdev);
+done:
+	return retval;
+}
+
+static struct pci_driver lnw_gpio_driver = {
+	.name		= "langwell_gpio",
+	.id_table	= lnw_gpio_ids,
+	.probe		= lnw_gpio_probe,
+};
+
+static int __init lnw_gpio_init(void)
+{
+	return pci_register_driver(&lnw_gpio_driver);
+}
+
+device_initcall(lnw_gpio_init);
-- 
cgit v0.10.2


From 61e0671c6740273663f35357987a2f7aa27479ca Mon Sep 17 00:00:00 2001
From: Alek Du <alek.du@intel.com>
Date: Tue, 22 Sep 2009 16:46:36 -0700
Subject: gpio: pca953x: add support for MAX7315

MAX7315 is pin and software compatible with PCA9534, so add it to the I2C
device ID table of pca953x driver.
http://www.datasheetcatalog.org/datasheet/maxim/MAX7315.pdf

Signed-off-by: Alek Du <alek.du@intel.com>
Acked-by: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index cdb6574..8ab1308 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -40,6 +40,7 @@ static const struct i2c_device_id pca953x_id[] = {
 	{ "pca9557", 8, },
 
 	{ "max7310", 8, },
+	{ "max7315", 8, },
 	{ "pca6107", 8, },
 	{ "tca6408", 8, },
 	{ "tca6416", 16, },
-- 
cgit v0.10.2


From d120c17faeb391f5b4b99af8b1e190619934ecdd Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Tue, 22 Sep 2009 16:46:37 -0700
Subject: gpio: include <linux/gpio.h> not <asm/gpio.h>

Drivers should be including <linux/gpio.h> not <asm/gpio.h>.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c
index 5590414..2559f22 100644
--- a/drivers/gpio/bt8xxgpio.c
+++ b/drivers/gpio/bt8xxgpio.c
@@ -46,8 +46,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
-
-#include <asm/gpio.h>
+#include <linux/gpio.h>
 
 /* Steal the hardware definitions from the bttv driver. */
 #include "../media/video/bt8xx/bt848.h"
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index c6c7aa1..cd651ec 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -6,12 +6,10 @@
 #include <linux/device.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
-
+#include <linux/gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/mcp23s08.h>
 
-#include <asm/gpio.h>
-
 
 /* Registers are all 8 bits wide.
  *
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 8ab1308..6a2fb3f 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -13,6 +13,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/i2c/pca953x.h>
 #ifdef CONFIG_OF_GPIO
@@ -20,8 +21,6 @@
 #include <linux/of_gpio.h>
 #endif
 
-#include <asm/gpio.h>
-
 #define PCA953X_INPUT          0
 #define PCA953X_OUTPUT         1
 #define PCA953X_INVERT         2
diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c
index 9525724..72f2449 100644
--- a/drivers/gpio/pcf857x.c
+++ b/drivers/gpio/pcf857x.c
@@ -20,11 +20,10 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/i2c/pcf857x.h>
 
-#include <asm/gpio.h>
-
 
 static const struct i2c_device_id pcf857x_id[] = {
 	{ "pcf8574", 8 },
-- 
cgit v0.10.2


From ff77c352ae17768c61cfc36357f0a3904552f11c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Tue, 22 Sep 2009 16:46:38 -0700
Subject: gpiolib: allow poll() on value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Many gpio chips allow to generate interrupts when the value of a pin
changes.  This patch gives usermode application the opportunity to make
use of this feature by calling poll(2) on the /sys/class/gpio/gpioN/value
sysfs file.  The edge to trigger can be set in the edge file in the same
directory.  Possible values are "none", "rising", "falling", and "both".

Using level triggers is not possible with current sysfs since nothing
changes the GPIO value (and the IRQ keeps triggering).  Edge triggering
will "just work".  Note that if there was an event between read() and
poll(), the poll() returns immediately.

Also note that this version only supports true GPIO interrupts.  Some
later patch might be able to synthesize this behavior by timer-driven
polling; some systems seem to need that.

[dbrownell@users.sourceforge.net: align ids to 16 bit ids; whitespace]
Signed-off-by: Daniel Glöckner <dg@emlix.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/ABI/testing/sysfs-gpio b/Documentation/ABI/testing/sysfs-gpio
index 8aab809..80f4c94 100644
--- a/Documentation/ABI/testing/sysfs-gpio
+++ b/Documentation/ABI/testing/sysfs-gpio
@@ -19,6 +19,7 @@ Description:
 	/gpioN ... for each exported GPIO #N
 	    /value ... always readable, writes fail for input GPIOs
 	    /direction ... r/w as: in, out (default low); write: high, low
+	    /edge ... r/w as: none, falling, rising, both
 	/gpiochipN ... for each gpiochip; #N is its first GPIO
 	    /base ... (r/o) same as N
 	    /label ... (r/o) descriptive, not necessarily unique
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 566edaa..fa4dc07 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -524,6 +524,13 @@ and have the following read/write attributes:
 		is configured as an output, this value may be written;
 		any nonzero value is treated as high.
 
+	"edge" ... reads as either "none", "rising", "falling", or
+		"both". Write these strings to select the signal edge(s)
+		that will make poll(2) on the "value" file return.
+
+		This file exists only if the pin can be configured as an
+		interrupt generating input pin.
+
 GPIO controllers have paths like /sys/class/gpio/chipchip42/ (for the
 controller implementing GPIOs starting at #42) and have the following
 read-only attributes:
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index aef6b3d..bb11a42 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1,5 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
@@ -7,6 +8,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/gpio.h>
+#include <linux/idr.h>
 
 
 /* Optional implementation infrastructure for GPIO interfaces.
@@ -49,6 +51,13 @@ struct gpio_desc {
 #define FLAG_RESERVED	2
 #define FLAG_EXPORT	3	/* protected by sysfs_lock */
 #define FLAG_SYSFS	4	/* exported via /sys/class/gpio/control */
+#define FLAG_TRIG_FALL	5	/* trigger on falling edge */
+#define FLAG_TRIG_RISE	6	/* trigger on rising edge */
+
+#define PDESC_ID_SHIFT	16	/* add new flags before this one */
+
+#define GPIO_FLAGS_MASK		((1 << PDESC_ID_SHIFT) - 1)
+#define GPIO_TRIGGER_MASK	(BIT(FLAG_TRIG_FALL) | BIT(FLAG_TRIG_RISE))
 
 #ifdef CONFIG_DEBUG_FS
 	const char		*label;
@@ -56,6 +65,15 @@ struct gpio_desc {
 };
 static struct gpio_desc gpio_desc[ARCH_NR_GPIOS];
 
+#ifdef CONFIG_GPIO_SYSFS
+struct poll_desc {
+	struct work_struct	work;
+	struct sysfs_dirent	*value_sd;
+};
+
+static struct idr pdesc_idr;
+#endif
+
 static inline void desc_set_label(struct gpio_desc *d, const char *label)
 {
 #ifdef CONFIG_DEBUG_FS
@@ -188,10 +206,10 @@ static DEFINE_MUTEX(sysfs_lock);
  *   /value
  *      * always readable, subject to hardware behavior
  *      * may be writable, as zero/nonzero
- *
- * REVISIT there will likely be an attribute for configuring async
- * notifications, e.g. to specify polling interval or IRQ trigger type
- * that would for example trigger a poll() on the "value".
+ *   /edge
+ *      * configures behavior of poll(2) on /value
+ *      * available only if pin can generate IRQs on input
+ *      * is read/write as "none", "falling", "rising", or "both"
  */
 
 static ssize_t gpio_direction_show(struct device *dev,
@@ -288,6 +306,175 @@ static ssize_t gpio_value_store(struct device *dev,
 static /*const*/ DEVICE_ATTR(value, 0644,
 		gpio_value_show, gpio_value_store);
 
+static irqreturn_t gpio_sysfs_irq(int irq, void *priv)
+{
+	struct work_struct	*work = priv;
+
+	schedule_work(work);
+	return IRQ_HANDLED;
+}
+
+static void gpio_notify_sysfs(struct work_struct *work)
+{
+	struct poll_desc	*pdesc;
+
+	pdesc = container_of(work, struct poll_desc, work);
+	sysfs_notify_dirent(pdesc->value_sd);
+}
+
+static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
+		unsigned long gpio_flags)
+{
+	struct poll_desc	*pdesc;
+	unsigned long		irq_flags;
+	int			ret, irq, id;
+
+	if ((desc->flags & GPIO_TRIGGER_MASK) == gpio_flags)
+		return 0;
+
+	irq = gpio_to_irq(desc - gpio_desc);
+	if (irq < 0)
+		return -EIO;
+
+	id = desc->flags >> PDESC_ID_SHIFT;
+	pdesc = idr_find(&pdesc_idr, id);
+	if (pdesc) {
+		free_irq(irq, &pdesc->work);
+		cancel_work_sync(&pdesc->work);
+	}
+
+	desc->flags &= ~GPIO_TRIGGER_MASK;
+
+	if (!gpio_flags) {
+		ret = 0;
+		goto free_sd;
+	}
+
+	irq_flags = IRQF_SHARED;
+	if (test_bit(FLAG_TRIG_FALL, &gpio_flags))
+		irq_flags |= IRQF_TRIGGER_FALLING;
+	if (test_bit(FLAG_TRIG_RISE, &gpio_flags))
+		irq_flags |= IRQF_TRIGGER_RISING;
+
+	if (!pdesc) {
+		pdesc = kmalloc(sizeof(*pdesc), GFP_KERNEL);
+		if (!pdesc) {
+			ret = -ENOMEM;
+			goto err_out;
+		}
+
+		do {
+			ret = -ENOMEM;
+			if (idr_pre_get(&pdesc_idr, GFP_KERNEL))
+				ret = idr_get_new_above(&pdesc_idr,
+						pdesc, 1, &id);
+		} while (ret == -EAGAIN);
+
+		if (ret)
+			goto free_mem;
+
+		desc->flags &= GPIO_FLAGS_MASK;
+		desc->flags |= (unsigned long)id << PDESC_ID_SHIFT;
+
+		if (desc->flags >> PDESC_ID_SHIFT != id) {
+			ret = -ERANGE;
+			goto free_id;
+		}
+
+		pdesc->value_sd = sysfs_get_dirent(dev->kobj.sd, "value");
+		if (!pdesc->value_sd) {
+			ret = -ENODEV;
+			goto free_id;
+		}
+		INIT_WORK(&pdesc->work, gpio_notify_sysfs);
+	}
+
+	ret = request_irq(irq, gpio_sysfs_irq, irq_flags,
+			"gpiolib", &pdesc->work);
+	if (ret)
+		goto free_sd;
+
+	desc->flags |= gpio_flags;
+	return 0;
+
+free_sd:
+	sysfs_put(pdesc->value_sd);
+free_id:
+	idr_remove(&pdesc_idr, id);
+	desc->flags &= GPIO_FLAGS_MASK;
+free_mem:
+	kfree(pdesc);
+err_out:
+	return ret;
+}
+
+static const struct {
+	const char *name;
+	unsigned long flags;
+} trigger_types[] = {
+	{ "none",    0 },
+	{ "falling", BIT(FLAG_TRIG_FALL) },
+	{ "rising",  BIT(FLAG_TRIG_RISE) },
+	{ "both",    BIT(FLAG_TRIG_FALL) | BIT(FLAG_TRIG_RISE) },
+};
+
+static ssize_t gpio_edge_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else {
+		int i;
+
+		status = 0;
+		for (i = 0; i < ARRAY_SIZE(trigger_types); i++)
+			if ((desc->flags & GPIO_TRIGGER_MASK)
+					== trigger_types[i].flags) {
+				status = sprintf(buf, "%s\n",
+						 trigger_types[i].name);
+				break;
+			}
+	}
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static ssize_t gpio_edge_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct gpio_desc	*desc = dev_get_drvdata(dev);
+	ssize_t			status;
+	int			i;
+
+	for (i = 0; i < ARRAY_SIZE(trigger_types); i++)
+		if (sysfs_streq(trigger_types[i].name, buf))
+			goto found;
+	return -EINVAL;
+
+found:
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else {
+		status = gpio_setup_irq(desc, dev, trigger_types[i].flags);
+		if (!status)
+			status = size;
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+	return status;
+}
+
+static DEVICE_ATTR(edge, 0644, gpio_edge_show, gpio_edge_store);
+
 static const struct attribute *gpio_attrs[] = {
 	&dev_attr_direction.attr,
 	&dev_attr_value.attr,
@@ -473,7 +660,7 @@ int gpio_export(unsigned gpio, bool direction_may_change)
 		struct device	*dev;
 
 		dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
-				    desc, ioname ? ioname : "gpio%d", gpio);
+				desc, ioname ? ioname : "gpio%d", gpio);
 		if (dev) {
 			if (direction_may_change)
 				status = sysfs_create_group(&dev->kobj,
@@ -481,6 +668,14 @@ int gpio_export(unsigned gpio, bool direction_may_change)
 			else
 				status = device_create_file(dev,
 						&dev_attr_value);
+
+			if (!status && gpio_to_irq(gpio) >= 0
+					&& (direction_may_change
+						|| !test_bit(FLAG_IS_OUT,
+							&desc->flags)))
+				status = device_create_file(dev,
+						&dev_attr_edge);
+
 			if (status != 0)
 				device_unregister(dev);
 		} else
@@ -572,6 +767,7 @@ void gpio_unexport(unsigned gpio)
 
 		dev = class_find_device(&gpio_class, NULL, desc, match_export);
 		if (dev) {
+			gpio_setup_irq(desc, dev, 0);
 			clear_bit(FLAG_EXPORT, &desc->flags);
 			put_device(dev);
 			device_unregister(dev);
@@ -656,6 +852,8 @@ static int __init gpiolib_sysfs_init(void)
 	unsigned long	flags;
 	unsigned	gpio;
 
+	idr_init(&pdesc_idr);
+
 	status = class_register(&gpio_class);
 	if (status < 0)
 		return status;
-- 
cgit v0.10.2


From ef72af408259f0ff26a733dfa2088d570a111550 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Tue, 22 Sep 2009 16:46:39 -0700
Subject: gpio: gpio support for ADP5520/ADP5501 MFD PMICs

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 9ad20ff..2ad0128 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -162,6 +162,16 @@ config GPIO_WM831X
 	  Say yes here to access the GPIO signals of WM831x power management
 	  chips from Wolfson Microelectronics.
 
+config GPIO_ADP5520
+	tristate "GPIO Support for ADP5520 PMIC"
+	depends on PMIC_ADP5520
+	help
+	  This option enables support for on-chip GPIO found
+	  on Analog Devices ADP5520 PMICs.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called adp5520-gpio.
+
 comment "PCI GPIO expanders:"
 
 config GPIO_BT8XX
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index b8eef76..00a532c 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
+obj-$(CONFIG_GPIO_ADP5520)	+= adp5520-gpio.o
 obj-$(CONFIG_GPIO_LANGWELL)	+= langwell_gpio.o
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
diff --git a/drivers/gpio/adp5520-gpio.c b/drivers/gpio/adp5520-gpio.c
new file mode 100644
index 0000000..ad05bbc
--- /dev/null
+++ b/drivers/gpio/adp5520-gpio.c
@@ -0,0 +1,206 @@
+/*
+ * GPIO driver for Analog Devices ADP5520 MFD PMICs
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/adp5520.h>
+
+#include <linux/gpio.h>
+
+struct adp5520_gpio {
+	struct device *master;
+	struct gpio_chip gpio_chip;
+	unsigned char lut[ADP5520_MAXGPIOS];
+	unsigned long output;
+};
+
+static int adp5520_gpio_get_value(struct gpio_chip *chip, unsigned off)
+{
+	struct adp5520_gpio *dev;
+	uint8_t reg_val;
+
+	dev = container_of(chip, struct adp5520_gpio, gpio_chip);
+
+	/*
+	 * There are dedicated registers for GPIO IN/OUT.
+	 * Make sure we return the right value, even when configured as output
+	 */
+
+	if (test_bit(off, &dev->output))
+		adp5520_read(dev->master, GPIO_OUT, &reg_val);
+	else
+		adp5520_read(dev->master, GPIO_IN, &reg_val);
+
+	return !!(reg_val & dev->lut[off]);
+}
+
+static void adp5520_gpio_set_value(struct gpio_chip *chip,
+		unsigned off, int val)
+{
+	struct adp5520_gpio *dev;
+	dev = container_of(chip, struct adp5520_gpio, gpio_chip);
+
+	if (val)
+		adp5520_set_bits(dev->master, GPIO_OUT, dev->lut[off]);
+	else
+		adp5520_clr_bits(dev->master, GPIO_OUT, dev->lut[off]);
+}
+
+static int adp5520_gpio_direction_input(struct gpio_chip *chip, unsigned off)
+{
+	struct adp5520_gpio *dev;
+	dev = container_of(chip, struct adp5520_gpio, gpio_chip);
+
+	clear_bit(off, &dev->output);
+
+	return adp5520_clr_bits(dev->master, GPIO_CFG_2, dev->lut[off]);
+}
+
+static int adp5520_gpio_direction_output(struct gpio_chip *chip,
+		unsigned off, int val)
+{
+	struct adp5520_gpio *dev;
+	int ret = 0;
+	dev = container_of(chip, struct adp5520_gpio, gpio_chip);
+
+	set_bit(off, &dev->output);
+
+	if (val)
+		ret |= adp5520_set_bits(dev->master, GPIO_OUT, dev->lut[off]);
+	else
+		ret |= adp5520_clr_bits(dev->master, GPIO_OUT, dev->lut[off]);
+
+	ret |= adp5520_set_bits(dev->master, GPIO_CFG_2, dev->lut[off]);
+
+	return ret;
+}
+
+static int __devinit adp5520_gpio_probe(struct platform_device *pdev)
+{
+	struct adp5520_gpio_platfrom_data *pdata = pdev->dev.platform_data;
+	struct adp5520_gpio *dev;
+	struct gpio_chip *gc;
+	int ret, i, gpios;
+	unsigned char ctl_mask = 0;
+
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "missing platform data\n");
+		return -ENODEV;
+	}
+
+	if (pdev->id != ID_ADP5520) {
+		dev_err(&pdev->dev, "only ADP5520 supports GPIO\n");
+		return -ENODEV;
+	}
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev == NULL) {
+		dev_err(&pdev->dev, "failed to alloc memory\n");
+		return -ENOMEM;
+	}
+
+	dev->master = pdev->dev.parent;
+
+	for (gpios = 0, i = 0; i < ADP5520_MAXGPIOS; i++)
+		if (pdata->gpio_en_mask & (1 << i))
+			dev->lut[gpios++] = 1 << i;
+
+	if (gpios < 1) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	gc = &dev->gpio_chip;
+	gc->direction_input  = adp5520_gpio_direction_input;
+	gc->direction_output = adp5520_gpio_direction_output;
+	gc->get = adp5520_gpio_get_value;
+	gc->set = adp5520_gpio_set_value;
+	gc->can_sleep = 1;
+
+	gc->base = pdata->gpio_start;
+	gc->ngpio = gpios;
+	gc->label = pdev->name;
+	gc->owner = THIS_MODULE;
+
+	ret = adp5520_clr_bits(dev->master, GPIO_CFG_1,
+		pdata->gpio_en_mask);
+
+	if (pdata->gpio_en_mask & GPIO_C3)
+		ctl_mask |= C3_MODE;
+
+	if (pdata->gpio_en_mask & GPIO_R3)
+		ctl_mask |= R3_MODE;
+
+	if (ctl_mask)
+		ret = adp5520_set_bits(dev->master, LED_CONTROL,
+			ctl_mask);
+
+	ret |= adp5520_set_bits(dev->master, GPIO_PULLUP,
+		pdata->gpio_pullup_mask);
+
+	if (ret) {
+		dev_err(&pdev->dev, "failed to write\n");
+		goto err;
+	}
+
+	ret = gpiochip_add(&dev->gpio_chip);
+	if (ret)
+		goto err;
+
+	platform_set_drvdata(pdev, dev);
+	return 0;
+
+err:
+	kfree(dev);
+	return ret;
+}
+
+static int __devexit adp5520_gpio_remove(struct platform_device *pdev)
+{
+	struct adp5520_gpio *dev;
+	int ret;
+
+	dev = platform_get_drvdata(pdev);
+	ret = gpiochip_remove(&dev->gpio_chip);
+	if (ret) {
+		dev_err(&pdev->dev, "%s failed, %d\n",
+				"gpiochip_remove()", ret);
+		return ret;
+	}
+
+	kfree(dev);
+	return 0;
+}
+
+static struct platform_driver adp5520_gpio_driver = {
+	.driver	= {
+		.name	= "adp5520-gpio",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= adp5520_gpio_probe,
+	.remove		= __devexit_p(adp5520_gpio_remove),
+};
+
+static int __init adp5520_gpio_init(void)
+{
+	return platform_driver_register(&adp5520_gpio_driver);
+}
+module_init(adp5520_gpio_init);
+
+static void __exit adp5520_gpio_exit(void)
+{
+	platform_driver_unregister(&adp5520_gpio_driver);
+}
+module_exit(adp5520_gpio_exit);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("GPIO ADP5520 Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:adp5520-gpio");
-- 
cgit v0.10.2


From 28ff0c12b7069f076f08a936df66f1d5052e9121 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Tue, 22 Sep 2009 16:46:40 -0700
Subject: omapfb: add support for the Apollon LCD

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index ed13889..d053498 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -24,5 +24,7 @@ objs-$(CONFIG_ARCH_OMAP16XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1610.o
 objs-$(CONFIG_ARCH_OMAP15XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1510.o
 objs-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
 
+objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o
+
 omapfb-objs := $(objs-yy)
 
diff --git a/drivers/video/omap/lcd_apollon.c b/drivers/video/omap/lcd_apollon.c
new file mode 100644
index 0000000..626ae3a
--- /dev/null
+++ b/drivers/video/omap/lcd_apollon.c
@@ -0,0 +1,138 @@
+/*
+ * LCD panel support for the Samsung OMAP2 Apollon board
+ *
+ * Copyright (C) 2005,2006 Samsung Electronics
+ * Author: Kyungmin Park <kyungmin.park@samsung.com>
+ *
+ * Derived from drivers/video/omap/lcd-h4.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <mach/gpio.h>
+#include <mach/mux.h>
+#include <mach/omapfb.h>
+
+/* #define USE_35INCH_LCD 1 */
+
+static int apollon_panel_init(struct lcd_panel *panel,
+				struct omapfb_device *fbdev)
+{
+	/* configure LCD PWR_EN */
+	omap_cfg_reg(M21_242X_GPIO11);
+	return 0;
+}
+
+static void apollon_panel_cleanup(struct lcd_panel *panel)
+{
+}
+
+static int apollon_panel_enable(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+static void apollon_panel_disable(struct lcd_panel *panel)
+{
+}
+
+static unsigned long apollon_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+struct lcd_panel apollon_panel = {
+	.name		= "apollon",
+	.config		= OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
+			  OMAP_LCDC_INV_HSYNC,
+
+	.bpp		= 16,
+	.data_lines	= 18,
+#ifdef USE_35INCH_LCD
+	.x_res		= 240,
+	.y_res		= 320,
+	.hsw		= 2,
+	.hfp		= 3,
+	.hbp		= 9,
+	.vsw		= 4,
+	.vfp		= 3,
+	.vbp		= 5,
+#else
+	.x_res		= 480,
+	.y_res		= 272,
+	.hsw		= 41,
+	.hfp		= 2,
+	.hbp		= 2,
+	.vsw		= 10,
+	.vfp		= 2,
+	.vbp		= 2,
+#endif
+	.pixel_clock	= 6250,
+
+	.init		= apollon_panel_init,
+	.cleanup	= apollon_panel_cleanup,
+	.enable		= apollon_panel_enable,
+	.disable	= apollon_panel_disable,
+	.get_caps	= apollon_panel_get_caps,
+};
+
+static int apollon_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&apollon_panel);
+	return 0;
+}
+
+static int apollon_panel_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int apollon_panel_suspend(struct platform_device *pdev,
+				  pm_message_t mesg)
+{
+	return 0;
+}
+
+static int apollon_panel_resume(struct platform_device *pdev)
+{
+	return 0;
+}
+
+struct platform_driver apollon_panel_driver = {
+	.probe		= apollon_panel_probe,
+	.remove		= apollon_panel_remove,
+	.suspend	= apollon_panel_suspend,
+	.resume		= apollon_panel_resume,
+	.driver		= {
+		.name	= "apollon_lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init apollon_panel_drv_init(void)
+{
+	return platform_driver_register(&apollon_panel_driver);
+}
+
+static void __exit apollon_panel_drv_exit(void)
+{
+	platform_driver_unregister(&apollon_panel_driver);
+}
+
+module_init(apollon_panel_drv_init);
+module_exit(apollon_panel_drv_exit);
-- 
cgit v0.10.2


From 66d2f99d0bb5a2972fb5c1d88b61169510e540d6 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Tue, 22 Sep 2009 16:46:41 -0700
Subject: omapfb: add support for MIPI-DCS compatible LCDs

Fixed-by: Mike Wege <ext-mike.wege@nokia.com>
Fixed-by: Arnaud Patard <arnaud.patard@rtp-net.org>
Fixed-by: Timo Savola <tsavola@movial.fi>
Fixed-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Fixed-by: Trilok Soni <soni.trilok@gmail.com>
Signed-off-by: Imre Deak <imre.deak@solidboot.com>
Signed-off-by: Juha Yrjola <juha.yrjola@solidboot.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/plat-omap/include/mach/lcd_mipid.h b/arch/arm/plat-omap/include/mach/lcd_mipid.h
index f8fbc48..8e52c65 100644
--- a/arch/arm/plat-omap/include/mach/lcd_mipid.h
+++ b/arch/arm/plat-omap/include/mach/lcd_mipid.h
@@ -16,7 +16,12 @@ enum mipid_test_result {
 struct mipid_platform_data {
 	int	nreset_gpio;
 	int	data_lines;
+
 	void	(*shutdown)(struct mipid_platform_data *pdata);
+	void	(*set_bklight_level)(struct mipid_platform_data *pdata,
+				     int level);
+	int	(*get_bklight_level)(struct mipid_platform_data *pdata);
+	int	(*get_bklight_max)(struct mipid_platform_data *pdata);
 };
 
 #endif
diff --git a/drivers/video/omap/Kconfig b/drivers/video/omap/Kconfig
index 4440885..574453f 100644
--- a/drivers/video/omap/Kconfig
+++ b/drivers/video/omap/Kconfig
@@ -7,6 +7,14 @@ config FB_OMAP
 	help
           Frame buffer driver for OMAP based boards.
 
+config FB_OMAP_LCD_MIPID
+	bool "MIPI DBI-C/DCS compatible LCD support"
+	depends on FB_OMAP && SPI_MASTER
+	help
+	  Say Y here if you want to have support for LCDs compatible with
+	  the Mobile Industry Processor Interface DBI-C/DCS
+	  specification. (Supported LCDs: Philips LPH8923, Sharp LS041Y3)
+
 config FB_OMAP_BOOTLOADER_INIT
 	bool "Check bootloader initialization"
 	depends on FB_OMAP
diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index d053498..d86d54a 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -25,6 +25,7 @@ objs-$(CONFIG_ARCH_OMAP15XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1510.o
 objs-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
 
 objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o
+objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
 
 omapfb-objs := $(objs-yy)
 
diff --git a/drivers/video/omap/lcd_mipid.c b/drivers/video/omap/lcd_mipid.c
new file mode 100644
index 0000000..918ee89
--- /dev/null
+++ b/drivers/video/omap/lcd_mipid.c
@@ -0,0 +1,625 @@
+/*
+ * LCD driver for MIPI DBI-C / DCS compatible LCDs
+ *
+ * Copyright (C) 2006 Nokia Corporation
+ * Author: Imre Deak <imre.deak@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/spi/spi.h>
+
+#include <mach/omapfb.h>
+#include <mach/lcd_mipid.h>
+
+#define MIPID_MODULE_NAME		"lcd_mipid"
+
+#define MIPID_CMD_READ_DISP_ID		0x04
+#define MIPID_CMD_READ_RED		0x06
+#define MIPID_CMD_READ_GREEN		0x07
+#define MIPID_CMD_READ_BLUE		0x08
+#define MIPID_CMD_READ_DISP_STATUS	0x09
+#define MIPID_CMD_RDDSDR		0x0F
+#define MIPID_CMD_SLEEP_IN		0x10
+#define MIPID_CMD_SLEEP_OUT		0x11
+#define MIPID_CMD_DISP_OFF		0x28
+#define MIPID_CMD_DISP_ON		0x29
+
+#define MIPID_ESD_CHECK_PERIOD		msecs_to_jiffies(5000)
+
+#define to_mipid_device(p)		container_of(p, struct mipid_device, \
+						panel)
+struct mipid_device {
+	int		enabled;
+	int		revision;
+	unsigned int	saved_bklight_level;
+	unsigned long	hw_guard_end;		/* next value of jiffies
+						   when we can issue the
+						   next sleep in/out command */
+	unsigned long	hw_guard_wait;		/* max guard time in jiffies */
+
+	struct omapfb_device	*fbdev;
+	struct spi_device	*spi;
+	struct mutex		mutex;
+	struct lcd_panel	panel;
+
+	struct workqueue_struct	*esd_wq;
+	struct delayed_work	esd_work;
+	void			(*esd_check)(struct mipid_device *m);
+};
+
+static void mipid_transfer(struct mipid_device *md, int cmd, const u8 *wbuf,
+			   int wlen, u8 *rbuf, int rlen)
+{
+	struct spi_message	m;
+	struct spi_transfer	*x, xfer[4];
+	u16			w;
+	int			r;
+
+	BUG_ON(md->spi == NULL);
+
+	spi_message_init(&m);
+
+	memset(xfer, 0, sizeof(xfer));
+	x = &xfer[0];
+
+	cmd &=  0xff;
+	x->tx_buf		= &cmd;
+	x->bits_per_word	= 9;
+	x->len			= 2;
+	spi_message_add_tail(x, &m);
+
+	if (wlen) {
+		x++;
+		x->tx_buf		= wbuf;
+		x->len			= wlen;
+		x->bits_per_word	= 9;
+		spi_message_add_tail(x, &m);
+	}
+
+	if (rlen) {
+		x++;
+		x->rx_buf	= &w;
+		x->len		= 1;
+		spi_message_add_tail(x, &m);
+
+		if (rlen > 1) {
+			/* Arrange for the extra clock before the first
+			 * data bit.
+			 */
+			x->bits_per_word = 9;
+			x->len		 = 2;
+
+			x++;
+			x->rx_buf	 = &rbuf[1];
+			x->len		 = rlen - 1;
+			spi_message_add_tail(x, &m);
+		}
+	}
+
+	r = spi_sync(md->spi, &m);
+	if (r < 0)
+		dev_dbg(&md->spi->dev, "spi_sync %d\n", r);
+
+	if (rlen)
+		rbuf[0] = w & 0xff;
+}
+
+static inline void mipid_cmd(struct mipid_device *md, int cmd)
+{
+	mipid_transfer(md, cmd, NULL, 0, NULL, 0);
+}
+
+static inline void mipid_write(struct mipid_device *md,
+			       int reg, const u8 *buf, int len)
+{
+	mipid_transfer(md, reg, buf, len, NULL, 0);
+}
+
+static inline void mipid_read(struct mipid_device *md,
+			      int reg, u8 *buf, int len)
+{
+	mipid_transfer(md, reg, NULL, 0, buf, len);
+}
+
+static void set_data_lines(struct mipid_device *md, int data_lines)
+{
+	u16 par;
+
+	switch (data_lines) {
+	case 16:
+		par = 0x150;
+		break;
+	case 18:
+		par = 0x160;
+		break;
+	case 24:
+		par = 0x170;
+		break;
+	}
+	mipid_write(md, 0x3a, (u8 *)&par, 2);
+}
+
+static void send_init_string(struct mipid_device *md)
+{
+	u16 initpar[] = { 0x0102, 0x0100, 0x0100 };
+
+	mipid_write(md, 0xc2, (u8 *)initpar, sizeof(initpar));
+	set_data_lines(md, md->panel.data_lines);
+}
+
+static void hw_guard_start(struct mipid_device *md, int guard_msec)
+{
+	md->hw_guard_wait = msecs_to_jiffies(guard_msec);
+	md->hw_guard_end = jiffies + md->hw_guard_wait;
+}
+
+static void hw_guard_wait(struct mipid_device *md)
+{
+	unsigned long wait = md->hw_guard_end - jiffies;
+
+	if ((long)wait > 0 && wait <= md->hw_guard_wait) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(wait);
+	}
+}
+
+static void set_sleep_mode(struct mipid_device *md, int on)
+{
+	int cmd, sleep_time = 50;
+
+	if (on)
+		cmd = MIPID_CMD_SLEEP_IN;
+	else
+		cmd = MIPID_CMD_SLEEP_OUT;
+	hw_guard_wait(md);
+	mipid_cmd(md, cmd);
+	hw_guard_start(md, 120);
+	/*
+	 * When we enable the panel, it seems we _have_ to sleep
+	 * 120 ms before sending the init string. When disabling the
+	 * panel we'll sleep for the duration of 2 frames, so that the
+	 * controller can still provide the PCLK,HS,VS signals.
+	 */
+	if (!on)
+		sleep_time = 120;
+	msleep(sleep_time);
+}
+
+static void set_display_state(struct mipid_device *md, int enabled)
+{
+	int cmd = enabled ? MIPID_CMD_DISP_ON : MIPID_CMD_DISP_OFF;
+
+	mipid_cmd(md, cmd);
+}
+
+static int mipid_set_bklight_level(struct lcd_panel *panel, unsigned int level)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+	struct mipid_platform_data *pd = md->spi->dev.platform_data;
+
+	if (pd->get_bklight_max == NULL || pd->set_bklight_level == NULL)
+		return -ENODEV;
+	if (level > pd->get_bklight_max(pd))
+		return -EINVAL;
+	if (!md->enabled) {
+		md->saved_bklight_level = level;
+		return 0;
+	}
+	pd->set_bklight_level(pd, level);
+
+	return 0;
+}
+
+static unsigned int mipid_get_bklight_level(struct lcd_panel *panel)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+	struct mipid_platform_data *pd = md->spi->dev.platform_data;
+
+	if (pd->get_bklight_level == NULL)
+		return -ENODEV;
+	return pd->get_bklight_level(pd);
+}
+
+static unsigned int mipid_get_bklight_max(struct lcd_panel *panel)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+	struct mipid_platform_data *pd = md->spi->dev.platform_data;
+
+	if (pd->get_bklight_max == NULL)
+		return -ENODEV;
+
+	return pd->get_bklight_max(pd);
+}
+
+static unsigned long mipid_get_caps(struct lcd_panel *panel)
+{
+	return OMAPFB_CAPS_SET_BACKLIGHT;
+}
+
+static u16 read_first_pixel(struct mipid_device *md)
+{
+	u16 pixel;
+	u8 red, green, blue;
+
+	mutex_lock(&md->mutex);
+	mipid_read(md, MIPID_CMD_READ_RED, &red, 1);
+	mipid_read(md, MIPID_CMD_READ_GREEN, &green, 1);
+	mipid_read(md, MIPID_CMD_READ_BLUE, &blue, 1);
+	mutex_unlock(&md->mutex);
+
+	switch (md->panel.data_lines) {
+	case 16:
+		pixel = ((red >> 1) << 11) | (green << 5) | (blue >> 1);
+		break;
+	case 24:
+		/* 24 bit -> 16 bit */
+		pixel = ((red >> 3) << 11) | ((green >> 2) << 5) |
+			(blue >> 3);
+		break;
+	default:
+		pixel = 0;
+		BUG();
+	}
+
+	return pixel;
+}
+
+static int mipid_run_test(struct lcd_panel *panel, int test_num)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+	static const u16 test_values[4] = {
+		0x0000, 0xffff, 0xaaaa, 0x5555,
+	};
+	int i;
+
+	if (test_num != MIPID_TEST_RGB_LINES)
+		return MIPID_TEST_INVALID;
+
+	for (i = 0; i < ARRAY_SIZE(test_values); i++) {
+		int delay;
+		unsigned long tmo;
+
+		omapfb_write_first_pixel(md->fbdev, test_values[i]);
+		tmo = jiffies + msecs_to_jiffies(100);
+		delay = 25;
+		while (1) {
+			u16 pixel;
+
+			msleep(delay);
+			pixel = read_first_pixel(md);
+			if (pixel == test_values[i])
+				break;
+			if (time_after(jiffies, tmo)) {
+				dev_err(&md->spi->dev,
+					"MIPI LCD RGB I/F test failed: "
+					"expecting %04x, got %04x\n",
+					test_values[i], pixel);
+				return MIPID_TEST_FAILED;
+			}
+			delay = 10;
+		}
+	}
+
+	return 0;
+}
+
+static void ls041y3_esd_recover(struct mipid_device *md)
+{
+	dev_err(&md->spi->dev, "performing LCD ESD recovery\n");
+	set_sleep_mode(md, 1);
+	set_sleep_mode(md, 0);
+}
+
+static void ls041y3_esd_check_mode1(struct mipid_device *md)
+{
+	u8 state1, state2;
+
+	mipid_read(md, MIPID_CMD_RDDSDR, &state1, 1);
+	set_sleep_mode(md, 0);
+	mipid_read(md, MIPID_CMD_RDDSDR, &state2, 1);
+	dev_dbg(&md->spi->dev, "ESD mode 1 state1 %02x state2 %02x\n",
+		state1, state2);
+	/* Each sleep out command will trigger a self diagnostic and flip
+	* Bit6 if the test passes.
+	*/
+	if (!((state1 ^ state2) & (1 << 6)))
+		ls041y3_esd_recover(md);
+}
+
+static void ls041y3_esd_check_mode2(struct mipid_device *md)
+{
+	int i;
+	u8 rbuf[2];
+	static const struct {
+		int	cmd;
+		int	wlen;
+		u16	wbuf[3];
+	} *rd, rd_ctrl[7] = {
+		{ 0xb0, 4, { 0x0101, 0x01fe, } },
+		{ 0xb1, 4, { 0x01de, 0x0121, } },
+		{ 0xc2, 4, { 0x0100, 0x0100, } },
+		{ 0xbd, 2, { 0x0100, } },
+		{ 0xc2, 4, { 0x01fc, 0x0103, } },
+		{ 0xb4, 0, },
+		{ 0x00, 0, },
+	};
+
+	rd = rd_ctrl;
+	for (i = 0; i < 3; i++, rd++)
+		mipid_write(md, rd->cmd, (u8 *)rd->wbuf, rd->wlen);
+
+	udelay(10);
+	mipid_read(md, rd->cmd, rbuf, 2);
+	rd++;
+
+	for (i = 0; i < 3; i++, rd++) {
+		udelay(10);
+		mipid_write(md, rd->cmd, (u8 *)rd->wbuf, rd->wlen);
+	}
+
+	dev_dbg(&md->spi->dev, "ESD mode 2 state %02x\n", rbuf[1]);
+	if (rbuf[1] == 0x00)
+		ls041y3_esd_recover(md);
+}
+
+static void ls041y3_esd_check(struct mipid_device *md)
+{
+	ls041y3_esd_check_mode1(md);
+	if (md->revision >= 0x88)
+		ls041y3_esd_check_mode2(md);
+}
+
+static void mipid_esd_start_check(struct mipid_device *md)
+{
+	if (md->esd_check != NULL)
+		queue_delayed_work(md->esd_wq, &md->esd_work,
+				   MIPID_ESD_CHECK_PERIOD);
+}
+
+static void mipid_esd_stop_check(struct mipid_device *md)
+{
+	if (md->esd_check != NULL)
+		cancel_rearming_delayed_workqueue(md->esd_wq, &md->esd_work);
+}
+
+static void mipid_esd_work(struct work_struct *work)
+{
+	struct mipid_device *md = container_of(work, struct mipid_device,
+					       esd_work.work);
+
+	mutex_lock(&md->mutex);
+	md->esd_check(md);
+	mutex_unlock(&md->mutex);
+	mipid_esd_start_check(md);
+}
+
+static int mipid_enable(struct lcd_panel *panel)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+
+	mutex_lock(&md->mutex);
+
+	if (md->enabled) {
+		mutex_unlock(&md->mutex);
+		return 0;
+	}
+	set_sleep_mode(md, 0);
+	md->enabled = 1;
+	send_init_string(md);
+	set_display_state(md, 1);
+	mipid_set_bklight_level(panel, md->saved_bklight_level);
+	mipid_esd_start_check(md);
+
+	mutex_unlock(&md->mutex);
+	return 0;
+}
+
+static void mipid_disable(struct lcd_panel *panel)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+
+	/*
+	 * A final ESD work might be called before returning,
+	 * so do this without holding the lock.
+	 */
+	mipid_esd_stop_check(md);
+	mutex_lock(&md->mutex);
+
+	if (!md->enabled) {
+		mutex_unlock(&md->mutex);
+		return;
+	}
+	md->saved_bklight_level = mipid_get_bklight_level(panel);
+	mipid_set_bklight_level(panel, 0);
+	set_display_state(md, 0);
+	set_sleep_mode(md, 1);
+	md->enabled = 0;
+
+	mutex_unlock(&md->mutex);
+}
+
+static int panel_enabled(struct mipid_device *md)
+{
+	u32 disp_status;
+	int enabled;
+
+	mipid_read(md, MIPID_CMD_READ_DISP_STATUS, (u8 *)&disp_status, 4);
+	disp_status = __be32_to_cpu(disp_status);
+	enabled = (disp_status & (1 << 17)) && (disp_status & (1 << 10));
+	dev_dbg(&md->spi->dev,
+		"LCD panel %senabled by bootloader (status 0x%04x)\n",
+		enabled ? "" : "not ", disp_status);
+	return enabled;
+}
+
+static int mipid_init(struct lcd_panel *panel,
+			    struct omapfb_device *fbdev)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+
+	md->fbdev = fbdev;
+	md->esd_wq = create_singlethread_workqueue("mipid_esd");
+	if (md->esd_wq == NULL) {
+		dev_err(&md->spi->dev, "can't create ESD workqueue\n");
+		return -ENOMEM;
+	}
+	INIT_DELAYED_WORK(&md->esd_work, mipid_esd_work);
+	mutex_init(&md->mutex);
+
+	md->enabled = panel_enabled(md);
+
+	if (md->enabled)
+		mipid_esd_start_check(md);
+	else
+		md->saved_bklight_level = mipid_get_bklight_level(panel);
+
+	return 0;
+}
+
+static void mipid_cleanup(struct lcd_panel *panel)
+{
+	struct mipid_device *md = to_mipid_device(panel);
+
+	if (md->enabled)
+		mipid_esd_stop_check(md);
+	destroy_workqueue(md->esd_wq);
+}
+
+static struct lcd_panel mipid_panel = {
+	.config		= OMAP_LCDC_PANEL_TFT,
+
+	.bpp		= 16,
+	.x_res		= 800,
+	.y_res		= 480,
+	.pixel_clock	= 21940,
+	.hsw		= 50,
+	.hfp		= 20,
+	.hbp		= 15,
+	.vsw		= 2,
+	.vfp		= 1,
+	.vbp		= 3,
+
+	.init			= mipid_init,
+	.cleanup		= mipid_cleanup,
+	.enable			= mipid_enable,
+	.disable		= mipid_disable,
+	.get_caps		= mipid_get_caps,
+	.set_bklight_level	= mipid_set_bklight_level,
+	.get_bklight_level	= mipid_get_bklight_level,
+	.get_bklight_max	= mipid_get_bklight_max,
+	.run_test		= mipid_run_test,
+};
+
+static int mipid_detect(struct mipid_device *md)
+{
+	struct mipid_platform_data *pdata;
+	u8 display_id[3];
+
+	pdata = md->spi->dev.platform_data;
+	if (pdata == NULL) {
+		dev_err(&md->spi->dev, "missing platform data\n");
+		return -ENOENT;
+	}
+
+	mipid_read(md, MIPID_CMD_READ_DISP_ID, display_id, 3);
+	dev_dbg(&md->spi->dev, "MIPI display ID: %02x%02x%02x\n",
+		display_id[0], display_id[1], display_id[2]);
+
+	switch (display_id[0]) {
+	case 0x45:
+		md->panel.name = "lph8923";
+		break;
+	case 0x83:
+		md->panel.name = "ls041y3";
+		md->esd_check = ls041y3_esd_check;
+		break;
+	default:
+		md->panel.name = "unknown";
+		dev_err(&md->spi->dev, "invalid display ID\n");
+		return -ENODEV;
+	}
+
+	md->revision = display_id[1];
+	md->panel.data_lines = pdata->data_lines;
+	pr_info("omapfb: %s rev %02x LCD detected, %d data lines\n",
+			md->panel.name, md->revision, md->panel.data_lines);
+
+	return 0;
+}
+
+static int mipid_spi_probe(struct spi_device *spi)
+{
+	struct mipid_device *md;
+	int r;
+
+	md = kzalloc(sizeof(*md), GFP_KERNEL);
+	if (md == NULL) {
+		dev_err(&spi->dev, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	spi->mode = SPI_MODE_0;
+	md->spi = spi;
+	dev_set_drvdata(&spi->dev, md);
+	md->panel = mipid_panel;
+
+	r = mipid_detect(md);
+	if (r < 0)
+		return r;
+
+	omapfb_register_panel(&md->panel);
+
+	return 0;
+}
+
+static int mipid_spi_remove(struct spi_device *spi)
+{
+	struct mipid_device *md = dev_get_drvdata(&spi->dev);
+
+	mipid_disable(&md->panel);
+	kfree(md);
+
+	return 0;
+}
+
+static struct spi_driver mipid_spi_driver = {
+	.driver = {
+		.name	= MIPID_MODULE_NAME,
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+	},
+	.probe	= mipid_spi_probe,
+	.remove	= __devexit_p(mipid_spi_remove),
+};
+
+static int mipid_drv_init(void)
+{
+	spi_register_driver(&mipid_spi_driver);
+
+	return 0;
+}
+module_init(mipid_drv_init);
+
+static void mipid_drv_cleanup(void)
+{
+	spi_unregister_driver(&mipid_spi_driver);
+}
+module_exit(mipid_drv_cleanup);
+
+MODULE_DESCRIPTION("MIPI display driver");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 92e0ed074a4f134084bef47a44e948b6c6429a89 Mon Sep 17 00:00:00 2001
From: Jonathan McDowell <noodles@earth.li>
Date: Tue, 22 Sep 2009 16:46:44 -0700
Subject: omapfb: add support for the Amstrad Delta LCD

This is an updated version of the LCD driver for the Amstrad Delta to take
into account the recent changes to the omapfb infrastructure.  The Delta
features a 480x320 12 bit DSTN panel.

Signed-off-by: Jonathan McDowell <noodles@earth.li>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index d86d54a..2bf94ad 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -15,6 +15,7 @@ objs-$(CONFIG_ARCH_OMAP2)$(CONFIG_FB_OMAP_LCDC_EXTERNAL) += rfbi.o
 objs-y$(CONFIG_FB_OMAP_LCDC_HWA742) += hwa742.o
 objs-y$(CONFIG_FB_OMAP_LCDC_BLIZZARD) += blizzard.o
 
+objs-y$(CONFIG_MACH_AMS_DELTA) += lcd_ams_delta.o
 objs-y$(CONFIG_MACH_OMAP_H4) += lcd_h4.o
 objs-y$(CONFIG_MACH_OMAP_H3) += lcd_h3.o
 objs-y$(CONFIG_MACH_OMAP_PALMTE) += lcd_palmte.o
diff --git a/drivers/video/omap/lcd_ams_delta.c b/drivers/video/omap/lcd_ams_delta.c
new file mode 100644
index 0000000..1f74399
--- /dev/null
+++ b/drivers/video/omap/lcd_ams_delta.c
@@ -0,0 +1,137 @@
+/*
+ * Based on drivers/video/omap/lcd_inn1510.c
+ *
+ * LCD panel support for the Amstrad E3 (Delta) videophone.
+ *
+ * Copyright (C) 2006 Jonathan McDowell <noodles@earth.li>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+
+#include <mach/board-ams-delta.h>
+#include <mach/hardware.h>
+#include <mach/omapfb.h>
+
+#define AMS_DELTA_DEFAULT_CONTRAST	112
+
+static int ams_delta_panel_init(struct lcd_panel *panel,
+		struct omapfb_device *fbdev)
+{
+	return 0;
+}
+
+static void ams_delta_panel_cleanup(struct lcd_panel *panel)
+{
+}
+
+static int ams_delta_panel_enable(struct lcd_panel *panel)
+{
+	ams_delta_latch2_write(AMS_DELTA_LATCH2_LCD_NDISP,
+			AMS_DELTA_LATCH2_LCD_NDISP);
+	ams_delta_latch2_write(AMS_DELTA_LATCH2_LCD_VBLEN,
+			AMS_DELTA_LATCH2_LCD_VBLEN);
+
+	omap_writeb(1, OMAP_PWL_CLK_ENABLE);
+	omap_writeb(AMS_DELTA_DEFAULT_CONTRAST, OMAP_PWL_ENABLE);
+
+	return 0;
+}
+
+static void ams_delta_panel_disable(struct lcd_panel *panel)
+{
+	ams_delta_latch2_write(AMS_DELTA_LATCH2_LCD_VBLEN, 0);
+	ams_delta_latch2_write(AMS_DELTA_LATCH2_LCD_NDISP, 0);
+}
+
+static unsigned long ams_delta_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+static struct lcd_panel ams_delta_panel = {
+	.name		= "ams-delta",
+	.config		= 0,
+
+	.bpp		= 12,
+	.data_lines	= 16,
+	.x_res		= 480,
+	.y_res		= 320,
+	.pixel_clock	= 4687,
+	.hsw		= 3,
+	.hfp		= 1,
+	.hbp		= 1,
+	.vsw		= 1,
+	.vfp		= 0,
+	.vbp		= 0,
+	.pcd		= 0,
+	.acb		= 37,
+
+	.init		= ams_delta_panel_init,
+	.cleanup	= ams_delta_panel_cleanup,
+	.enable		= ams_delta_panel_enable,
+	.disable	= ams_delta_panel_disable,
+	.get_caps	= ams_delta_panel_get_caps,
+};
+
+static int ams_delta_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&ams_delta_panel);
+	return 0;
+}
+
+static int ams_delta_panel_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int ams_delta_panel_suspend(struct platform_device *pdev,
+		pm_message_t mesg)
+{
+	return 0;
+}
+
+static int ams_delta_panel_resume(struct platform_device *pdev)
+{
+	return 0;
+}
+
+struct platform_driver ams_delta_panel_driver = {
+	.probe		= ams_delta_panel_probe,
+	.remove		= ams_delta_panel_remove,
+	.suspend	= ams_delta_panel_suspend,
+	.resume		= ams_delta_panel_resume,
+	.driver		= {
+		.name	= "lcd_ams_delta",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int ams_delta_panel_drv_init(void)
+{
+	return platform_driver_register(&ams_delta_panel_driver);
+}
+
+static void ams_delta_panel_drv_cleanup(void)
+{
+	platform_driver_unregister(&ams_delta_panel_driver);
+}
+
+module_init(ams_delta_panel_drv_init);
+module_exit(ams_delta_panel_drv_cleanup);
-- 
cgit v0.10.2


From 3a76a819dda9a6de0ab83d04b48b3735a4d10b89 Mon Sep 17 00:00:00 2001
From: Hunyue Yau <hyau@mvista.com>
Date: Tue, 22 Sep 2009 16:46:45 -0700
Subject: omapfb: add support for the 2430SDP LCD

Add glue to control the 2430SDP LCD as a frame buffer device using the
existing dispc.c driver under omapfb.

Fixed-by: Kevin Hilman <khilman@mvista.com>
Fixed-by: Sergio Aguirre <saaguirre@ti.com>
Fixed-by: Francisco Alecrim <francisco.alecrim@indt.org.br>
Fixed-by: Tony Lindgren <tony@atomide.com>
Fixed-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Hunyue Yau <hyau@mvista.com>
Signed-off-by: Kevin Hilman <khilman@mvista.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index 2bf94ad..7a37b03 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -26,6 +26,7 @@ objs-$(CONFIG_ARCH_OMAP15XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1510.o
 objs-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
 
 objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o
+objs-y$(CONFIG_MACH_OMAP_2430SDP) += lcd_2430sdp.o
 objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
 
 omapfb-objs := $(objs-yy)
diff --git a/drivers/video/omap/lcd_2430sdp.c b/drivers/video/omap/lcd_2430sdp.c
new file mode 100644
index 0000000..284cfce
--- /dev/null
+++ b/drivers/video/omap/lcd_2430sdp.c
@@ -0,0 +1,198 @@
+/*
+ * LCD panel support for the TI 2430SDP board
+ *
+ * Copyright (C) 2007 MontaVista
+ * Author: Hunyue Yau <hyau@mvista.com>
+ *
+ * Derived from drivers/video/omap/lcd-apollon.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/i2c/twl4030.h>
+
+#include <mach/mux.h>
+#include <mach/omapfb.h>
+#include <asm/mach-types.h>
+
+#define SDP2430_LCD_PANEL_BACKLIGHT_GPIO	91
+#define SDP2430_LCD_PANEL_ENABLE_GPIO		154
+#define SDP3430_LCD_PANEL_BACKLIGHT_GPIO	24
+#define SDP3430_LCD_PANEL_ENABLE_GPIO		28
+
+static unsigned backlight_gpio;
+static unsigned enable_gpio;
+
+#define LCD_PANEL_BACKLIGHT_GPIO	91
+#define LCD_PANEL_ENABLE_GPIO		154
+#define LCD_PIXCLOCK_MAX		5400 /* freq 5.4 MHz */
+#define PM_RECEIVER             TWL4030_MODULE_PM_RECEIVER
+#define ENABLE_VAUX2_DEDICATED  0x09
+#define ENABLE_VAUX2_DEV_GRP    0x20
+
+
+#define t2_out(c, r, v) twl4030_i2c_write_u8(c, r, v)
+
+
+static int sdp2430_panel_init(struct lcd_panel *panel,
+				struct omapfb_device *fbdev)
+{
+	if (machine_is_omap_3430sdp()) {
+		enable_gpio    = SDP3430_LCD_PANEL_ENABLE_GPIO;
+		backlight_gpio = SDP3430_LCD_PANEL_BACKLIGHT_GPIO;
+	} else {
+		enable_gpio    = SDP2430_LCD_PANEL_ENABLE_GPIO;
+		backlight_gpio = SDP2430_LCD_PANEL_BACKLIGHT_GPIO;
+	}
+
+	gpio_request(enable_gpio, "LCD enable");	/* LCD panel */
+	gpio_request(backlight_gpio, "LCD bl");		/* LCD backlight */
+	gpio_direction_output(enable_gpio, 0);
+	gpio_direction_output(backlight_gpio, 0);
+
+	return 0;
+}
+
+static void sdp2430_panel_cleanup(struct lcd_panel *panel)
+{
+	gpio_free(backlight_gpio);
+	gpio_free(enable_gpio);
+}
+
+static int sdp2430_panel_enable(struct lcd_panel *panel)
+{
+	u8 ded_val, ded_reg;
+	u8 grp_val, grp_reg;
+
+	if (machine_is_omap_3430sdp()) {
+		ded_reg = TWL4030_VAUX3_DEDICATED;
+		ded_val = ENABLE_VAUX3_DEDICATED;
+		grp_reg = TWL4030_VAUX3_DEV_GRP;
+		grp_val = ENABLE_VAUX3_DEV_GRP;
+
+		if (omap_rev() > OMAP3430_REV_ES1_0) {
+			t2_out(PM_RECEIVER, ENABLE_VPLL2_DEDICATED,
+					TWL4030_VPLL2_DEDICATED);
+			t2_out(PM_RECEIVER, ENABLE_VPLL2_DEV_GRP,
+					TWL4030_VPLL2_DEV_GRP);
+		}
+	} else {
+		ded_reg = TWL4030_VAUX2_DEDICATED;
+		ded_val = ENABLE_VAUX2_DEDICATED;
+		grp_reg = TWL4030_VAUX2_DEV_GRP;
+		grp_val = ENABLE_VAUX2_DEV_GRP;
+	}
+
+	gpio_set_value(enable_gpio, 1);
+	gpio_set_value(backlight_gpio, 1);
+
+	if (0 != t2_out(PM_RECEIVER, ded_val, ded_reg))
+		return -EIO;
+	if (0 != t2_out(PM_RECEIVER, grp_val, grp_reg))
+		return -EIO;
+
+	return 0;
+}
+
+static void sdp2430_panel_disable(struct lcd_panel *panel)
+{
+	gpio_set_value(enable_gpio, 0);
+	gpio_set_value(backlight_gpio, 0);
+	if (omap_rev() > OMAP3430_REV_ES1_0) {
+		t2_out(PM_RECEIVER, 0x0, TWL4030_VPLL2_DEDICATED);
+		t2_out(PM_RECEIVER, 0x0, TWL4030_VPLL2_DEV_GRP);
+		msleep(4);
+	}
+}
+
+static unsigned long sdp2430_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+struct lcd_panel sdp2430_panel = {
+	.name		= "sdp2430",
+	.config		= OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
+			  OMAP_LCDC_INV_HSYNC,
+
+	.bpp		= 16,
+	.data_lines	= 16,
+	.x_res		= 240,
+	.y_res		= 320,
+	.hsw		= 3,		/* hsync_len (4) - 1 */
+	.hfp		= 3,		/* right_margin (4) - 1 */
+	.hbp		= 39,		/* left_margin (40) - 1 */
+	.vsw		= 1,		/* vsync_len (2) - 1 */
+	.vfp		= 2,		/* lower_margin */
+	.vbp		= 7,		/* upper_margin (8) - 1 */
+
+	.pixel_clock	= LCD_PIXCLOCK_MAX,
+
+	.init		= sdp2430_panel_init,
+	.cleanup	= sdp2430_panel_cleanup,
+	.enable	= sdp2430_panel_enable,
+	.disable	= sdp2430_panel_disable,
+	.get_caps	= sdp2430_panel_get_caps,
+};
+
+static int sdp2430_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&sdp2430_panel);
+	return 0;
+}
+
+static int sdp2430_panel_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int sdp2430_panel_suspend(struct platform_device *pdev,
+					pm_message_t mesg)
+{
+	return 0;
+}
+
+static int sdp2430_panel_resume(struct platform_device *pdev)
+{
+	return 0;
+}
+
+struct platform_driver sdp2430_panel_driver = {
+	.probe		= sdp2430_panel_probe,
+	.remove		= sdp2430_panel_remove,
+	.suspend		= sdp2430_panel_suspend,
+	.resume		= sdp2430_panel_resume,
+	.driver		= {
+		.name	= "sdp2430_lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init sdp2430_panel_drv_init(void)
+{
+	return platform_driver_register(&sdp2430_panel_driver);
+}
+
+static void __exit sdp2430_panel_drv_exit(void)
+{
+	platform_driver_unregister(&sdp2430_panel_driver);
+}
+
+module_init(sdp2430_panel_drv_init);
+module_exit(sdp2430_panel_drv_exit);
-- 
cgit v0.10.2


From 42acff34f85c5892c8c762145441e993a5596a13 Mon Sep 17 00:00:00 2001
From: arun c <arun.edarath@gmail.com>
Date: Tue, 22 Sep 2009 16:46:47 -0700
Subject: omapfb: add support for the OMAP2EVM LCD

omap2evm LCD supports VGA and QVGA resolution, by default its in VGA mode.

Fixed-by: Jarkko Nikula <jarkko.nikula@gmail.com>
Fixed-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Arun C <arunedarath@mistralsolutions.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index 7a37b03..c2475e3 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -27,6 +27,7 @@ objs-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
 
 objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o
 objs-y$(CONFIG_MACH_OMAP_2430SDP) += lcd_2430sdp.o
+objs-y$(CONFIG_MACH_OMAP2EVM) += lcd_omap2evm.o
 objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
 
 omapfb-objs := $(objs-yy)
diff --git a/drivers/video/omap/lcd_omap2evm.c b/drivers/video/omap/lcd_omap2evm.c
new file mode 100644
index 0000000..7a2bbe2
--- /dev/null
+++ b/drivers/video/omap/lcd_omap2evm.c
@@ -0,0 +1,191 @@
+/*
+ * LCD panel support for the MISTRAL OMAP2EVM board
+ *
+ * Author: Arun C <arunedarath@mistralsolutions.com>
+ *
+ * Derived from drivers/video/omap/lcd_omap3evm.c
+ * Derived from drivers/video/omap/lcd-apollon.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/i2c/twl4030.h>
+
+#include <mach/mux.h>
+#include <mach/omapfb.h>
+#include <asm/mach-types.h>
+
+#define LCD_PANEL_ENABLE_GPIO	154
+#define LCD_PANEL_LR		128
+#define LCD_PANEL_UD		129
+#define LCD_PANEL_INI		152
+#define LCD_PANEL_QVGA		148
+#define LCD_PANEL_RESB		153
+
+#define TWL_LED_LEDEN		0x00
+#define TWL_PWMA_PWMAON		0x00
+#define TWL_PWMA_PWMAOFF	0x01
+
+static unsigned int bklight_level;
+
+static int omap2evm_panel_init(struct lcd_panel *panel,
+				struct omapfb_device *fbdev)
+{
+	gpio_request(LCD_PANEL_ENABLE_GPIO, "LCD enable");
+	gpio_request(LCD_PANEL_LR, "LCD lr");
+	gpio_request(LCD_PANEL_UD, "LCD ud");
+	gpio_request(LCD_PANEL_INI, "LCD ini");
+	gpio_request(LCD_PANEL_QVGA, "LCD qvga");
+	gpio_request(LCD_PANEL_RESB, "LCD resb");
+
+	gpio_direction_output(LCD_PANEL_ENABLE_GPIO, 1);
+	gpio_direction_output(LCD_PANEL_RESB, 1);
+	gpio_direction_output(LCD_PANEL_INI, 1);
+	gpio_direction_output(LCD_PANEL_QVGA, 0);
+	gpio_direction_output(LCD_PANEL_LR, 1);
+	gpio_direction_output(LCD_PANEL_UD, 1);
+
+	twl4030_i2c_write_u8(TWL4030_MODULE_LED, 0x11, TWL_LED_LEDEN);
+	twl4030_i2c_write_u8(TWL4030_MODULE_PWMA, 0x01, TWL_PWMA_PWMAON);
+	twl4030_i2c_write_u8(TWL4030_MODULE_PWMA, 0x02, TWL_PWMA_PWMAOFF);
+	bklight_level = 100;
+
+	return 0;
+}
+
+static void omap2evm_panel_cleanup(struct lcd_panel *panel)
+{
+	gpio_free(LCD_PANEL_RESB);
+	gpio_free(LCD_PANEL_QVGA);
+	gpio_free(LCD_PANEL_INI);
+	gpio_free(LCD_PANEL_UD);
+	gpio_free(LCD_PANEL_LR);
+	gpio_free(LCD_PANEL_ENABLE_GPIO);
+}
+
+static int omap2evm_panel_enable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_PANEL_ENABLE_GPIO, 0);
+	return 0;
+}
+
+static void omap2evm_panel_disable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_PANEL_ENABLE_GPIO, 1);
+}
+
+static unsigned long omap2evm_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+static int omap2evm_bklight_setlevel(struct lcd_panel *panel,
+						unsigned int level)
+{
+	u8 c;
+	if ((level >= 0) && (level <= 100)) {
+		c = (125 * (100 - level)) / 100 + 2;
+		twl4030_i2c_write_u8(TWL4030_MODULE_PWMA, c, TWL_PWMA_PWMAOFF);
+		bklight_level = level;
+	}
+	return 0;
+}
+
+static unsigned int omap2evm_bklight_getlevel(struct lcd_panel *panel)
+{
+	return bklight_level;
+}
+
+static unsigned int omap2evm_bklight_getmaxlevel(struct lcd_panel *panel)
+{
+	return 100;
+}
+
+struct lcd_panel omap2evm_panel = {
+	.name		= "omap2evm",
+	.config		= OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
+			  OMAP_LCDC_INV_HSYNC,
+
+	.bpp		= 16,
+	.data_lines	= 18,
+	.x_res		= 480,
+	.y_res		= 640,
+	.hsw		= 3,
+	.hfp		= 0,
+	.hbp		= 28,
+	.vsw		= 2,
+	.vfp		= 1,
+	.vbp		= 0,
+
+	.pixel_clock	= 20000,
+
+	.init		= omap2evm_panel_init,
+	.cleanup	= omap2evm_panel_cleanup,
+	.enable		= omap2evm_panel_enable,
+	.disable	= omap2evm_panel_disable,
+	.get_caps	= omap2evm_panel_get_caps,
+	.set_bklight_level      = omap2evm_bklight_setlevel,
+	.get_bklight_level      = omap2evm_bklight_getlevel,
+	.get_bklight_max        = omap2evm_bklight_getmaxlevel,
+};
+
+static int omap2evm_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&omap2evm_panel);
+	return 0;
+}
+
+static int omap2evm_panel_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int omap2evm_panel_suspend(struct platform_device *pdev,
+				   pm_message_t mesg)
+{
+	return 0;
+}
+
+static int omap2evm_panel_resume(struct platform_device *pdev)
+{
+	return 0;
+}
+
+struct platform_driver omap2evm_panel_driver = {
+	.probe		= omap2evm_panel_probe,
+	.remove		= omap2evm_panel_remove,
+	.suspend	= omap2evm_panel_suspend,
+	.resume		= omap2evm_panel_resume,
+	.driver		= {
+		.name	= "omap2evm_lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init omap2evm_panel_drv_init(void)
+{
+	return platform_driver_register(&omap2evm_panel_driver);
+}
+
+static void __exit omap2evm_panel_drv_exit(void)
+{
+	platform_driver_unregister(&omap2evm_panel_driver);
+}
+
+module_init(omap2evm_panel_drv_init);
+module_exit(omap2evm_panel_drv_exit);
-- 
cgit v0.10.2


From fb49b78452503f6cdf8b243ccb40cda81aae9998 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@mvista.com>
Date: Tue, 22 Sep 2009 16:46:48 -0700
Subject: omapfb: add support for the 3430SDP LCD

The 3430SDP uses the same panel as the 2430SDP.  The main difference are
in the GPIO lines used for panel enable and backlight, and the VAUX
register/commands sent to the TWL4030 power subsystem.

Also, some misc. whitespace cleanups.

Fixed-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kevin Hilman <khilman@mvsita.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/configs/omap_3430sdp_defconfig b/arch/arm/configs/omap_3430sdp_defconfig
index 9a510ea..8a4a7e2 100644
--- a/arch/arm/configs/omap_3430sdp_defconfig
+++ b/arch/arm/configs/omap_3430sdp_defconfig
@@ -1313,8 +1313,33 @@ CONFIG_DVB_ISL6421=m
 # Graphics support
 #
 # CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-# CONFIG_FB is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+CONFIG_FB_OMAP=y
+# CONFIG_FB_OMAP_LCDC_EXTERNAL is not set
+# CONFIG_FB_OMAP_BOOTLOADER_INIT is not set
+CONFIG_FB_OMAP_CONSISTENT_DMA_SIZE=2
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
@@ -1331,6 +1356,16 @@ CONFIG_DISPLAY_SUPPORT=y
 #
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+CONFIG_LOGO_LINUX_MONO=y
+CONFIG_LOGO_LINUX_VGA16=y
+CONFIG_LOGO_LINUX_CLUT224=y
 CONFIG_SOUND=y
 CONFIG_SOUND_OSS_CORE=y
 CONFIG_SND=y
diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index c2475e3..96d2d43 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -8,6 +8,7 @@ objs-yy := omapfb_main.o
 
 objs-y$(CONFIG_ARCH_OMAP1) += lcdc.o
 objs-y$(CONFIG_ARCH_OMAP2) += dispc.o
+objs-y$(CONFIG_ARCH_OMAP3) += dispc.o
 
 objs-$(CONFIG_ARCH_OMAP1)$(CONFIG_FB_OMAP_LCDC_EXTERNAL) += sossi.o
 objs-$(CONFIG_ARCH_OMAP2)$(CONFIG_FB_OMAP_LCDC_EXTERNAL) += rfbi.o
@@ -27,6 +28,7 @@ objs-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
 
 objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o
 objs-y$(CONFIG_MACH_OMAP_2430SDP) += lcd_2430sdp.o
+objs-y$(CONFIG_MACH_OMAP_3430SDP) += lcd_2430sdp.o
 objs-y$(CONFIG_MACH_OMAP2EVM) += lcd_omap2evm.o
 objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
 
diff --git a/drivers/video/omap/lcd_2430sdp.c b/drivers/video/omap/lcd_2430sdp.c
index 284cfce..393712b 100644
--- a/drivers/video/omap/lcd_2430sdp.c
+++ b/drivers/video/omap/lcd_2430sdp.c
@@ -39,13 +39,17 @@
 static unsigned backlight_gpio;
 static unsigned enable_gpio;
 
-#define LCD_PANEL_BACKLIGHT_GPIO	91
-#define LCD_PANEL_ENABLE_GPIO		154
 #define LCD_PIXCLOCK_MAX		5400 /* freq 5.4 MHz */
 #define PM_RECEIVER             TWL4030_MODULE_PM_RECEIVER
 #define ENABLE_VAUX2_DEDICATED  0x09
 #define ENABLE_VAUX2_DEV_GRP    0x20
+#define ENABLE_VAUX3_DEDICATED	0x03
+#define ENABLE_VAUX3_DEV_GRP	0x20
 
+#define ENABLE_VPLL2_DEDICATED          0x05
+#define ENABLE_VPLL2_DEV_GRP            0xE0
+#define TWL4030_VPLL2_DEV_GRP           0x33
+#define TWL4030_VPLL2_DEDICATED         0x36
 
 #define t2_out(c, r, v) twl4030_i2c_write_u8(c, r, v)
 
@@ -146,7 +150,7 @@ struct lcd_panel sdp2430_panel = {
 
 	.init		= sdp2430_panel_init,
 	.cleanup	= sdp2430_panel_cleanup,
-	.enable	= sdp2430_panel_enable,
+	.enable		= sdp2430_panel_enable,
 	.disable	= sdp2430_panel_disable,
 	.get_caps	= sdp2430_panel_get_caps,
 };
@@ -176,7 +180,7 @@ static int sdp2430_panel_resume(struct platform_device *pdev)
 struct platform_driver sdp2430_panel_driver = {
 	.probe		= sdp2430_panel_probe,
 	.remove		= sdp2430_panel_remove,
-	.suspend		= sdp2430_panel_suspend,
+	.suspend	= sdp2430_panel_suspend,
 	.resume		= sdp2430_panel_resume,
 	.driver		= {
 		.name	= "sdp2430_lcd",
-- 
cgit v0.10.2


From 2a8729c47a9dfe5c8ba55883b86b719b33e6e099 Mon Sep 17 00:00:00 2001
From: Steve Sakoman <steve@sakoman.com>
Date: Tue, 22 Sep 2009 16:46:49 -0700
Subject: omapfb: add support for the OMAP3 EVM LCD

Add LCD support for OMAP3 EVM

Backlight support by Arun C <arunedarath@mistralsolutions.com>

Fixed-by: Jarkko Nikula <jarkko.nikula@gmail.com>
Fixed-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
Acked-by: Syed Mohammed Khasim <khasim@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index 96d2d43..4345157 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -30,6 +30,7 @@ objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o
 objs-y$(CONFIG_MACH_OMAP_2430SDP) += lcd_2430sdp.o
 objs-y$(CONFIG_MACH_OMAP_3430SDP) += lcd_2430sdp.o
 objs-y$(CONFIG_MACH_OMAP2EVM) += lcd_omap2evm.o
+objs-y$(CONFIG_MACH_OMAP3EVM) += lcd_omap3evm.o
 objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
 
 omapfb-objs := $(objs-yy)
diff --git a/drivers/video/omap/lcd_omap3evm.c b/drivers/video/omap/lcd_omap3evm.c
new file mode 100644
index 0000000..b6a4c2c
--- /dev/null
+++ b/drivers/video/omap/lcd_omap3evm.c
@@ -0,0 +1,192 @@
+/*
+ * LCD panel support for the TI OMAP3 EVM board
+ *
+ * Author: Steve Sakoman <steve@sakoman.com>
+ *
+ * Derived from drivers/video/omap/lcd-apollon.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/i2c/twl4030.h>
+
+#include <mach/mux.h>
+#include <mach/omapfb.h>
+#include <asm/mach-types.h>
+
+#define LCD_PANEL_ENABLE_GPIO       153
+#define LCD_PANEL_LR                2
+#define LCD_PANEL_UD                3
+#define LCD_PANEL_INI               152
+#define LCD_PANEL_QVGA              154
+#define LCD_PANEL_RESB              155
+
+#define ENABLE_VDAC_DEDICATED	0x03
+#define ENABLE_VDAC_DEV_GRP	0x20
+#define ENABLE_VPLL2_DEDICATED	0x05
+#define ENABLE_VPLL2_DEV_GRP	0xE0
+
+#define TWL_LED_LEDEN		0x00
+#define TWL_PWMA_PWMAON		0x00
+#define TWL_PWMA_PWMAOFF	0x01
+
+static unsigned int bklight_level;
+
+static int omap3evm_panel_init(struct lcd_panel *panel,
+				struct omapfb_device *fbdev)
+{
+	gpio_request(LCD_PANEL_LR, "LCD lr");
+	gpio_request(LCD_PANEL_UD, "LCD ud");
+	gpio_request(LCD_PANEL_INI, "LCD ini");
+	gpio_request(LCD_PANEL_RESB, "LCD resb");
+	gpio_request(LCD_PANEL_QVGA, "LCD qvga");
+
+	gpio_direction_output(LCD_PANEL_RESB, 1);
+	gpio_direction_output(LCD_PANEL_INI, 1);
+	gpio_direction_output(LCD_PANEL_QVGA, 0);
+	gpio_direction_output(LCD_PANEL_LR, 1);
+	gpio_direction_output(LCD_PANEL_UD, 1);
+
+	twl4030_i2c_write_u8(TWL4030_MODULE_LED, 0x11, TWL_LED_LEDEN);
+	twl4030_i2c_write_u8(TWL4030_MODULE_PWMA, 0x01, TWL_PWMA_PWMAON);
+	twl4030_i2c_write_u8(TWL4030_MODULE_PWMA, 0x02, TWL_PWMA_PWMAOFF);
+	bklight_level = 100;
+
+	return 0;
+}
+
+static void omap3evm_panel_cleanup(struct lcd_panel *panel)
+{
+	gpio_free(LCD_PANEL_QVGA);
+	gpio_free(LCD_PANEL_RESB);
+	gpio_free(LCD_PANEL_INI);
+	gpio_free(LCD_PANEL_UD);
+	gpio_free(LCD_PANEL_LR);
+}
+
+static int omap3evm_panel_enable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_PANEL_ENABLE_GPIO, 0);
+	return 0;
+}
+
+static void omap3evm_panel_disable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_PANEL_ENABLE_GPIO, 1);
+}
+
+static unsigned long omap3evm_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+static int omap3evm_bklight_setlevel(struct lcd_panel *panel,
+						unsigned int level)
+{
+	u8 c;
+	if ((level >= 0) && (level <= 100)) {
+		c = (125 * (100 - level)) / 100 + 2;
+		twl4030_i2c_write_u8(TWL4030_MODULE_PWMA, c, TWL_PWMA_PWMAOFF);
+		bklight_level = level;
+	}
+	return 0;
+}
+
+static unsigned int omap3evm_bklight_getlevel(struct lcd_panel *panel)
+{
+	return bklight_level;
+}
+
+static unsigned int omap3evm_bklight_getmaxlevel(struct lcd_panel *panel)
+{
+	return 100;
+}
+
+struct lcd_panel omap3evm_panel = {
+	.name		= "omap3evm",
+	.config		= OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
+			  OMAP_LCDC_INV_HSYNC,
+
+	.bpp		= 16,
+	.data_lines	= 18,
+	.x_res		= 480,
+	.y_res		= 640,
+	.hsw		= 3,		/* hsync_len (4) - 1 */
+	.hfp		= 3,		/* right_margin (4) - 1 */
+	.hbp		= 39,		/* left_margin (40) - 1 */
+	.vsw		= 1,		/* vsync_len (2) - 1 */
+	.vfp		= 2,		/* lower_margin */
+	.vbp		= 7,		/* upper_margin (8) - 1 */
+
+	.pixel_clock	= 26000,
+
+	.init		= omap3evm_panel_init,
+	.cleanup	= omap3evm_panel_cleanup,
+	.enable		= omap3evm_panel_enable,
+	.disable	= omap3evm_panel_disable,
+	.get_caps	= omap3evm_panel_get_caps,
+	.set_bklight_level      = omap3evm_bklight_setlevel,
+	.get_bklight_level      = omap3evm_bklight_getlevel,
+	.get_bklight_max        = omap3evm_bklight_getmaxlevel,
+};
+
+static int omap3evm_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&omap3evm_panel);
+	return 0;
+}
+
+static int omap3evm_panel_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int omap3evm_panel_suspend(struct platform_device *pdev,
+				   pm_message_t mesg)
+{
+	return 0;
+}
+
+static int omap3evm_panel_resume(struct platform_device *pdev)
+{
+	return 0;
+}
+
+struct platform_driver omap3evm_panel_driver = {
+	.probe		= omap3evm_panel_probe,
+	.remove		= omap3evm_panel_remove,
+	.suspend	= omap3evm_panel_suspend,
+	.resume		= omap3evm_panel_resume,
+	.driver		= {
+		.name	= "omap3evm_lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init omap3evm_panel_drv_init(void)
+{
+	return platform_driver_register(&omap3evm_panel_driver);
+}
+
+static void __exit omap3evm_panel_drv_exit(void)
+{
+	platform_driver_unregister(&omap3evm_panel_driver);
+}
+
+module_init(omap3evm_panel_drv_init);
+module_exit(omap3evm_panel_drv_exit);
-- 
cgit v0.10.2


From 27969ccc2840c42bfbe4f55d08f0c7ec254d4e93 Mon Sep 17 00:00:00 2001
From: Koen Kooi <koen@openembedded.org>
Date: Tue, 22 Sep 2009 16:46:50 -0700
Subject: omapfb: add support for the OMAP3 Beagle DVI output

The default resolution is 1024x768@24bit

This version addresses the comments from Felipe Balbi adn Arun Edarath

Fixed-by: Felipe Contreras <felipe.contreras@gmail.com>
Fixed-by: Steve Sakoman <steve@sakoman.com>
Fixed-by: Jarkko Nikula <jarkko.nikula@gmail.com>
Fixed-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Koen Kooi <koen@openembedded.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/configs/omap3_beagle_defconfig b/arch/arm/configs/omap3_beagle_defconfig
index 51c0fa8..357d402 100644
--- a/arch/arm/configs/omap3_beagle_defconfig
+++ b/arch/arm/configs/omap3_beagle_defconfig
@@ -778,7 +778,33 @@ CONFIG_DAB=y
 #
 # CONFIG_VGASTATE is not set
 # CONFIG_VIDEO_OUTPUT_CONTROL is not set
-# CONFIG_FB is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+CONFIG_FB_OMAP=y
+# CONFIG_FB_OMAP_LCDC_EXTERNAL is not set
+# CONFIG_FB_OMAP_BOOTLOADER_INIT is not set
+CONFIG_FB_OMAP_CONSISTENT_DMA_SIZE=2
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
@@ -791,6 +817,25 @@ CONFIG_DAB=y
 #
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FONTS=y
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+# CONFIG_FONT_6x11 is not set
+# CONFIG_FONT_7x14 is not set
+# CONFIG_FONT_PEARL_8x8 is not set
+# CONFIG_FONT_ACORN_8x8 is not set
+# CONFIG_FONT_MINI_4x6 is not set
+# CONFIG_FONT_SUN8x16 is not set
+# CONFIG_FONT_SUN12x22 is not set
+# CONFIG_FONT_10x18 is not set
+# CONFIG_LOGO is not set
+
+#
+# Sound
+#
 # CONFIG_SOUND is not set
 # CONFIG_HID_SUPPORT is not set
 CONFIG_USB_SUPPORT=y
diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index 4345157..9ff1815 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -31,6 +31,7 @@ objs-y$(CONFIG_MACH_OMAP_2430SDP) += lcd_2430sdp.o
 objs-y$(CONFIG_MACH_OMAP_3430SDP) += lcd_2430sdp.o
 objs-y$(CONFIG_MACH_OMAP2EVM) += lcd_omap2evm.o
 objs-y$(CONFIG_MACH_OMAP3EVM) += lcd_omap3evm.o
+objs-y$(CONFIG_MACH_OMAP3_BEAGLE) += lcd_omap3beagle.o
 objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
 
 omapfb-objs := $(objs-yy)
diff --git a/drivers/video/omap/lcd_omap3beagle.c b/drivers/video/omap/lcd_omap3beagle.c
new file mode 100644
index 0000000..4011910
--- /dev/null
+++ b/drivers/video/omap/lcd_omap3beagle.c
@@ -0,0 +1,130 @@
+/*
+ * LCD panel support for the TI OMAP3 Beagle board
+ *
+ * Author: Koen Kooi <koen@openembedded.org>
+ *
+ * Derived from drivers/video/omap/lcd-omap3evm.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/i2c/twl4030.h>
+
+#include <mach/mux.h>
+#include <mach/omapfb.h>
+#include <asm/mach-types.h>
+
+#define LCD_PANEL_ENABLE_GPIO       170
+
+static int omap3beagle_panel_init(struct lcd_panel *panel,
+				struct omapfb_device *fbdev)
+{
+	gpio_request(LCD_PANEL_ENABLE_GPIO, "LCD enable");
+	return 0;
+}
+
+static void omap3beagle_panel_cleanup(struct lcd_panel *panel)
+{
+	gpio_free(LCD_PANEL_ENABLE_GPIO);
+}
+
+static int omap3beagle_panel_enable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_PANEL_ENABLE_GPIO, 1);
+	return 0;
+}
+
+static void omap3beagle_panel_disable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_PANEL_ENABLE_GPIO, 0);
+}
+
+static unsigned long omap3beagle_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+struct lcd_panel omap3beagle_panel = {
+	.name		= "omap3beagle",
+	.config		= OMAP_LCDC_PANEL_TFT,
+
+	.bpp		= 16,
+	.data_lines	= 24,
+	.x_res		= 1024,
+	.y_res		= 768,
+	.hsw		= 3,		/* hsync_len (4) - 1 */
+	.hfp		= 3,		/* right_margin (4) - 1 */
+	.hbp		= 39,		/* left_margin (40) - 1 */
+	.vsw		= 1,		/* vsync_len (2) - 1 */
+	.vfp		= 2,		/* lower_margin */
+	.vbp		= 7,		/* upper_margin (8) - 1 */
+
+	.pixel_clock	= 64000,
+
+	.init		= omap3beagle_panel_init,
+	.cleanup	= omap3beagle_panel_cleanup,
+	.enable		= omap3beagle_panel_enable,
+	.disable	= omap3beagle_panel_disable,
+	.get_caps	= omap3beagle_panel_get_caps,
+};
+
+static int omap3beagle_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&omap3beagle_panel);
+	return 0;
+}
+
+static int omap3beagle_panel_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int omap3beagle_panel_suspend(struct platform_device *pdev,
+				   pm_message_t mesg)
+{
+	return 0;
+}
+
+static int omap3beagle_panel_resume(struct platform_device *pdev)
+{
+	return 0;
+}
+
+struct platform_driver omap3beagle_panel_driver = {
+	.probe		= omap3beagle_panel_probe,
+	.remove		= omap3beagle_panel_remove,
+	.suspend	= omap3beagle_panel_suspend,
+	.resume		= omap3beagle_panel_resume,
+	.driver		= {
+		.name	= "omap3beagle_lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init omap3beagle_panel_drv_init(void)
+{
+	return platform_driver_register(&omap3beagle_panel_driver);
+}
+
+static void __exit omap3beagle_panel_drv_exit(void)
+{
+	platform_driver_unregister(&omap3beagle_panel_driver);
+}
+
+module_init(omap3beagle_panel_drv_init);
+module_exit(omap3beagle_panel_drv_exit);
-- 
cgit v0.10.2


From be481941c6ddfe96e5c17a5d6f077b00e7b7bdf1 Mon Sep 17 00:00:00 2001
From: Steve Sakoman <steve@sakoman.com>
Date: Tue, 22 Sep 2009 16:46:51 -0700
Subject: omapfb: add support for the Gumstix Overo LCD

Signed-off-by: Steve Sakoman <steve@sakoman.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/Kconfig b/drivers/video/omap/Kconfig
index 574453f..eb05e09 100644
--- a/drivers/video/omap/Kconfig
+++ b/drivers/video/omap/Kconfig
@@ -7,6 +7,27 @@ config FB_OMAP
 	help
           Frame buffer driver for OMAP based boards.
 
+choice
+	depends on FB_OMAP && MACH_OVERO
+	prompt "Screen resolution"
+	default FB_OMAP_079M3R
+	help
+	  Selected desired screen resolution
+
+config FB_OMAP_031M3R
+	boolean "640 x 480 @ 60 Hz Reduced blanking"
+
+config FB_OMAP_048M3R
+	boolean "800 x 600 @ 60 Hz Reduced blanking"
+
+config FB_OMAP_079M3R
+	boolean "1024 x 768 @ 60 Hz Reduced blanking"
+
+config FB_OMAP_092M9R
+	boolean "1280 x 720 @ 60 Hz Reduced blanking"
+
+endchoice
+
 config FB_OMAP_LCD_MIPID
 	bool "MIPI DBI-C/DCS compatible LCD support"
 	depends on FB_OMAP && SPI_MASTER
diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index 9ff1815..0d2996a 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -33,6 +33,7 @@ objs-y$(CONFIG_MACH_OMAP2EVM) += lcd_omap2evm.o
 objs-y$(CONFIG_MACH_OMAP3EVM) += lcd_omap3evm.o
 objs-y$(CONFIG_MACH_OMAP3_BEAGLE) += lcd_omap3beagle.o
 objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
+objs-y$(CONFIG_MACH_OVERO) += lcd_overo.o
 
 omapfb-objs := $(objs-yy)
 
diff --git a/drivers/video/omap/lcd_overo.c b/drivers/video/omap/lcd_overo.c
new file mode 100644
index 0000000..2bc5c92
--- /dev/null
+++ b/drivers/video/omap/lcd_overo.c
@@ -0,0 +1,179 @@
+/*
+ * LCD panel support for the Gumstix Overo
+ *
+ * Author: Steve Sakoman <steve@sakoman.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/i2c/twl4030.h>
+
+#include <mach/gpio.h>
+#include <mach/mux.h>
+#include <mach/omapfb.h>
+#include <asm/mach-types.h>
+
+#define LCD_ENABLE       144
+
+static int overo_panel_init(struct lcd_panel *panel,
+				struct omapfb_device *fbdev)
+{
+	if ((gpio_request(LCD_ENABLE, "LCD_ENABLE") == 0) &&
+	    (gpio_direction_output(LCD_ENABLE, 1) == 0))
+		gpio_export(LCD_ENABLE, 0);
+	else
+		printk(KERN_ERR "could not obtain gpio for LCD_ENABLE\n");
+
+	return 0;
+}
+
+static void overo_panel_cleanup(struct lcd_panel *panel)
+{
+	gpio_free(LCD_ENABLE);
+}
+
+static int overo_panel_enable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_ENABLE, 1);
+	return 0;
+}
+
+static void overo_panel_disable(struct lcd_panel *panel)
+{
+	gpio_set_value(LCD_ENABLE, 0);
+}
+
+static unsigned long overo_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+struct lcd_panel overo_panel = {
+	.name		= "overo",
+	.config		= OMAP_LCDC_PANEL_TFT,
+	.bpp		= 16,
+	.data_lines	= 24,
+
+#if defined CONFIG_FB_OMAP_031M3R
+
+	/* 640 x 480 @ 60 Hz  Reduced blanking VESA CVT 0.31M3-R */
+	.x_res		= 640,
+	.y_res		= 480,
+	.hfp		= 48,
+	.hsw		= 32,
+	.hbp		= 80,
+	.vfp		= 3,
+	.vsw		= 4,
+	.vbp		= 7,
+	.pixel_clock	= 23500,
+
+#elif defined CONFIG_FB_OMAP_048M3R
+
+	/* 800 x 600 @ 60 Hz  Reduced blanking VESA CVT 0.48M3-R */
+	.x_res		= 800,
+	.y_res		= 600,
+	.hfp		= 48,
+	.hsw		= 32,
+	.hbp		= 80,
+	.vfp		= 3,
+	.vsw		= 4,
+	.vbp		= 11,
+	.pixel_clock	= 35500,
+
+#elif defined CONFIG_FB_OMAP_079M3R
+
+	/* 1024 x 768 @ 60 Hz  Reduced blanking VESA CVT 0.79M3-R */
+	.x_res		= 1024,
+	.y_res		= 768,
+	.hfp		= 48,
+	.hsw		= 32,
+	.hbp		= 80,
+	.vfp		= 3,
+	.vsw		= 4,
+	.vbp		= 15,
+	.pixel_clock	= 56000,
+
+#elif defined CONFIG_FB_OMAP_092M9R
+
+	/* 1280 x 720 @ 60 Hz  Reduced blanking VESA CVT 0.92M9-R */
+	.x_res		= 1280,
+	.y_res		= 720,
+	.hfp		= 48,
+	.hsw		= 32,
+	.hbp		= 80,
+	.vfp		= 3,
+	.vsw		= 5,
+	.vbp		= 13,
+	.pixel_clock	= 64000,
+
+#else
+
+	/* use 640 x 480 if no config option */
+	/* 640 x 480 @ 60 Hz  Reduced blanking VESA CVT 0.31M3-R */
+	.x_res		= 640,
+	.y_res		= 480,
+	.hfp		= 48,
+	.hsw		= 32,
+	.hbp		= 80,
+	.vfp		= 3,
+	.vsw		= 4,
+	.vbp		= 7,
+	.pixel_clock	= 23500,
+
+#endif
+
+	.init		= overo_panel_init,
+	.cleanup	= overo_panel_cleanup,
+	.enable		= overo_panel_enable,
+	.disable	= overo_panel_disable,
+	.get_caps	= overo_panel_get_caps,
+};
+
+static int overo_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&overo_panel);
+	return 0;
+}
+
+static int overo_panel_remove(struct platform_device *pdev)
+{
+	/* omapfb does not have unregister_panel */
+	return 0;
+}
+
+static struct platform_driver overo_panel_driver = {
+	.probe		= overo_panel_probe,
+	.remove		= overo_panel_remove,
+	.driver		= {
+		.name	= "overo_lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init overo_panel_drv_init(void)
+{
+	return platform_driver_register(&overo_panel_driver);
+}
+
+static void __exit overo_panel_drv_exit(void)
+{
+	platform_driver_unregister(&overo_panel_driver);
+}
+
+module_init(overo_panel_drv_init);
+module_exit(overo_panel_drv_exit);
-- 
cgit v0.10.2


From 8fea8844a72f95ef22b108f5dc5c4237019771dd Mon Sep 17 00:00:00 2001
From: "Stanley.Miao" <stanley.miao@windriver.com>
Date: Tue, 22 Sep 2009 16:46:52 -0700
Subject: omapfb: add support for the ZOOM MDK LCD

Add glue to control the OMAP_LDP LCD as a frame buffer device using the
existing dispc.c driver under omapfb.

Patch updated for mainline kernel.  Note that the drivers/video/omap
should be updated to pass omap_lcd_config in platform_data.  The patch
should also be updated to compile if twl4030 is not selected, and
eventually to use the regulator framework.

Fixed-by: Jarkko Nikula <jarkko.nikula@gmail.com>
Fixed-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Stanley.Miao <stanley.miao@windriver.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/configs/omap_ldp_defconfig b/arch/arm/configs/omap_ldp_defconfig
index 679a4a3..b9c4891 100644
--- a/arch/arm/configs/omap_ldp_defconfig
+++ b/arch/arm/configs/omap_ldp_defconfig
@@ -690,6 +690,7 @@ CONFIG_GPIOLIB=y
 # CONFIG_GPIO_MAX732X is not set
 # CONFIG_GPIO_PCA953X is not set
 # CONFIG_GPIO_PCF857X is not set
+CONFIG_GPIO_TWL4030=y
 
 #
 # PCI GPIO expanders:
@@ -742,6 +743,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_MFD_SM501 is not set
 # CONFIG_HTC_EGPIO is not set
 # CONFIG_HTC_PASIC3 is not set
+CONFIG_TWL4030_CORE=y
 # CONFIG_MFD_TMIO is not set
 # CONFIG_MFD_T7L66XB is not set
 # CONFIG_MFD_TC6387XB is not set
@@ -767,8 +769,46 @@ CONFIG_DAB=y
 #
 # CONFIG_VGASTATE is not set
 CONFIG_VIDEO_OUTPUT_CONTROL=m
-# CONFIG_FB is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+CONFIG_FB_OMAP=y
+CONFIG_FB_OMAP_LCD_VGA=y
+# CONFIG_FB_OMAP_LCDC_EXTERNAL is not set
+# CONFIG_FB_OMAP_BOOTLOADER_INIT is not set
+CONFIG_FB_OMAP_CONSISTENT_DMA_SIZE=4
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_LCD_LTV350QV is not set
+# CONFIG_LCD_ILI9320 is not set
+# CONFIG_LCD_TDO24M is not set
+# CONFIG_LCD_VGG2432A4 is not set
+CONFIG_LCD_PLATFORM=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+# CONFIG_BACKLIGHT_CORGI is not set
+# CONFIG_BACKLIGHT_GENERIC is not set
 
 #
 # Display device support
@@ -780,6 +820,16 @@ CONFIG_VIDEO_OUTPUT_CONTROL=m
 #
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+CONFIG_LOGO_LINUX_MONO=y
+CONFIG_LOGO_LINUX_VGA16=y
+CONFIG_LOGO_LINUX_CLUT224=y
 CONFIG_SOUND=y
 CONFIG_SND=y
 # CONFIG_SND_SEQUENCER is not set
diff --git a/drivers/video/omap/Kconfig b/drivers/video/omap/Kconfig
index eb05e09..2bffc07 100644
--- a/drivers/video/omap/Kconfig
+++ b/drivers/video/omap/Kconfig
@@ -7,6 +7,10 @@ config FB_OMAP
 	help
           Frame buffer driver for OMAP based boards.
 
+config FB_OMAP_LCD_VGA
+        bool "Use LCD in VGA mode"
+	        depends on MACH_OMAP_3430SDP || MACH_OMAP_LDP
+
 choice
 	depends on FB_OMAP && MACH_OVERO
 	prompt "Screen resolution"
diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index 0d2996a..b63b198 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -29,6 +29,7 @@ objs-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
 objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o
 objs-y$(CONFIG_MACH_OMAP_2430SDP) += lcd_2430sdp.o
 objs-y$(CONFIG_MACH_OMAP_3430SDP) += lcd_2430sdp.o
+objs-y$(CONFIG_MACH_OMAP_LDP) += lcd_ldp.o
 objs-y$(CONFIG_MACH_OMAP2EVM) += lcd_omap2evm.o
 objs-y$(CONFIG_MACH_OMAP3EVM) += lcd_omap3evm.o
 objs-y$(CONFIG_MACH_OMAP3_BEAGLE) += lcd_omap3beagle.o
diff --git a/drivers/video/omap/lcd_ldp.c b/drivers/video/omap/lcd_ldp.c
new file mode 100644
index 0000000..dbfe897
--- /dev/null
+++ b/drivers/video/omap/lcd_ldp.c
@@ -0,0 +1,200 @@
+/*
+ * LCD panel support for the TI LDP board
+ *
+ * Copyright (C) 2007 WindRiver
+ * Author: Stanley Miao <stanley.miao@windriver.com>
+ *
+ * Derived from drivers/video/omap/lcd-2430sdp.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/i2c/twl4030.h>
+
+#include <mach/gpio.h>
+#include <mach/mux.h>
+#include <mach/omapfb.h>
+#include <asm/mach-types.h>
+
+#define LCD_PANEL_BACKLIGHT_GPIO	(15 + OMAP_MAX_GPIO_LINES)
+#define LCD_PANEL_ENABLE_GPIO		(7 + OMAP_MAX_GPIO_LINES)
+
+#define LCD_PANEL_RESET_GPIO		55
+#define LCD_PANEL_QVGA_GPIO		56
+
+#ifdef CONFIG_FB_OMAP_LCD_VGA
+#define LCD_XRES		480
+#define LCD_YRES		640
+#define LCD_PIXCLOCK_MAX	41700
+#else
+#define LCD_XRES		240
+#define LCD_YRES		320
+#define LCD_PIXCLOCK_MAX	185186
+#endif
+
+#define PM_RECEIVER             TWL4030_MODULE_PM_RECEIVER
+#define ENABLE_VAUX2_DEDICATED  0x09
+#define ENABLE_VAUX2_DEV_GRP    0x20
+#define ENABLE_VAUX3_DEDICATED	0x03
+#define ENABLE_VAUX3_DEV_GRP	0x20
+
+#define ENABLE_VPLL2_DEDICATED          0x05
+#define ENABLE_VPLL2_DEV_GRP            0xE0
+#define TWL4030_VPLL2_DEV_GRP           0x33
+#define TWL4030_VPLL2_DEDICATED         0x36
+
+#define t2_out(c, r, v) twl4030_i2c_write_u8(c, r, v)
+
+
+static int ldp_panel_init(struct lcd_panel *panel,
+				struct omapfb_device *fbdev)
+{
+	gpio_request(LCD_PANEL_RESET_GPIO, "lcd reset");
+	gpio_request(LCD_PANEL_QVGA_GPIO, "lcd qvga");
+	gpio_request(LCD_PANEL_ENABLE_GPIO, "lcd panel");
+	gpio_request(LCD_PANEL_BACKLIGHT_GPIO, "lcd backlight");
+
+	gpio_direction_output(LCD_PANEL_QVGA_GPIO, 0);
+	gpio_direction_output(LCD_PANEL_RESET_GPIO, 0);
+	gpio_direction_output(LCD_PANEL_ENABLE_GPIO, 0);
+	gpio_direction_output(LCD_PANEL_BACKLIGHT_GPIO, 0);
+
+#ifdef CONFIG_FB_OMAP_LCD_VGA
+	gpio_set_value(LCD_PANEL_QVGA_GPIO, 0);
+#else
+	gpio_set_value(LCD_PANEL_QVGA_GPIO, 1);
+#endif
+	gpio_set_value(LCD_PANEL_RESET_GPIO, 1);
+
+	return 0;
+}
+
+static void ldp_panel_cleanup(struct lcd_panel *panel)
+{
+	gpio_free(LCD_PANEL_BACKLIGHT_GPIO);
+	gpio_free(LCD_PANEL_ENABLE_GPIO);
+	gpio_free(LCD_PANEL_QVGA_GPIO);
+	gpio_free(LCD_PANEL_RESET_GPIO);
+}
+
+static int ldp_panel_enable(struct lcd_panel *panel)
+{
+	if (0 != t2_out(PM_RECEIVER, ENABLE_VPLL2_DEDICATED,
+			TWL4030_VPLL2_DEDICATED))
+		return -EIO;
+	if (0 != t2_out(PM_RECEIVER, ENABLE_VPLL2_DEV_GRP,
+			TWL4030_VPLL2_DEV_GRP))
+		return -EIO;
+
+	gpio_direction_output(LCD_PANEL_ENABLE_GPIO, 1);
+	gpio_direction_output(LCD_PANEL_BACKLIGHT_GPIO, 1);
+
+	if (0 != t2_out(PM_RECEIVER, ENABLE_VAUX3_DEDICATED,
+				TWL4030_VAUX3_DEDICATED))
+		return -EIO;
+	if (0 != t2_out(PM_RECEIVER, ENABLE_VAUX3_DEV_GRP,
+				TWL4030_VAUX3_DEV_GRP))
+		return -EIO;
+
+	return 0;
+}
+
+static void ldp_panel_disable(struct lcd_panel *panel)
+{
+	gpio_direction_output(LCD_PANEL_ENABLE_GPIO, 0);
+	gpio_direction_output(LCD_PANEL_BACKLIGHT_GPIO, 0);
+
+	t2_out(PM_RECEIVER, 0x0, TWL4030_VPLL2_DEDICATED);
+	t2_out(PM_RECEIVER, 0x0, TWL4030_VPLL2_DEV_GRP);
+	msleep(4);
+}
+
+static unsigned long ldp_panel_get_caps(struct lcd_panel *panel)
+{
+	return 0;
+}
+
+struct lcd_panel ldp_panel = {
+	.name		= "ldp",
+	.config		= OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC |
+			  OMAP_LCDC_INV_HSYNC,
+
+	.bpp		= 16,
+	.data_lines	= 18,
+	.x_res		= LCD_XRES,
+	.y_res		= LCD_YRES,
+	.hsw		= 3,		/* hsync_len (4) - 1 */
+	.hfp		= 3,		/* right_margin (4) - 1 */
+	.hbp		= 39,		/* left_margin (40) - 1 */
+	.vsw		= 1,		/* vsync_len (2) - 1 */
+	.vfp		= 2,		/* lower_margin */
+	.vbp		= 7,		/* upper_margin (8) - 1 */
+
+	.pixel_clock	= LCD_PIXCLOCK_MAX,
+
+	.init		= ldp_panel_init,
+	.cleanup	= ldp_panel_cleanup,
+	.enable		= ldp_panel_enable,
+	.disable	= ldp_panel_disable,
+	.get_caps	= ldp_panel_get_caps,
+};
+
+static int ldp_panel_probe(struct platform_device *pdev)
+{
+	omapfb_register_panel(&ldp_panel);
+	return 0;
+}
+
+static int ldp_panel_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int ldp_panel_suspend(struct platform_device *pdev, pm_message_t mesg)
+{
+	return 0;
+}
+
+static int ldp_panel_resume(struct platform_device *pdev)
+{
+	return 0;
+}
+
+struct platform_driver ldp_panel_driver = {
+	.probe		= ldp_panel_probe,
+	.remove		= ldp_panel_remove,
+	.suspend	= ldp_panel_suspend,
+	.resume		= ldp_panel_resume,
+	.driver		= {
+		.name	= "ldp_lcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init ldp_panel_drv_init(void)
+{
+	return platform_driver_register(&ldp_panel_driver);
+}
+
+static void __exit ldp_panel_drv_exit(void)
+{
+	platform_driver_unregister(&ldp_panel_driver);
+}
+
+module_init(ldp_panel_drv_init);
+module_exit(ldp_panel_drv_exit);
-- 
cgit v0.10.2


From 2f21a62f16136f369788795a98aa5c313261f07b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@openbossa.org>
Date: Tue, 22 Sep 2009 16:46:53 -0700
Subject: omapfb: add support for rotation on the Blizzard LCD ctrl

The LCD controller (EPSON S1D13744) supports rotation (0, 90, 180 and 270
degrees) on hardware just setting the bits 0 and 1 of 0x28 register (LCD
Panel Configuration Register).  Now it is possible to use this caps only
setting the angle degree on var rotate of fb_var_screeninfo using the
FBIOPUT_VSCREENINFO ioctl.

Fixed-by: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@openbossa.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/blizzard.c b/drivers/video/omap/blizzard.c
index 9dfcf39..d5e5955 100644
--- a/drivers/video/omap/blizzard.c
+++ b/drivers/video/omap/blizzard.c
@@ -44,6 +44,7 @@
 #define BLIZZARD_CLK_SRC			0x0e
 #define BLIZZARD_MEM_BANK0_ACTIVATE		0x10
 #define BLIZZARD_MEM_BANK0_STATUS		0x14
+#define BLIZZARD_PANEL_CONFIGURATION		0x28
 #define BLIZZARD_HDISP				0x2a
 #define BLIZZARD_HNDP				0x2c
 #define BLIZZARD_VDISP0				0x2e
@@ -162,6 +163,10 @@ struct blizzard_struct {
 	int			vid_scaled;
 	int			last_color_mode;
 	int			zoom_on;
+	int			zoom_area_gx1;
+	int			zoom_area_gx2;
+	int			zoom_area_gy1;
+	int			zoom_area_gy2;
 	int			screen_width;
 	int			screen_height;
 	unsigned		te_connected:1;
@@ -513,6 +518,13 @@ static int do_full_screen_update(struct blizzard_request *req)
 	return REQ_PENDING;
 }
 
+static int check_1d_intersect(int a1, int a2, int b1, int b2)
+{
+	if (a2 <= b1 || b2 <= a1)
+		return 0;
+	return 1;
+}
+
 /* Setup all planes with an overlapping area with the update window. */
 static int do_partial_update(struct blizzard_request *req, int plane,
 			     int x, int y, int w, int h,
@@ -525,6 +537,7 @@ static int do_partial_update(struct blizzard_request *req, int plane,
 	int color_mode;
 	int flags;
 	int zoom_off;
+	int have_zoom_for_this_update = 0;
 
 	/* Global coordinates, relative to pixel 0,0 of the LCD */
 	gx1 = x + blizzard.plane[plane].pos_x;
@@ -544,10 +557,6 @@ static int do_partial_update(struct blizzard_request *req, int plane,
 		gx2_out = gx1_out + w_out;
 		gy2_out = gy1_out + h_out;
 	}
-	zoom_off = blizzard.zoom_on && gx1 == 0 && gy1 == 0 &&
-		w == blizzard.screen_width && h == blizzard.screen_height;
-	blizzard.zoom_on = (!zoom_off && blizzard.zoom_on) ||
-			   (w < w_out || h < h_out);
 
 	for (i = 0; i < OMAPFB_PLANE_NUM; i++) {
 		struct plane_info *p = &blizzard.plane[i];
@@ -653,8 +662,49 @@ static int do_partial_update(struct blizzard_request *req, int plane,
 	else
 		disable_tearsync();
 
+	if ((gx2_out - gx1_out) != (gx2 - gx1) ||
+	    (gy2_out - gy1_out) != (gy2 - gy1))
+		have_zoom_for_this_update = 1;
+
+	/* 'background' type of screen update (as opposed to 'destructive')
+	   can be used to disable scaling if scaling is active */
+	zoom_off = blizzard.zoom_on && !have_zoom_for_this_update &&
+	    (gx1_out == 0) && (gx2_out == blizzard.screen_width) &&
+	    (gy1_out == 0) && (gy2_out == blizzard.screen_height) &&
+	    (gx1 == 0) && (gy1 == 0);
+
+	if (blizzard.zoom_on && !have_zoom_for_this_update && !zoom_off &&
+	    check_1d_intersect(blizzard.zoom_area_gx1, blizzard.zoom_area_gx2,
+			       gx1_out, gx2_out) &&
+	    check_1d_intersect(blizzard.zoom_area_gy1, blizzard.zoom_area_gy2,
+			       gy1_out, gy2_out)) {
+		/* Previous screen update was using scaling, current update
+		 * is not using it. Additionally, current screen update is
+		 * going to overlap with the scaled area. Scaling needs to be
+		 * disabled in order to avoid 'magnifying glass' effect.
+		 * Dummy setup of background window can be used for this.
+		 */
+		set_window_regs(0, 0, blizzard.screen_width,
+				blizzard.screen_height,
+				0, 0, blizzard.screen_width,
+				blizzard.screen_height,
+				BLIZZARD_COLOR_RGB565, 1, flags);
+		blizzard.zoom_on = 0;
+	}
+
+	/* remember scaling settings if we have scaled update */
+	if (have_zoom_for_this_update) {
+		blizzard.zoom_on = 1;
+		blizzard.zoom_area_gx1 = gx1_out;
+		blizzard.zoom_area_gx2 = gx2_out;
+		blizzard.zoom_area_gy1 = gy1_out;
+		blizzard.zoom_area_gy2 = gy2_out;
+	}
+
 	set_window_regs(gx1, gy1, gx2, gy2, gx1_out, gy1_out, gx2_out, gy2_out,
 			color_mode, zoom_off, flags);
+	if (zoom_off)
+		blizzard.zoom_on = 0;
 
 	blizzard.extif->set_bits_per_cycle(16);
 	/* set_window_regs has left the register index at the right
@@ -908,6 +958,35 @@ static int blizzard_set_scale(int plane, int orig_w, int orig_h,
 	return 0;
 }
 
+static int blizzard_set_rotate(int angle)
+{
+	u32 l;
+
+	l = blizzard_read_reg(BLIZZARD_PANEL_CONFIGURATION);
+	l &= ~0x03;
+
+	switch (angle) {
+	case 0:
+		l = l | 0x00;
+		break;
+	case 90:
+		l = l | 0x03;
+		break;
+	case 180:
+		l = l | 0x02;
+		break;
+	case 270:
+		l = l | 0x01;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	blizzard_write_reg(BLIZZARD_PANEL_CONFIGURATION, l);
+
+	return 0;
+}
+
 static int blizzard_enable_plane(int plane, int enable)
 {
 	if (enable)
@@ -1285,7 +1364,8 @@ static void blizzard_get_caps(int plane, struct omapfb_caps *caps)
 	caps->ctrl |= OMAPFB_CAPS_MANUAL_UPDATE |
 		OMAPFB_CAPS_WINDOW_PIXEL_DOUBLE |
 		OMAPFB_CAPS_WINDOW_SCALE |
-		OMAPFB_CAPS_WINDOW_OVERLAY;
+		OMAPFB_CAPS_WINDOW_OVERLAY |
+		OMAPFB_CAPS_WINDOW_ROTATE;
 	if (blizzard.te_connected)
 		caps->ctrl |= OMAPFB_CAPS_TEARSYNC;
 	caps->wnd_color |= (1 << OMAPFB_COLOR_RGB565) |
@@ -1560,6 +1640,7 @@ struct lcd_ctrl blizzard_ctrl = {
 	.setup_plane		= blizzard_setup_plane,
 	.set_scale		= blizzard_set_scale,
 	.enable_plane		= blizzard_enable_plane,
+	.set_rotate		= blizzard_set_rotate,
 	.update_window		= blizzard_update_window_async,
 	.sync			= blizzard_sync,
 	.suspend		= blizzard_suspend,
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 8862233..db05f7e 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -67,6 +67,7 @@ static struct caps_table_struct ctrl_caps[] = {
 	{ OMAPFB_CAPS_WINDOW_PIXEL_DOUBLE, "pixel double window" },
 	{ OMAPFB_CAPS_WINDOW_SCALE,   "scale window" },
 	{ OMAPFB_CAPS_WINDOW_OVERLAY, "overlay window" },
+	{ OMAPFB_CAPS_WINDOW_ROTATE,  "rotate window" },
 	{ OMAPFB_CAPS_SET_BACKLIGHT,  "backlight setting" },
 };
 
@@ -215,6 +216,15 @@ static int ctrl_change_mode(struct fb_info *fbi)
 				 offset, var->xres_virtual,
 				 plane->info.pos_x, plane->info.pos_y,
 				 var->xres, var->yres, plane->color_mode);
+	if (r < 0)
+		return r;
+
+	if (fbdev->ctrl->set_rotate != NULL) {
+		r = fbdev->ctrl->set_rotate(var->rotate);
+		if (r < 0)
+			return r;
+	}
+
 	if (fbdev->ctrl->set_scale != NULL)
 		r = fbdev->ctrl->set_scale(plane->idx,
 				   var->xres, var->yres,
@@ -600,7 +610,7 @@ static void omapfb_rotate(struct fb_info *fbi, int rotate)
 	struct omapfb_device *fbdev = plane->fbdev;
 
 	omapfb_rqueue_lock(fbdev);
-	if (cpu_is_omap15xx() && rotate != fbi->var.rotate) {
+	if (rotate != fbi->var.rotate) {
 		struct fb_var_screeninfo *new_var = &fbdev->new_var;
 
 		memcpy(new_var, &fbi->var, sizeof(*new_var));
@@ -707,28 +717,42 @@ int omapfb_update_window_async(struct fb_info *fbi,
 				void (*callback)(void *),
 				void *callback_data)
 {
+	int xres, yres;
 	struct omapfb_plane_struct *plane = fbi->par;
 	struct omapfb_device *fbdev = plane->fbdev;
-	struct fb_var_screeninfo *var;
+	struct fb_var_screeninfo *var = &fbi->var;
+
+	switch (var->rotate) {
+	case 0:
+	case 180:
+		xres = fbdev->panel->x_res;
+		yres = fbdev->panel->y_res;
+		break;
+	case 90:
+	case 270:
+		xres = fbdev->panel->y_res;
+		yres = fbdev->panel->x_res;
+		break;
+	default:
+		return -EINVAL;
+	}
 
-	var = &fbi->var;
-	if (win->x >= var->xres || win->y >= var->yres ||
-	    win->out_x > var->xres || win->out_y >= var->yres)
+	if (win->x >= xres || win->y >= yres ||
+	    win->out_x > xres || win->out_y > yres)
 		return -EINVAL;
 
 	if (!fbdev->ctrl->update_window ||
 	    fbdev->ctrl->get_update_mode() != OMAPFB_MANUAL_UPDATE)
 		return -ENODEV;
 
-	if (win->x + win->width >= var->xres)
-		win->width = var->xres - win->x;
-	if (win->y + win->height >= var->yres)
-		win->height = var->yres - win->y;
-	/* The out sizes should be cropped to the LCD size */
-	if (win->out_x + win->out_width > fbdev->panel->x_res)
-		win->out_width = fbdev->panel->x_res - win->out_x;
-	if (win->out_y + win->out_height > fbdev->panel->y_res)
-		win->out_height = fbdev->panel->y_res - win->out_y;
+	if (win->x + win->width > xres)
+		win->width = xres - win->x;
+	if (win->y + win->height > yres)
+		win->height = yres - win->y;
+	if (win->out_x + win->out_width > xres)
+		win->out_width = xres - win->out_x;
+	if (win->out_y + win->out_height > yres)
+		win->out_height = yres - win->out_y;
 	if (!win->width || !win->height || !win->out_width || !win->out_height)
 		return 0;
 
-- 
cgit v0.10.2


From c8f1c1b9e120223ab5b619ff0fc1f32c635248ad Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Tue, 22 Sep 2009 16:46:54 -0700
Subject: N770: enable LCD MIPI-DCS in Kconfig

Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/configs/n770_defconfig b/arch/arm/configs/n770_defconfig
index 672f6db..a1657b7 100644
--- a/arch/arm/configs/n770_defconfig
+++ b/arch/arm/configs/n770_defconfig
@@ -875,7 +875,7 @@ CONFIG_FB_OMAP_LCDC_EXTERNAL=y
 CONFIG_FB_OMAP_LCDC_HWA742=y
 # CONFIG_FB_OMAP_LCDC_BLIZZARD is not set
 CONFIG_FB_OMAP_MANUAL_UPDATE=y
-# CONFIG_FB_OMAP_LCD_MIPID is not set
+CONFIG_FB_OMAP_LCD_MIPID=y
 # CONFIG_FB_OMAP_BOOTLOADER_INIT is not set
 CONFIG_FB_OMAP_CONSISTENT_DMA_SIZE=2
 # CONFIG_FB_OMAP_DMA_TUNE is not set
-- 
cgit v0.10.2


From fd0eecbdfbb61076e75d34034cc5cd5ca1321a81 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Tue, 22 Sep 2009 16:46:54 -0700
Subject: omapfb: dispc: various typo fixes

- value and register offset was swapped in a dispc write
- DISPC_CONTROL register was used instead of DISPC_SYSCONFIG
- FIFO size bit field had incorrect length for OMAP3

Fixed-by: arun <arunedarath@mistralsolutions.com>
Fixed-by: Kalle Jokiniemi <ext-kalle.jokiniemi@nokia.com>
Fixed-by: Andrzej Zaborowski <balrog@zabor.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index 915439d..f1308aa 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -286,7 +286,7 @@ static void setup_plane_fifo(int plane, int ext_mode)
 	BUG_ON(plane > 2);
 
 	l = dispc_read_reg(fsz_reg[plane]);
-	l &= FLD_MASK(0, 9);
+	l &= FLD_MASK(0, 11);
 	if (ext_mode) {
 		low = l * 3 / 4;
 		high = l;
@@ -294,7 +294,7 @@ static void setup_plane_fifo(int plane, int ext_mode)
 		low = l / 4;
 		high = l * 3 / 4;
 	}
-	MOD_REG_FLD(ftrs_reg[plane], FLD_MASK(16, 9) | FLD_MASK(0, 9),
+	MOD_REG_FLD(ftrs_reg[plane], FLD_MASK(16, 12) | FLD_MASK(0, 12),
 			(high << 16) | low);
 }
 
@@ -1397,7 +1397,7 @@ static int omap_dispc_init(struct omapfb_device *fbdev, int ext_mode,
 	}
 
 	/* Enable smart idle and autoidle */
-	l = dispc_read_reg(DISPC_CONTROL);
+	l = dispc_read_reg(DISPC_SYSCONFIG);
 	l &= ~((3 << 12) | (3 << 3));
 	l |= (2 << 12) | (2 << 3) | (1 << 0);
 	dispc_write_reg(DISPC_SYSCONFIG, l);
@@ -1409,7 +1409,7 @@ static int omap_dispc_init(struct omapfb_device *fbdev, int ext_mode,
 	dispc_write_reg(DISPC_CONFIG, l);
 
 	l = dispc_read_reg(DISPC_IRQSTATUS);
-	dispc_write_reg(l, DISPC_IRQSTATUS);
+	dispc_write_reg(DISPC_IRQSTATUS, l);
 
 	/* Enable those that we handle always */
 	omap_dispc_enable_irqs(DISPC_IRQ_FRAMEMASK);
-- 
cgit v0.10.2


From f9e2bc8d7b8c2d9dd05a6702fce77aca3d4f2320 Mon Sep 17 00:00:00 2001
From: Jouni Hogander <jouni.hogander@nokia.com>
Date: Tue, 22 Sep 2009 16:46:55 -0700
Subject: omapfb: dispc: disable iface clocks along with func clocks

Leaving interface clocks enabled causes dss pwrdm to stay in active state
when mpu is in active state.  This fix puts dss to sleep state when it is
not needed.

Earlier version broke framebuffer on 24xx.  This is fixed by enabling
clocks before trying to access DISPC_IRQSTATUS register.

Signed-off-by: Jouni Hogander <jouni.hogander@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index f1308aa..a856949 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -858,8 +858,11 @@ EXPORT_SYMBOL(omap_dispc_free_irq);
 
 static irqreturn_t omap_dispc_irq_handler(int irq, void *dev)
 {
-	u32 stat = dispc_read_reg(DISPC_IRQSTATUS);
+	u32 stat;
 
+	enable_lcd_clocks(1);
+
+	stat = dispc_read_reg(DISPC_IRQSTATUS);
 	if (stat & DISPC_IRQ_FRAMEMASK)
 		complete(&dispc.frame_done);
 
@@ -875,6 +878,8 @@ static irqreturn_t omap_dispc_irq_handler(int irq, void *dev)
 
 	dispc_write_reg(DISPC_IRQSTATUS, stat);
 
+	enable_lcd_clocks(0);
+
 	return IRQ_HANDLED;
 }
 
@@ -913,18 +918,13 @@ static void put_dss_clocks(void)
 
 static void enable_lcd_clocks(int enable)
 {
-	if (enable)
+	if (enable) {
+		clk_enable(dispc.dss_ick);
 		clk_enable(dispc.dss1_fck);
-	else
+	} else {
 		clk_disable(dispc.dss1_fck);
-}
-
-static void enable_interface_clocks(int enable)
-{
-	if (enable)
-		clk_enable(dispc.dss_ick);
-	else
 		clk_disable(dispc.dss_ick);
+	}
 }
 
 static void enable_digit_clocks(int enable)
@@ -1365,7 +1365,6 @@ static int omap_dispc_init(struct omapfb_device *fbdev, int ext_mode,
 	if ((r = get_dss_clocks()) < 0)
 		goto fail0;
 
-	enable_interface_clocks(1);
 	enable_lcd_clocks(1);
 
 #ifdef CONFIG_FB_OMAP_BOOTLOADER_INIT
@@ -1469,7 +1468,6 @@ fail2:
 	free_irq(INT_24XX_DSS_IRQ, fbdev);
 fail1:
 	enable_lcd_clocks(0);
-	enable_interface_clocks(0);
 	put_dss_clocks();
 fail0:
 	iounmap(dispc.base);
@@ -1487,7 +1485,6 @@ static void omap_dispc_cleanup(void)
 	cleanup_fbmem();
 	free_palette_ram();
 	free_irq(INT_24XX_DSS_IRQ, dispc.fbdev);
-	enable_interface_clocks(0);
 	put_dss_clocks();
 	iounmap(dispc.base);
 }
-- 
cgit v0.10.2


From 48a00e7fe9a6abeedb62c99ca7b7860754aae3d8 Mon Sep 17 00:00:00 2001
From: Jouni Hogander <jouni.hogander@nokia.com>
Date: Tue, 22 Sep 2009 16:46:56 -0700
Subject: omapfb: dispc: enable wake up capability

Without wakeup enable omap doesn't wake up on dispc interrupts.  This
causes problems in a case where mpu is in sleep state and dispc interrupt
fires.

Signed-off-by: Jouni Hogander <jouni.hogander@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index a856949..04b7b8a 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -1395,10 +1395,10 @@ static int omap_dispc_init(struct omapfb_device *fbdev, int ext_mode,
 		enable_digit_clocks(0);
 	}
 
-	/* Enable smart idle and autoidle */
+	/* Enable smart standby/idle, autoidle and wakeup */
 	l = dispc_read_reg(DISPC_SYSCONFIG);
 	l &= ~((3 << 12) | (3 << 3));
-	l |= (2 << 12) | (2 << 3) | (1 << 0);
+	l |= (2 << 12) | (2 << 3) | (1 << 2) | (1 << 0);
 	dispc_write_reg(DISPC_SYSCONFIG, l);
 	omap_writel(1 << 0, DSS_BASE + DSS_SYSCONFIG);
 
-- 
cgit v0.10.2


From 4c88ef170f0f9b1f26923b43af526c973c5a74da Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniel.stone@nokia.com>
Date: Tue, 22 Sep 2009 16:46:57 -0700
Subject: omapfb: dispc: allow multiple external IRQ handlers

Previously, the only external (to dispc.c) IRQ handler was RFBI's frame
done handler.  dispc's IRQ framework was very dumb: you could only have
one handler, and the semantics of {request,free}_irq were odd, to say the
least.

The new framework allows multiple consumers to register arbitrary IRQ
masks.

Signed-off-by: Daniel Stone <daniel.stone@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index 04b7b8a..80a11d0 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -155,6 +155,8 @@ struct resmap {
 	unsigned long	*map;
 };
 
+#define MAX_IRQ_HANDLERS            4
+
 static struct {
 	void __iomem	*base;
 
@@ -167,9 +169,11 @@ static struct {
 
 	int		ext_mode;
 
-	unsigned long	enabled_irqs;
-	void		(*irq_callback)(void *);
-	void		*irq_callback_data;
+	struct {
+		u32	irq_mask;
+		void	(*callback)(void *);
+		void	*data;
+	} irq_handlers[MAX_IRQ_HANDLERS];
 	struct completion	frame_done;
 
 	int		fir_hinc[OMAPFB_PLANE_NUM];
@@ -809,56 +813,70 @@ static void set_lcd_timings(void)
 	panel->pixel_clock = fck / lck_div / pck_div / 1000;
 }
 
-int omap_dispc_request_irq(void (*callback)(void *data), void *data)
+static void recalc_irq_mask(void)
 {
-	int r = 0;
+	int i;
+	unsigned long irq_mask = DISPC_IRQ_MASK_ERROR;
 
-	BUG_ON(callback == NULL);
+	for (i = 0; i < MAX_IRQ_HANDLERS; i++) {
+		if (!dispc.irq_handlers[i].callback)
+			continue;
 
-	if (dispc.irq_callback)
-		r = -EBUSY;
-	else {
-		dispc.irq_callback = callback;
-		dispc.irq_callback_data = data;
+		irq_mask |= dispc.irq_handlers[i].irq_mask;
 	}
 
-	return r;
-}
-EXPORT_SYMBOL(omap_dispc_request_irq);
-
-void omap_dispc_enable_irqs(int irq_mask)
-{
 	enable_lcd_clocks(1);
-	dispc.enabled_irqs = irq_mask;
-	irq_mask |= DISPC_IRQ_MASK_ERROR;
 	MOD_REG_FLD(DISPC_IRQENABLE, 0x7fff, irq_mask);
 	enable_lcd_clocks(0);
 }
-EXPORT_SYMBOL(omap_dispc_enable_irqs);
 
-void omap_dispc_disable_irqs(int irq_mask)
+int omap_dispc_request_irq(unsigned long irq_mask, void (*callback)(void *data),
+			   void *data)
 {
-	enable_lcd_clocks(1);
-	dispc.enabled_irqs &= ~irq_mask;
-	irq_mask &= ~DISPC_IRQ_MASK_ERROR;
-	MOD_REG_FLD(DISPC_IRQENABLE, 0x7fff, irq_mask);
-	enable_lcd_clocks(0);
+	int i;
+
+	BUG_ON(callback == NULL);
+
+	for (i = 0; i < MAX_IRQ_HANDLERS; i++) {
+		if (dispc.irq_handlers[i].callback)
+			continue;
+
+		dispc.irq_handlers[i].irq_mask = irq_mask;
+		dispc.irq_handlers[i].callback = callback;
+		dispc.irq_handlers[i].data = data;
+		recalc_irq_mask();
+
+		return 0;
+	}
+
+	return -EBUSY;
 }
-EXPORT_SYMBOL(omap_dispc_disable_irqs);
+EXPORT_SYMBOL(omap_dispc_request_irq);
 
-void omap_dispc_free_irq(void)
+void omap_dispc_free_irq(unsigned long irq_mask, void (*callback)(void *data),
+			 void *data)
 {
-	enable_lcd_clocks(1);
-	omap_dispc_disable_irqs(DISPC_IRQ_MASK_ALL);
-	dispc.irq_callback = NULL;
-	dispc.irq_callback_data = NULL;
-	enable_lcd_clocks(0);
+	int i;
+
+	for (i = 0; i < MAX_IRQ_HANDLERS; i++) {
+		if (dispc.irq_handlers[i].callback == callback &&
+		    dispc.irq_handlers[i].data == data) {
+			dispc.irq_handlers[i].irq_mask = 0;
+			dispc.irq_handlers[i].callback = NULL;
+			dispc.irq_handlers[i].data = NULL;
+			recalc_irq_mask();
+			return;
+		}
+	}
+
+	BUG();
 }
 EXPORT_SYMBOL(omap_dispc_free_irq);
 
 static irqreturn_t omap_dispc_irq_handler(int irq, void *dev)
 {
 	u32 stat;
+	int i = 0;
 
 	enable_lcd_clocks(1);
 
@@ -873,8 +891,12 @@ static irqreturn_t omap_dispc_irq_handler(int irq, void *dev)
 		}
 	}
 
-	if ((stat & dispc.enabled_irqs) && dispc.irq_callback)
-		dispc.irq_callback(dispc.irq_callback_data);
+	for (i = 0; i < MAX_IRQ_HANDLERS; i++) {
+		if (unlikely(dispc.irq_handlers[i].callback &&
+			     (stat & dispc.irq_handlers[i].irq_mask)))
+			dispc.irq_handlers[i].callback(
+						dispc.irq_handlers[i].data);
+	}
 
 	dispc_write_reg(DISPC_IRQSTATUS, stat);
 
@@ -1410,8 +1432,7 @@ static int omap_dispc_init(struct omapfb_device *fbdev, int ext_mode,
 	l = dispc_read_reg(DISPC_IRQSTATUS);
 	dispc_write_reg(DISPC_IRQSTATUS, l);
 
-	/* Enable those that we handle always */
-	omap_dispc_enable_irqs(DISPC_IRQ_FRAMEMASK);
+	recalc_irq_mask();
 
 	if ((r = request_irq(INT_24XX_DSS_IRQ, omap_dispc_irq_handler,
 			   0, MODULE_NAME, fbdev)) < 0) {
diff --git a/drivers/video/omap/dispc.h b/drivers/video/omap/dispc.h
index ef720a7..c15ea77 100644
--- a/drivers/video/omap/dispc.h
+++ b/drivers/video/omap/dispc.h
@@ -37,9 +37,10 @@ extern void omap_dispc_set_lcd_size(int width, int height);
 extern void omap_dispc_enable_lcd_out(int enable);
 extern void omap_dispc_enable_digit_out(int enable);
 
-extern int  omap_dispc_request_irq(void (*callback)(void *data), void *data);
-extern void omap_dispc_free_irq(void);
+extern int omap_dispc_request_irq(unsigned long irq_mask,
+				   void (*callback)(void *data), void *data);
+extern void omap_dispc_free_irq(unsigned long irq_mask,
+				 void (*callback)(void *data), void *data);
 
 extern const struct lcd_ctrl omap2_int_ctrl;
-
 #endif
diff --git a/drivers/video/omap/rfbi.c b/drivers/video/omap/rfbi.c
index 9332d6c..ee01e84 100644
--- a/drivers/video/omap/rfbi.c
+++ b/drivers/video/omap/rfbi.c
@@ -57,6 +57,7 @@
 
 #define DISPC_BASE		0x48050400
 #define DISPC_CONTROL		0x0040
+#define DISPC_IRQ_FRAMEMASK     0x0001
 
 static struct {
 	void __iomem	*base;
@@ -553,7 +554,9 @@ static int rfbi_init(struct omapfb_device *fbdev)
 	l = (0x01 << 2);
 	rfbi_write_reg(RFBI_CONTROL, l);
 
-	if ((r = omap_dispc_request_irq(rfbi_dma_callback, NULL)) < 0) {
+	r = omap_dispc_request_irq(DISPC_IRQ_FRAMEMASK, rfbi_dma_callback,
+				   NULL);
+	if (r < 0) {
 		dev_err(fbdev->dev, "can't get DISPC irq\n");
 		rfbi_enable_clocks(0);
 		return r;
@@ -570,7 +573,7 @@ static int rfbi_init(struct omapfb_device *fbdev)
 
 static void rfbi_cleanup(void)
 {
-	omap_dispc_free_irq();
+	omap_dispc_free_irq(DISPC_IRQ_FRAMEMASK, rfbi_dma_callback, NULL);
 	rfbi_put_clocks();
 	iounmap(rfbi.base);
 }
-- 
cgit v0.10.2


From b6c2b66d353dba229ce167acef49639b9ddf90d9 Mon Sep 17 00:00:00 2001
From: Jouni Hogander <jouni.hogander@nokia.com>
Date: Tue, 22 Sep 2009 16:46:58 -0700
Subject: omapfb: suspend/resume only if FB device is already initialized

Check wether fbdev is NULL in suspend / resume functions. Fbdev is
NULL, if there is no lcd or it is not enabled in configuration.

Signed-off-by: Jouni Hogander <jouni.hogander@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index db05f7e..7eab0f0 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -1846,8 +1846,8 @@ static int omapfb_suspend(struct platform_device *pdev, pm_message_t mesg)
 {
 	struct omapfb_device *fbdev = platform_get_drvdata(pdev);
 
-	omapfb_blank(FB_BLANK_POWERDOWN, fbdev->fb_info[0]);
-
+	if (fbdev != NULL)
+		omapfb_blank(FB_BLANK_POWERDOWN, fbdev->fb_info[0]);
 	return 0;
 }
 
@@ -1856,7 +1856,8 @@ static int omapfb_resume(struct platform_device *pdev)
 {
 	struct omapfb_device *fbdev = platform_get_drvdata(pdev);
 
-	omapfb_blank(FB_BLANK_UNBLANK, fbdev->fb_info[0]);
+	if (fbdev != NULL)
+		omapfb_blank(FB_BLANK_UNBLANK, fbdev->fb_info[0]);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 5910d35cd6c0122a490000a6de0774ac7ebcc2de Mon Sep 17 00:00:00 2001
From: arun c <arunedarath@mistralsolutions.com>
Date: Tue, 22 Sep 2009 16:46:59 -0700
Subject: omapfb: fix coding style / remove dead line

- use __iomem type attribute where appropriate
- expand (a ? : b) to (a ? a : b)
As suggested by Russel King <linux@arm.linux.org.uk>

- remove a dead line from omapfb_main.c

Signed-off-by: Arun C <arunedarath@mistralsolutions.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/plat-omap/include/mach/omapfb.h b/arch/arm/plat-omap/include/mach/omapfb.h
index 7b74d12..b226bdf 100644
--- a/arch/arm/plat-omap/include/mach/omapfb.h
+++ b/arch/arm/plat-omap/include/mach/omapfb.h
@@ -276,8 +276,8 @@ typedef int (*omapfb_notifier_callback_t)(struct notifier_block *,
 					  void *fbi);
 
 struct omapfb_mem_region {
-	dma_addr_t	paddr;
-	void		*vaddr;
+	u32		paddr;
+	void __iomem	*vaddr;
 	unsigned long	size;
 	u8		type;		/* OMAPFB_PLANE_MEM_* */
 	unsigned	alloc:1;	/* allocated by the driver */
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 7eab0f0..125e605 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -564,7 +564,6 @@ static int set_fb_var(struct fb_info *fbi,
 		var->xoffset = var->xres_virtual - var->xres;
 	if (var->yres + var->yoffset > var->yres_virtual)
 		var->yoffset = var->yres_virtual - var->yres;
-	line_size = var->xres * bpp / 8;
 
 	if (plane->color_mode == OMAPFB_COLOR_RGB444) {
 		var->red.offset	  = 8; var->red.length	 = 4;
@@ -1723,8 +1722,8 @@ static int omapfb_do_probe(struct platform_device *pdev,
 
 	pr_info("omapfb: configured for panel %s\n", fbdev->panel->name);
 
-	def_vxres = def_vxres ? : fbdev->panel->x_res;
-	def_vyres = def_vyres ? : fbdev->panel->y_res;
+	def_vxres = def_vxres ? def_vxres : fbdev->panel->x_res;
+	def_vyres = def_vyres ? def_vyres : fbdev->panel->y_res;
 
 	init_state++;
 
-- 
cgit v0.10.2


From b1d51dbb79c6398c88b45d4211545621b4d1a11c Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Tue, 22 Sep 2009 16:47:00 -0700
Subject: omapfb: add FB manual update option to Kconfig

Also move the controller specific options up in the menu, to a more
logical spot.

Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/Kconfig b/drivers/video/omap/Kconfig
index 2bffc07..551e3e9 100644
--- a/drivers/video/omap/Kconfig
+++ b/drivers/video/omap/Kconfig
@@ -32,6 +32,36 @@ config FB_OMAP_092M9R
 
 endchoice
 
+config FB_OMAP_LCDC_EXTERNAL
+	bool "External LCD controller support"
+	depends on FB_OMAP
+	help
+	  Say Y here, if you want to have support for boards with an
+	  external LCD controller connected to the SoSSI/RFBI interface.
+
+config FB_OMAP_LCDC_HWA742
+	bool "Epson HWA742 LCD controller support"
+	depends on FB_OMAP && FB_OMAP_LCDC_EXTERNAL
+	help
+	  Say Y here if you want to have support for the external
+	  Epson HWA742 LCD controller.
+
+config FB_OMAP_LCDC_BLIZZARD
+	bool "Epson Blizzard LCD controller support"
+	depends on FB_OMAP && FB_OMAP_LCDC_EXTERNAL
+	help
+	  Say Y here if you want to have support for the external
+	  Epson Blizzard LCD controller.
+
+config FB_OMAP_MANUAL_UPDATE
+	bool "Default to manual update mode"
+	depends on FB_OMAP && FB_OMAP_LCDC_EXTERNAL
+	help
+	  Say Y here, if your user-space applications are capable of
+	  notifying the frame buffer driver when a change has occured in
+	  the frame buffer content and thus a reload of the image data to
+	  the external frame buffer is required. If unsure, say N.
+
 config FB_OMAP_LCD_MIPID
 	bool "MIPI DBI-C/DCS compatible LCD support"
 	depends on FB_OMAP && SPI_MASTER
@@ -69,23 +99,4 @@ config FB_OMAP_DMA_TUNE
           answer yes. Answer no if you have a dedicated video
           memory, or don't use any of the accelerated features.
 
-config FB_OMAP_LCDC_EXTERNAL
-	bool "External LCD controller support"
-	depends on FB_OMAP
-	help
-	  Say Y here, if you want to have support for boards with an
-	  external LCD controller connected to the SoSSI/RFBI interface.
 
-config FB_OMAP_LCDC_HWA742
-	bool "Epson HWA742 LCD controller support"
-	depends on FB_OMAP && FB_OMAP_LCDC_EXTERNAL
-	help
-	  Say Y here if you want to have support for the external
-	  Epson HWA742 LCD controller.
-
-config FB_OMAP_LCDC_BLIZZARD
-	bool "Epson Blizzard LCD controller support"
-	depends on FB_OMAP && FB_OMAP_LCDC_EXTERNAL
-	help
-	  Say Y here if you want to have support for the external
-	  Epson Blizzard LCD controller.
-- 
cgit v0.10.2


From 366ec51ba9622c44191fcf4d77ed4dc84acbdeec Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Tue, 22 Sep 2009 16:47:00 -0700
Subject: omapfb: HWA742: fix pointer to be const

Fixes the following:
warning: assignment discards qualifiers from pointer target type

Signed-off-by: Imre Deak <imre.deak@nokia.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/omap/hwa742.c b/drivers/video/omap/hwa742.c
index 5d4f348..ca51583 100644
--- a/drivers/video/omap/hwa742.c
+++ b/drivers/video/omap/hwa742.c
@@ -131,7 +131,7 @@ struct {
 
 	struct omapfb_device	*fbdev;
 	struct lcd_ctrl_extif	*extif;
-	struct lcd_ctrl		*int_ctrl;
+	const struct lcd_ctrl	*int_ctrl;
 
 	struct clk		*sys_ck;
 } hwa742;
-- 
cgit v0.10.2


From 689620100172e24fdf0981e9978a9559e8769258 Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Tue, 22 Sep 2009 16:47:01 -0700
Subject: atyfb: coding style cleanup

Fix a bunch of coding style problems in atyfb_base.c.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: "H Hartley Sweeten" <hartleys@visionengravers.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 63d3739..913b4a4 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -132,7 +132,7 @@
 #endif
 
 #define PRINTKI(fmt, args...)	printk(KERN_INFO "atyfb: " fmt, ## args)
-#define PRINTKE(fmt, args...)	 printk(KERN_ERR "atyfb: " fmt, ## args)
+#define PRINTKE(fmt, args...)	printk(KERN_ERR "atyfb: " fmt, ## args)
 
 #if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || \
 defined (CONFIG_FB_ATY_GENERIC_LCD) || defined(CONFIG_FB_ATY_BACKLIGHT)
@@ -188,24 +188,23 @@ u32 aty_ld_lcd(int index, const struct atyfb_par *par)
  */
 static void ATIReduceRatio(int *Numerator, int *Denominator)
 {
-    int Multiplier, Divider, Remainder;
+	int Multiplier, Divider, Remainder;
 
-    Multiplier = *Numerator;
-    Divider = *Denominator;
+	Multiplier = *Numerator;
+	Divider = *Denominator;
 
-    while ((Remainder = Multiplier % Divider))
-    {
-        Multiplier = Divider;
-        Divider = Remainder;
-    }
+	while ((Remainder = Multiplier % Divider)) {
+		Multiplier = Divider;
+		Divider = Remainder;
+	}
 
-    *Numerator /= Divider;
-    *Denominator /= Divider;
+	*Numerator /= Divider;
+	*Denominator /= Divider;
 }
 #endif
-    /*
-     *  The Hardware parameters for each card
-     */
+/*
+ * The Hardware parameters for each card
+ */
 
 struct pci_mmap_map {
 	unsigned long voff;
@@ -223,17 +222,19 @@ static struct fb_fix_screeninfo atyfb_fix __devinitdata = {
 	.ypanstep	= 1,
 };
 
-    /*
-     *  Frame buffer device API
-     */
+/*
+ * Frame buffer device API
+ */
 
 static int atyfb_open(struct fb_info *info, int user);
 static int atyfb_release(struct fb_info *info, int user);
-static int atyfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info);
+static int atyfb_check_var(struct fb_var_screeninfo *var,
+			   struct fb_info *info);
 static int atyfb_set_par(struct fb_info *info);
 static int atyfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
-	u_int transp, struct fb_info *info);
-static int atyfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info);
+			   u_int transp, struct fb_info *info);
+static int atyfb_pan_display(struct fb_var_screeninfo *var,
+			     struct fb_info *info);
 static int atyfb_blank(int blank, struct fb_info *info);
 static int atyfb_ioctl(struct fb_info *info, u_int cmd, u_long arg);
 #ifdef __sparc__
@@ -241,9 +242,9 @@ static int atyfb_mmap(struct fb_info *info, struct vm_area_struct *vma);
 #endif
 static int atyfb_sync(struct fb_info *info);
 
-    /*
-     *  Internal routines
-     */
+/*
+ * Internal routines
+ */
 
 static int aty_init(struct fb_info *info);
 
@@ -254,8 +255,11 @@ static int store_video_par(char *videopar, unsigned char m64_num);
 static void aty_get_crtc(const struct atyfb_par *par, struct crtc *crtc);
 
 static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc);
-static int aty_var_to_crtc(const struct fb_info *info, const struct fb_var_screeninfo *var, struct crtc *crtc);
-static int aty_crtc_to_var(const struct crtc *crtc, struct fb_var_screeninfo *var);
+static int aty_var_to_crtc(const struct fb_info *info,
+			   const struct fb_var_screeninfo *var,
+			   struct crtc *crtc);
+static int aty_crtc_to_var(const struct crtc *crtc,
+			   struct fb_var_screeninfo *var);
 static void set_off_pitch(struct atyfb_par *par, const struct fb_info *info);
 #ifdef CONFIG_PPC
 static int read_aty_sense(const struct atyfb_par *par);
@@ -264,9 +268,9 @@ static int read_aty_sense(const struct atyfb_par *par);
 static DEFINE_MUTEX(reboot_lock);
 static struct fb_info *reboot_info;
 
-    /*
-     *  Interface used by the world
-     */
+/*
+ * Interface used by the world
+ */
 
 static struct fb_var_screeninfo default_var = {
 	/* 640x480, 60 Hz, Non-Interlaced (25.175 MHz dotclock) */
@@ -452,14 +456,14 @@ static int __devinit correct_chipset(struct atyfb_par *par)
 	type = chip_id & CFG_CHIP_TYPE;
 	rev = (chip_id & CFG_CHIP_REV) >> 24;
 
-	switch(par->pci_id) {
+	switch (par->pci_id) {
 #ifdef CONFIG_FB_ATY_GX
 	case PCI_CHIP_MACH64GX:
-		if(type != 0x00d7)
+		if (type != 0x00d7)
 			return -ENODEV;
 		break;
 	case PCI_CHIP_MACH64CX:
-		if(type != 0x0057)
+		if (type != 0x0057)
 			return -ENODEV;
 		break;
 #endif
@@ -564,7 +568,8 @@ static char *aty_xl_ram[8] __devinitdata = {
 };
 #endif /* CONFIG_FB_ATY_CT */
 
-static u32 atyfb_get_pixclock(struct fb_var_screeninfo *var, struct atyfb_par *par)
+static u32 atyfb_get_pixclock(struct fb_var_screeninfo *var,
+			      struct atyfb_par *par)
 {
 	u32 pixclock = var->pixclock;
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
@@ -572,7 +577,7 @@ static u32 atyfb_get_pixclock(struct fb_var_screeninfo *var, struct atyfb_par *p
 	par->pll.ct.xres = 0;
 	if (par->lcd_table != 0) {
 		lcd_on_off = aty_ld_lcd(LCD_GEN_CNTL, par);
-		if(lcd_on_off & LCD_ON) {
+		if (lcd_on_off & LCD_ON) {
 			par->pll.ct.xres = var->xres;
 			pixclock = par->lcd_pixclock;
 		}
@@ -584,7 +589,7 @@ static u32 atyfb_get_pixclock(struct fb_var_screeninfo *var, struct atyfb_par *p
 #if defined(CONFIG_PPC)
 
 /*
- *  Apple monitor sense
+ * Apple monitor sense
  */
 
 static int __devinit read_aty_sense(const struct atyfb_par *par)
@@ -625,16 +630,16 @@ static int __devinit read_aty_sense(const struct atyfb_par *par)
 /* ------------------------------------------------------------------------- */
 
 /*
- *  CRTC programming
+ * CRTC programming
  */
 
 static void aty_get_crtc(const struct atyfb_par *par, struct crtc *crtc)
 {
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table != 0) {
-		if(!M64_HAS(LT_LCD_REGS)) {
-		    crtc->lcd_index = aty_ld_le32(LCD_INDEX, par);
-		    aty_st_le32(LCD_INDEX, crtc->lcd_index, par);
+		if (!M64_HAS(LT_LCD_REGS)) {
+			crtc->lcd_index = aty_ld_le32(LCD_INDEX, par);
+			aty_st_le32(LCD_INDEX, crtc->lcd_index, par);
 		}
 		crtc->lcd_config_panel = aty_ld_lcd(CNFG_PANEL, par);
 		crtc->lcd_gen_cntl = aty_ld_lcd(LCD_GEN_CNTL, par);
@@ -642,7 +647,7 @@ static void aty_get_crtc(const struct atyfb_par *par, struct crtc *crtc)
 
 		/* switch to non shadow registers */
 		aty_st_lcd(LCD_GEN_CNTL, crtc->lcd_gen_cntl &
-                    ~(CRTC_RW_SELECT | SHADOW_EN | SHADOW_RW_EN), par);
+			   ~(CRTC_RW_SELECT | SHADOW_EN | SHADOW_RW_EN), par);
 
 		/* save stretching */
 		crtc->horz_stretching = aty_ld_lcd(HORZ_STRETCHING, par);
@@ -663,7 +668,7 @@ static void aty_get_crtc(const struct atyfb_par *par, struct crtc *crtc)
 	if (par->lcd_table != 0) {
 		/* switch to shadow registers */
 		aty_st_lcd(LCD_GEN_CNTL, (crtc->lcd_gen_cntl & ~CRTC_RW_SELECT) |
-			SHADOW_EN | SHADOW_RW_EN, par);
+			   SHADOW_EN | SHADOW_RW_EN, par);
 
 		crtc->shadow_h_tot_disp = aty_ld_le32(CRTC_H_TOTAL_DISP, par);
 		crtc->shadow_h_sync_strt_wid = aty_ld_le32(CRTC_H_SYNC_STRT_WID, par);
@@ -680,21 +685,20 @@ static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc)
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table != 0) {
 		/* stop CRTC */
-		aty_st_le32(CRTC_GEN_CNTL, crtc->gen_cntl & ~(CRTC_EXT_DISP_EN | CRTC_EN), par);
+		aty_st_le32(CRTC_GEN_CNTL, crtc->gen_cntl &
+			    ~(CRTC_EXT_DISP_EN | CRTC_EN), par);
 
 		/* update non-shadow registers first */
 		aty_st_lcd(CNFG_PANEL, crtc->lcd_config_panel, par);
 		aty_st_lcd(LCD_GEN_CNTL, crtc->lcd_gen_cntl &
-			~(CRTC_RW_SELECT | SHADOW_EN | SHADOW_RW_EN), par);
+			   ~(CRTC_RW_SELECT | SHADOW_EN | SHADOW_RW_EN), par);
 
 		/* temporarily disable stretching */
-		aty_st_lcd(HORZ_STRETCHING,
-			crtc->horz_stretching &
-			~(HORZ_STRETCH_MODE | HORZ_STRETCH_EN), par);
-		aty_st_lcd(VERT_STRETCHING,
-			crtc->vert_stretching &
-			~(VERT_STRETCH_RATIO1 | VERT_STRETCH_RATIO2 |
-			VERT_STRETCH_USE0 | VERT_STRETCH_EN), par);
+		aty_st_lcd(HORZ_STRETCHING, crtc->horz_stretching &
+			   ~(HORZ_STRETCH_MODE | HORZ_STRETCH_EN), par);
+		aty_st_lcd(VERT_STRETCHING, crtc->vert_stretching &
+			   ~(VERT_STRETCH_RATIO1 | VERT_STRETCH_RATIO2 |
+			     VERT_STRETCH_USE0 | VERT_STRETCH_EN), par);
 	}
 #endif
 	/* turn off CRT */
@@ -702,17 +706,19 @@ static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc)
 
 	DPRINTK("setting up CRTC\n");
 	DPRINTK("set primary CRT to %ix%i %c%c composite %c\n",
-	    ((((crtc->h_tot_disp>>16) & 0xff) + 1)<<3), (((crtc->v_tot_disp>>16) & 0x7ff) + 1),
-	    (crtc->h_sync_strt_wid & 0x200000)?'N':'P', (crtc->v_sync_strt_wid & 0x200000)?'N':'P',
-	    (crtc->gen_cntl & CRTC_CSYNC_EN)?'P':'N');
-
-	DPRINTK("CRTC_H_TOTAL_DISP: %x\n",crtc->h_tot_disp);
-	DPRINTK("CRTC_H_SYNC_STRT_WID: %x\n",crtc->h_sync_strt_wid);
-	DPRINTK("CRTC_V_TOTAL_DISP: %x\n",crtc->v_tot_disp);
-	DPRINTK("CRTC_V_SYNC_STRT_WID: %x\n",crtc->v_sync_strt_wid);
+		((((crtc->h_tot_disp >> 16) & 0xff) + 1) << 3),
+		(((crtc->v_tot_disp >> 16) & 0x7ff) + 1),
+		(crtc->h_sync_strt_wid & 0x200000) ? 'N' : 'P',
+		(crtc->v_sync_strt_wid & 0x200000) ? 'N' : 'P',
+		(crtc->gen_cntl & CRTC_CSYNC_EN) ? 'P' : 'N');
+
+	DPRINTK("CRTC_H_TOTAL_DISP: %x\n", crtc->h_tot_disp);
+	DPRINTK("CRTC_H_SYNC_STRT_WID: %x\n", crtc->h_sync_strt_wid);
+	DPRINTK("CRTC_V_TOTAL_DISP: %x\n", crtc->v_tot_disp);
+	DPRINTK("CRTC_V_SYNC_STRT_WID: %x\n", crtc->v_sync_strt_wid);
 	DPRINTK("CRTC_OFF_PITCH: %x\n", crtc->off_pitch);
 	DPRINTK("CRTC_VLINE_CRNT_VLINE: %x\n", crtc->vline_crnt_vline);
-	DPRINTK("CRTC_GEN_CNTL: %x\n",crtc->gen_cntl);
+	DPRINTK("CRTC_GEN_CNTL: %x\n", crtc->gen_cntl);
 
 	aty_st_le32(CRTC_H_TOTAL_DISP, crtc->h_tot_disp, par);
 	aty_st_le32(CRTC_H_SYNC_STRT_WID, crtc->h_sync_strt_wid, par);
@@ -732,16 +738,22 @@ static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc)
 	if (par->lcd_table != 0) {
 		/* switch to shadow registers */
 		aty_st_lcd(LCD_GEN_CNTL, (crtc->lcd_gen_cntl & ~CRTC_RW_SELECT) |
-			(SHADOW_EN | SHADOW_RW_EN), par);
+			   SHADOW_EN | SHADOW_RW_EN, par);
 
 		DPRINTK("set shadow CRT to %ix%i %c%c\n",
-		    ((((crtc->shadow_h_tot_disp>>16) & 0xff) + 1)<<3), (((crtc->shadow_v_tot_disp>>16) & 0x7ff) + 1),
-		    (crtc->shadow_h_sync_strt_wid & 0x200000)?'N':'P', (crtc->shadow_v_sync_strt_wid & 0x200000)?'N':'P');
-
-		DPRINTK("SHADOW CRTC_H_TOTAL_DISP: %x\n", crtc->shadow_h_tot_disp);
-		DPRINTK("SHADOW CRTC_H_SYNC_STRT_WID: %x\n", crtc->shadow_h_sync_strt_wid);
-		DPRINTK("SHADOW CRTC_V_TOTAL_DISP: %x\n", crtc->shadow_v_tot_disp);
-		DPRINTK("SHADOW CRTC_V_SYNC_STRT_WID: %x\n", crtc->shadow_v_sync_strt_wid);
+			((((crtc->shadow_h_tot_disp >> 16) & 0xff) + 1) << 3),
+			(((crtc->shadow_v_tot_disp >> 16) & 0x7ff) + 1),
+			(crtc->shadow_h_sync_strt_wid & 0x200000) ? 'N' : 'P',
+			(crtc->shadow_v_sync_strt_wid & 0x200000) ? 'N' : 'P');
+
+		DPRINTK("SHADOW CRTC_H_TOTAL_DISP: %x\n",
+			crtc->shadow_h_tot_disp);
+		DPRINTK("SHADOW CRTC_H_SYNC_STRT_WID: %x\n",
+			crtc->shadow_h_sync_strt_wid);
+		DPRINTK("SHADOW CRTC_V_TOTAL_DISP: %x\n",
+			crtc->shadow_v_tot_disp);
+		DPRINTK("SHADOW CRTC_V_SYNC_STRT_WID: %x\n",
+			crtc->shadow_v_sync_strt_wid);
 
 		aty_st_le32(CRTC_H_TOTAL_DISP, crtc->shadow_h_tot_disp, par);
 		aty_st_le32(CRTC_H_SYNC_STRT_WID, crtc->shadow_h_sync_strt_wid, par);
@@ -752,16 +764,16 @@ static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc)
 		DPRINTK("LCD_GEN_CNTL: %x\n", crtc->lcd_gen_cntl);
 		DPRINTK("HORZ_STRETCHING: %x\n", crtc->horz_stretching);
 		DPRINTK("VERT_STRETCHING: %x\n", crtc->vert_stretching);
-		if(!M64_HAS(LT_LCD_REGS))
-		    DPRINTK("EXT_VERT_STRETCH: %x\n", crtc->ext_vert_stretch);
+		if (!M64_HAS(LT_LCD_REGS))
+			DPRINTK("EXT_VERT_STRETCH: %x\n", crtc->ext_vert_stretch);
 
 		aty_st_lcd(LCD_GEN_CNTL, crtc->lcd_gen_cntl, par);
 		aty_st_lcd(HORZ_STRETCHING, crtc->horz_stretching, par);
 		aty_st_lcd(VERT_STRETCHING, crtc->vert_stretching, par);
-		if(!M64_HAS(LT_LCD_REGS)) {
-		    aty_st_lcd(EXT_VERT_STRETCH, crtc->ext_vert_stretch, par);
-		    aty_ld_le32(LCD_INDEX, par);
-		    aty_st_le32(LCD_INDEX, crtc->lcd_index, par);
+		if (!M64_HAS(LT_LCD_REGS)) {
+			aty_st_lcd(EXT_VERT_STRETCH, crtc->ext_vert_stretch, par);
+			aty_ld_le32(LCD_INDEX, par);
+			aty_st_le32(LCD_INDEX, crtc->lcd_index, par);
 		}
 	}
 #endif /* CONFIG_FB_ATY_GENERIC_LCD */
@@ -779,7 +791,8 @@ static u32 calc_line_length(struct atyfb_par *par, u32 vxres, u32 bpp)
 }
 
 static int aty_var_to_crtc(const struct fb_info *info,
-	const struct fb_var_screeninfo *var, struct crtc *crtc)
+			   const struct fb_var_screeninfo *var,
+			   struct crtc *crtc)
 {
 	struct atyfb_par *par = (struct atyfb_par *) info->par;
 	u32 xres, yres, vxres, vyres, xoffset, yoffset, bpp;
@@ -814,34 +827,32 @@ static int aty_var_to_crtc(const struct fb_info *info,
 	if (bpp <= 8) {
 		bpp = 8;
 		pix_width = CRTC_PIX_WIDTH_8BPP;
-		dp_pix_width =
-		    HOST_8BPP | SRC_8BPP | DST_8BPP |
-		    BYTE_ORDER_LSB_TO_MSB;
+		dp_pix_width = HOST_8BPP | SRC_8BPP | DST_8BPP |
+			BYTE_ORDER_LSB_TO_MSB;
 		dp_chain_mask = DP_CHAIN_8BPP;
 	} else if (bpp <= 15) {
 		bpp = 16;
 		pix_width = CRTC_PIX_WIDTH_15BPP;
 		dp_pix_width = HOST_15BPP | SRC_15BPP | DST_15BPP |
-		    BYTE_ORDER_LSB_TO_MSB;
+			BYTE_ORDER_LSB_TO_MSB;
 		dp_chain_mask = DP_CHAIN_15BPP;
 	} else if (bpp <= 16) {
 		bpp = 16;
 		pix_width = CRTC_PIX_WIDTH_16BPP;
 		dp_pix_width = HOST_16BPP | SRC_16BPP | DST_16BPP |
-		    BYTE_ORDER_LSB_TO_MSB;
+			BYTE_ORDER_LSB_TO_MSB;
 		dp_chain_mask = DP_CHAIN_16BPP;
 	} else if (bpp <= 24 && M64_HAS(INTEGRATED)) {
 		bpp = 24;
 		pix_width = CRTC_PIX_WIDTH_24BPP;
-		dp_pix_width =
-		    HOST_8BPP | SRC_8BPP | DST_8BPP |
-		    BYTE_ORDER_LSB_TO_MSB;
+		dp_pix_width = HOST_8BPP | SRC_8BPP | DST_8BPP |
+			BYTE_ORDER_LSB_TO_MSB;
 		dp_chain_mask = DP_CHAIN_24BPP;
 	} else if (bpp <= 32) {
 		bpp = 32;
 		pix_width = CRTC_PIX_WIDTH_32BPP;
 		dp_pix_width = HOST_32BPP | SRC_32BPP | DST_32BPP |
-		    BYTE_ORDER_LSB_TO_MSB;
+			BYTE_ORDER_LSB_TO_MSB;
 		dp_chain_mask = DP_CHAIN_32BPP;
 	} else
 		FAIL("invalid bpp");
@@ -854,9 +865,9 @@ static int aty_var_to_crtc(const struct fb_info *info,
 	h_sync_pol = sync & FB_SYNC_HOR_HIGH_ACT ? 0 : 1;
 	v_sync_pol = sync & FB_SYNC_VERT_HIGH_ACT ? 0 : 1;
 
-	if((xres > 1600) || (yres > 1200)) {
+	if ((xres > 1600) || (yres > 1200)) {
 		FAIL("MACH64 chips are designed for max 1600x1200\n"
-		"select anoter resolution.");
+		     "select anoter resolution.");
 	}
 	h_sync_strt = h_disp + var->right_margin;
 	h_sync_end = h_sync_strt + var->hsync_len;
@@ -869,11 +880,12 @@ static int aty_var_to_crtc(const struct fb_info *info,
 
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table != 0) {
-		if(!M64_HAS(LT_LCD_REGS)) {
-		    u32 lcd_index = aty_ld_le32(LCD_INDEX, par);
-		    crtc->lcd_index = lcd_index &
-			~(LCD_INDEX_MASK | LCD_DISPLAY_DIS | LCD_SRC_SEL | CRTC2_DISPLAY_DIS);
-		    aty_st_le32(LCD_INDEX, lcd_index, par);
+		if (!M64_HAS(LT_LCD_REGS)) {
+			u32 lcd_index = aty_ld_le32(LCD_INDEX, par);
+			crtc->lcd_index = lcd_index &
+				~(LCD_INDEX_MASK | LCD_DISPLAY_DIS |
+				  LCD_SRC_SEL | CRTC2_DISPLAY_DIS);
+			aty_st_le32(LCD_INDEX, lcd_index, par);
 		}
 
 		if (!M64_HAS(MOBIL_BUS))
@@ -888,12 +900,14 @@ static int aty_var_to_crtc(const struct fb_info *info,
 			USE_SHADOWED_ROWCUR | SHADOW_EN | SHADOW_RW_EN);
 		crtc->lcd_gen_cntl |= DONT_SHADOW_VPAR | LOCK_8DOT;
 
-		if((crtc->lcd_gen_cntl & LCD_ON) &&
-			((xres > par->lcd_width) || (yres > par->lcd_height))) {
-			/* We cannot display the mode on the LCD. If the CRT is enabled
-			   we can turn off the LCD.
-			   If the CRT is off, it isn't a good idea to switch it on; we don't
-			   know if one is connected. So it's better to fail then.
+		if ((crtc->lcd_gen_cntl & LCD_ON) &&
+		    ((xres > par->lcd_width) || (yres > par->lcd_height))) {
+			/*
+			 * We cannot display the mode on the LCD. If the CRT is
+			 * enabled we can turn off the LCD.
+			 * If the CRT is off, it isn't a good idea to switch it
+			 * on; we don't know if one is connected. So it's better
+			 * to fail then.
 			 */
 			if (crtc->lcd_gen_cntl & CRT_ON) {
 				if (!(var->activate & FB_ACTIVATE_TEST))
@@ -916,17 +930,18 @@ static int aty_var_to_crtc(const struct fb_info *info,
 
 		vmode &= ~(FB_VMODE_DOUBLE | FB_VMODE_INTERLACED);
 
-		/* This is horror! When we simulate, say 640x480 on an 800x600
-		   LCD monitor, the CRTC should be programmed 800x600 values for
-		   the non visible part, but 640x480 for the visible part.
-		   This code has been tested on a laptop with it's 1400x1050 LCD
-		   monitor and a conventional monitor both switched on.
-		   Tested modes: 1280x1024, 1152x864, 1024x768, 800x600,
-		    works with little glitches also with DOUBLESCAN modes
+		/*
+		 * This is horror! When we simulate, say 640x480 on an 800x600
+		 * LCD monitor, the CRTC should be programmed 800x600 values for
+		 * the non visible part, but 640x480 for the visible part.
+		 * This code has been tested on a laptop with it's 1400x1050 LCD
+		 * monitor and a conventional monitor both switched on.
+		 * Tested modes: 1280x1024, 1152x864, 1024x768, 800x600,
+		 * works with little glitches also with DOUBLESCAN modes
 		 */
 		if (yres < par->lcd_height) {
 			VScan = par->lcd_height / yres;
-			if(VScan > 1) {
+			if (VScan > 1) {
 				VScan = 2;
 				vmode |= FB_VMODE_DOUBLE;
 			}
@@ -952,7 +967,7 @@ static int aty_var_to_crtc(const struct fb_info *info,
 	FAIL_MAX("h_disp too large", h_disp, 0xff);
 	FAIL_MAX("h_sync_strt too large", h_sync_strt, 0x1ff);
 	/*FAIL_MAX("h_sync_wid too large", h_sync_wid, 0x1f);*/
-	if(h_sync_wid > 0x1f)
+	if (h_sync_wid > 0x1f)
 		h_sync_wid = 0x1f;
 	FAIL_MAX("h_total too large", h_total, 0x1ff);
 
@@ -978,7 +993,7 @@ static int aty_var_to_crtc(const struct fb_info *info,
 	FAIL_MAX("v_disp too large", v_disp, 0x7ff);
 	FAIL_MAX("v_sync_stsrt too large", v_sync_strt, 0x7ff);
 	/*FAIL_MAX("v_sync_wid too large", v_sync_wid, 0x1f);*/
-	if(v_sync_wid > 0x1f)
+	if (v_sync_wid > 0x1f)
 		v_sync_wid = 0x1f;
 	FAIL_MAX("v_total too large", v_total, 0x7ff);
 
@@ -995,11 +1010,13 @@ static int aty_var_to_crtc(const struct fb_info *info,
 		((line_length / bpp) << 22);
 	crtc->vline_crnt_vline = 0;
 
-	crtc->h_tot_disp = h_total | (h_disp<<16);
-	crtc->h_sync_strt_wid = (h_sync_strt & 0xff) | (h_sync_dly<<8) |
-		((h_sync_strt & 0x100)<<4) | (h_sync_wid<<16) | (h_sync_pol<<21);
-	crtc->v_tot_disp = v_total | (v_disp<<16);
-	crtc->v_sync_strt_wid = v_sync_strt | (v_sync_wid<<16) | (v_sync_pol<<21);
+	crtc->h_tot_disp = h_total | (h_disp << 16);
+	crtc->h_sync_strt_wid = (h_sync_strt & 0xff) | (h_sync_dly << 8) |
+		((h_sync_strt & 0x100) << 4) | (h_sync_wid << 16) |
+		(h_sync_pol << 21);
+	crtc->v_tot_disp = v_total | (v_disp << 16);
+	crtc->v_sync_strt_wid = v_sync_strt | (v_sync_wid << 16) |
+		(v_sync_pol << 21);
 
 	/* crtc->gen_cntl = aty_ld_le32(CRTC_GEN_CNTL, par) & CRTC_PRESERVED_MASK; */
 	crtc->gen_cntl = CRTC_EXT_DISP_EN | CRTC_EN | pix_width | c_sync;
@@ -1014,13 +1031,15 @@ static int aty_var_to_crtc(const struct fb_info *info,
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table != 0) {
 		vdisplay = yres;
-		if(vmode & FB_VMODE_DOUBLE)
+		if (vmode & FB_VMODE_DOUBLE)
 			vdisplay <<= 1;
 		crtc->gen_cntl &= ~(CRTC2_EN | CRTC2_PIX_WIDTH);
 		crtc->lcd_gen_cntl &= ~(HORZ_DIVBY2_EN | DIS_HOR_CRT_DIVBY2 |
-			/*TVCLK_PM_EN | VCLK_DAC_PM_EN |*/
-			USE_SHADOWED_VEND | USE_SHADOWED_ROWCUR | SHADOW_EN | SHADOW_RW_EN);
-		crtc->lcd_gen_cntl |= (DONT_SHADOW_VPAR/* | LOCK_8DOT*/);
+					/*TVCLK_PM_EN | VCLK_DAC_PM_EN |*/
+					USE_SHADOWED_VEND |
+					USE_SHADOWED_ROWCUR |
+					SHADOW_EN | SHADOW_RW_EN);
+		crtc->lcd_gen_cntl |= DONT_SHADOW_VPAR/* | LOCK_8DOT*/;
 
 		/* MOBILITY M1 tested, FIXME: LT */
 		crtc->horz_stretching = aty_ld_lcd(HORZ_STRETCHING, par);
@@ -1028,28 +1047,32 @@ static int aty_var_to_crtc(const struct fb_info *info,
 			crtc->ext_vert_stretch = aty_ld_lcd(EXT_VERT_STRETCH, par) &
 				~(AUTO_VERT_RATIO | VERT_STRETCH_MODE | VERT_STRETCH_RATIO3);
 
-		crtc->horz_stretching &=
-			~(HORZ_STRETCH_RATIO | HORZ_STRETCH_LOOP | AUTO_HORZ_RATIO |
-			HORZ_STRETCH_MODE | HORZ_STRETCH_EN);
+		crtc->horz_stretching &= ~(HORZ_STRETCH_RATIO |
+					   HORZ_STRETCH_LOOP | AUTO_HORZ_RATIO |
+					   HORZ_STRETCH_MODE | HORZ_STRETCH_EN);
 		if (xres < par->lcd_width && crtc->lcd_gen_cntl & LCD_ON) {
 			do {
 				/*
-				* The horizontal blender misbehaves when HDisplay is less than a
-				* a certain threshold (440 for a 1024-wide panel).  It doesn't
-				* stretch such modes enough.  Use pixel replication instead of
-				* blending to stretch modes that can be made to exactly fit the
-				* panel width.  The undocumented "NoLCDBlend" option allows the
-				* pixel-replicated mode to be slightly wider or narrower than the
-				* panel width.  It also causes a mode that is exactly half as wide
-				* as the panel to be pixel-replicated, rather than blended.
-				*/
+				 * The horizontal blender misbehaves when
+				 * HDisplay is less than a certain threshold
+				 * (440 for a 1024-wide panel).  It doesn't
+				 * stretch such modes enough.  Use pixel
+				 * replication instead of blending to stretch
+				 * modes that can be made to exactly fit the
+				 * panel width.  The undocumented "NoLCDBlend"
+				 * option allows the pixel-replicated mode to
+				 * be slightly wider or narrower than the
+				 * panel width.  It also causes a mode that is
+				 * exactly half as wide as the panel to be
+				 * pixel-replicated, rather than blended.
+				 */
 				int HDisplay  = xres & ~7;
 				int nStretch  = par->lcd_width / HDisplay;
 				int Remainder = par->lcd_width % HDisplay;
 
 				if ((!Remainder && ((nStretch > 2))) ||
-					(((HDisplay * 16) / par->lcd_width) < 7)) {
-					static const char StretchLoops[] = {10, 12, 13, 15, 16};
+				    (((HDisplay * 16) / par->lcd_width) < 7)) {
+					static const char StretchLoops[] = { 10, 12, 13, 15, 16 };
 					int horz_stretch_loop = -1, BestRemainder;
 					int Numerator = HDisplay, Denominator = par->lcd_width;
 					int Index = 5;
@@ -1098,12 +1121,12 @@ static int aty_var_to_crtc(const struct fb_info *info,
 				(((vdisplay * (VERT_STRETCH_RATIO0 + 1)) / par->lcd_height) & VERT_STRETCH_RATIO0));
 
 			if (!M64_HAS(LT_LCD_REGS) &&
-			    xres <= (M64_HAS(MOBIL_BUS)?1024:800))
+			    xres <= (M64_HAS(MOBIL_BUS) ? 1024 : 800))
 				crtc->ext_vert_stretch |= VERT_STRETCH_MODE;
 		} else {
 			/*
-			 * Don't use vertical blending if the mode is too wide or not
-			 * vertically stretched.
+			 * Don't use vertical blending if the mode is too wide
+			 * or not vertically stretched.
 			 */
 			crtc->vert_stretching = 0;
 		}
@@ -1125,11 +1148,11 @@ static int aty_var_to_crtc(const struct fb_info *info,
 	return 0;
 }
 
-static int aty_crtc_to_var(const struct crtc *crtc, struct fb_var_screeninfo *var)
+static int aty_crtc_to_var(const struct crtc *crtc,
+			   struct fb_var_screeninfo *var)
 {
 	u32 xres, yres, bpp, left, right, upper, lower, hslen, vslen, sync;
-	u32 h_total, h_disp, h_sync_strt, h_sync_dly, h_sync_wid,
-	    h_sync_pol;
+	u32 h_total, h_disp, h_sync_strt, h_sync_dly, h_sync_wid, h_sync_pol;
 	u32 v_total, v_disp, v_sync_strt, v_sync_wid, v_sync_pol, c_sync;
 	u32 pix_width;
 	u32 double_scan, interlace;
@@ -1161,8 +1184,8 @@ static int aty_crtc_to_var(const struct crtc *crtc, struct fb_var_screeninfo *va
 	lower = v_sync_strt - v_disp;
 	vslen = v_sync_wid;
 	sync = (h_sync_pol ? 0 : FB_SYNC_HOR_HIGH_ACT) |
-	    (v_sync_pol ? 0 : FB_SYNC_VERT_HIGH_ACT) |
-	    (c_sync ? FB_SYNC_COMP_HIGH_ACT : 0);
+		(v_sync_pol ? 0 : FB_SYNC_VERT_HIGH_ACT) |
+		(c_sync ? FB_SYNC_COMP_HIGH_ACT : 0);
 
 	switch (pix_width) {
 #if 0
@@ -1252,20 +1275,21 @@ static int aty_crtc_to_var(const struct crtc *crtc, struct fb_var_screeninfo *va
 	var->vsync_len = vslen;
 	var->sync = sync;
 	var->vmode = FB_VMODE_NONINTERLACED;
-	/* In double scan mode, the vertical parameters are doubled, so we need to
-	   half them to get the right values.
-	   In interlaced mode the values are already correct, so no correction is
-	   necessary.
+	/*
+	 * In double scan mode, the vertical parameters are doubled,
+	 * so we need to halve them to get the right values.
+	 * In interlaced mode the values are already correct,
+	 * so no correction is necessary.
 	 */
 	if (interlace)
 		var->vmode = FB_VMODE_INTERLACED;
 
 	if (double_scan) {
 		var->vmode = FB_VMODE_DOUBLE;
-		var->yres>>=1;
-		var->upper_margin>>=1;
-		var->lower_margin>>=1;
-		var->vsync_len>>=1;
+		var->yres >>= 1;
+		var->upper_margin >>= 1;
+		var->lower_margin >>= 1;
+		var->vsync_len >>= 1;
 	}
 
 	return 0;
@@ -1286,7 +1310,8 @@ static int atyfb_set_par(struct fb_info *info)
 	if (par->asleep)
 		return 0;
 
-	if ((err = aty_var_to_crtc(info, var, &par->crtc)))
+	err = aty_var_to_crtc(info, var, &par->crtc);
+	if (err)
 		return err;
 
 	pixclock = atyfb_get_pixclock(var, par);
@@ -1295,7 +1320,9 @@ static int atyfb_set_par(struct fb_info *info)
 		PRINTKE("Invalid pixclock\n");
 		return -EINVAL;
 	} else {
-		if((err = par->pll_ops->var_to_pll(info, pixclock, var->bits_per_pixel, &par->pll)))
+		err = par->pll_ops->var_to_pll(info, pixclock,
+					       var->bits_per_pixel, &par->pll);
+		if (err)
 			return err;
 	}
 
@@ -1313,22 +1340,23 @@ static int atyfb_set_par(struct fb_info *info)
 		wait_for_idle(par);
 
 	aty_set_crtc(par, &par->crtc);
-	par->dac_ops->set_dac(info, &par->pll, var->bits_per_pixel, par->accel_flags);
+	par->dac_ops->set_dac(info, &par->pll,
+			      var->bits_per_pixel, par->accel_flags);
 	par->pll_ops->set_pll(info, &par->pll);
 
 #ifdef DEBUG
-	if(par->pll_ops && par->pll_ops->pll_to_var)
-		pixclock_in_ps = par->pll_ops->pll_to_var(info, &(par->pll));
+	if (par->pll_ops && par->pll_ops->pll_to_var)
+		pixclock_in_ps = par->pll_ops->pll_to_var(info, &par->pll);
 	else
 		pixclock_in_ps = 0;
 
-	if(0 == pixclock_in_ps) {
+	if (0 == pixclock_in_ps) {
 		PRINTKE("ALERT ops->pll_to_var get 0\n");
 		pixclock_in_ps = pixclock;
 	}
 
 	memset(&debug, 0, sizeof(debug));
-	if(!aty_crtc_to_var(&(par->crtc), &debug)) {
+	if (!aty_crtc_to_var(&par->crtc, &debug)) {
 		u32 hSync, vRefresh;
 		u32 h_disp, h_sync_strt, h_sync_end, h_total;
 		u32 v_disp, v_sync_strt, v_sync_end, v_total;
@@ -1344,16 +1372,20 @@ static int atyfb_set_par(struct fb_info *info)
 
 		hSync = 1000000000 / (pixclock_in_ps * h_total);
 		vRefresh = (hSync * 1000) / v_total;
-        	if (par->crtc.gen_cntl & CRTC_INTERLACE_EN)
-            	vRefresh *= 2;
-        	if (par->crtc.gen_cntl & CRTC_DBL_SCAN_EN)
-            	vRefresh /= 2;
+		if (par->crtc.gen_cntl & CRTC_INTERLACE_EN)
+			vRefresh *= 2;
+		if (par->crtc.gen_cntl & CRTC_DBL_SCAN_EN)
+			vRefresh /= 2;
 
 		DPRINTK("atyfb_set_par\n");
-		DPRINTK(" Set Visible Mode to %ix%i-%i\n", var->xres, var->yres, var->bits_per_pixel);
-		DPRINTK(" Virtual resolution %ix%i, pixclock_in_ps %i (calculated %i)\n",
-			var->xres_virtual, var->yres_virtual, pixclock, pixclock_in_ps);
-		DPRINTK(" Dot clock:           %i MHz\n", 1000000 / pixclock_in_ps);
+		DPRINTK(" Set Visible Mode to %ix%i-%i\n",
+			var->xres, var->yres, var->bits_per_pixel);
+		DPRINTK(" Virtual resolution %ix%i, "
+			"pixclock_in_ps %i (calculated %i)\n",
+			var->xres_virtual, var->yres_virtual,
+			pixclock, pixclock_in_ps);
+		DPRINTK(" Dot clock:           %i MHz\n",
+			1000000 / pixclock_in_ps);
 		DPRINTK(" Horizontal sync:     %i kHz\n", hSync);
 		DPRINTK(" Vertical refresh:    %i Hz\n", vRefresh);
 		DPRINTK(" x  style: %i.%03i %i %i %i %i   %i %i %i %i\n",
@@ -1448,7 +1480,8 @@ static int atyfb_set_par(struct fb_info *info)
 	base = 0x2000;
 	printk("debug atyfb: Mach64 non-shadow register values:");
 	for (i = 0; i < 256; i = i+4) {
-		if(i%16 == 0) printk("\ndebug atyfb: 0x%04X: ", base + i);
+		if (i % 16 == 0)
+			printk("\ndebug atyfb: 0x%04X: ", base + i);
 		printk(" %08X", aty_ld_le32(i, par));
 	}
 	printk("\n\n");
@@ -1458,8 +1491,10 @@ static int atyfb_set_par(struct fb_info *info)
 	base = 0x00;
 	printk("debug atyfb: Mach64 PLL register values:");
 	for (i = 0; i < 64; i++) {
-		if(i%16 == 0) printk("\ndebug atyfb: 0x%02X: ", base + i);
-		if(i%4 == 0)  printk(" ");
+		if (i % 16 == 0)
+			printk("\ndebug atyfb: 0x%02X: ", base + i);
+		if (i % 4 == 0)
+			printk(" ");
 		printk("%02X", aty_ld_pll_ct(i, par));
 	}
 	printk("\n\n");
@@ -1470,19 +1505,21 @@ static int atyfb_set_par(struct fb_info *info)
 		/* LCD registers */
 		base = 0x00;
 		printk("debug atyfb: LCD register values:");
-		if(M64_HAS(LT_LCD_REGS)) {
-		    for(i = 0; i <= POWER_MANAGEMENT; i++) {
-			if(i == EXT_VERT_STRETCH)
-			    continue;
-			printk("\ndebug atyfb: 0x%04X: ", lt_lcd_regs[i]);
-			printk(" %08X", aty_ld_lcd(i, par));
-		    }
-
+		if (M64_HAS(LT_LCD_REGS)) {
+			for (i = 0; i <= POWER_MANAGEMENT; i++) {
+				if (i == EXT_VERT_STRETCH)
+					continue;
+				printk("\ndebug atyfb: 0x%04X: ",
+				       lt_lcd_regs[i]);
+				printk(" %08X", aty_ld_lcd(i, par));
+			}
 		} else {
-		    for (i = 0; i < 64; i++) {
-			if(i%4 == 0) printk("\ndebug atyfb: 0x%02X: ", base + i);
-			printk(" %08X", aty_ld_lcd(i, par));
-		    }
+			for (i = 0; i < 64; i++) {
+				if (i % 4 == 0)
+					printk("\ndebug atyfb: 0x%02X: ",
+					       base + i);
+				printk(" %08X", aty_ld_lcd(i, par));
+			}
 		}
 		printk("\n\n");
 	}
@@ -1500,9 +1537,10 @@ static int atyfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	union aty_pll pll;
 	u32 pixclock;
 
-	memcpy(&pll, &(par->pll), sizeof(pll));
+	memcpy(&pll, &par->pll, sizeof(pll));
 
-	if((err = aty_var_to_crtc(info, var, &crtc)))
+	err = aty_var_to_crtc(info, var, &crtc);
+	if (err)
 		return err;
 
 	pixclock = atyfb_get_pixclock(var, par);
@@ -1512,7 +1550,9 @@ static int atyfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 			PRINTKE("Invalid pixclock\n");
 		return -EINVAL;
 	} else {
-		if((err = par->pll_ops->var_to_pll(info, pixclock, var->bits_per_pixel, &pll)))
+		err = par->pll_ops->var_to_pll(info, pixclock,
+					       var->bits_per_pixel, &pll);
+		if (err)
 			return err;
 	}
 
@@ -1539,9 +1579,9 @@ static void set_off_pitch(struct atyfb_par *par, const struct fb_info *info)
 }
 
 
-    /*
-     *  Open/Release the frame buffer device
-     */
+/*
+ * Open/Release the frame buffer device
+ */
 
 static int atyfb_open(struct fb_info *info, int user)
 {
@@ -1553,7 +1593,7 @@ static int atyfb_open(struct fb_info *info, int user)
 		par->mmaped = 0;
 #endif
 	}
-	return (0);
+	return 0;
 }
 
 static irqreturn_t aty_irq(int irq, void *dev_id)
@@ -1568,7 +1608,8 @@ static irqreturn_t aty_irq(int irq, void *dev_id)
 
 	if (int_cntl & CRTC_VBLANK_INT) {
 		/* clear interrupt */
-		aty_st_le32(CRTC_INT_CNTL, (int_cntl & CRTC_INT_EN_MASK) | CRTC_VBLANK_INT_AK, par);
+		aty_st_le32(CRTC_INT_CNTL, (int_cntl & CRTC_INT_EN_MASK) |
+			    CRTC_VBLANK_INT_AK, par);
 		par->vblank.count++;
 		if (par->vblank.pan_display) {
 			par->vblank.pan_display = 0;
@@ -1603,9 +1644,11 @@ static int aty_enable_irq(struct atyfb_par *par, int reenable)
 		spin_lock_irq(&par->int_lock);
 		int_cntl = aty_ld_le32(CRTC_INT_CNTL, par) & CRTC_INT_EN_MASK;
 		if (!(int_cntl & CRTC_VBLANK_INT_EN)) {
-			printk("atyfb: someone disabled IRQ [%08x]\n", int_cntl);
+			printk("atyfb: someone disabled IRQ [%08x]\n",
+			       int_cntl);
 			/* re-enable interrupt */
-			aty_st_le32(CRTC_INT_CNTL, int_cntl | CRTC_VBLANK_INT_EN, par );
+			aty_st_le32(CRTC_INT_CNTL, int_cntl |
+				    CRTC_VBLANK_INT_EN, par);
 		}
 		spin_unlock_irq(&par->int_lock);
 	}
@@ -1625,7 +1668,7 @@ static int aty_disable_irq(struct atyfb_par *par)
 		spin_lock_irq(&par->int_lock);
 		int_cntl = aty_ld_le32(CRTC_INT_CNTL, par) & CRTC_INT_EN_MASK;
 		/* disable interrupt */
-		aty_st_le32(CRTC_INT_CNTL, int_cntl & ~CRTC_VBLANK_INT_EN, par );
+		aty_st_le32(CRTC_INT_CNTL, int_cntl & ~CRTC_VBLANK_INT_EN, par);
 		spin_unlock_irq(&par->int_lock);
 		free_irq(par->irq, par);
 	}
@@ -1636,50 +1679,62 @@ static int aty_disable_irq(struct atyfb_par *par)
 static int atyfb_release(struct fb_info *info, int user)
 {
 	struct atyfb_par *par = (struct atyfb_par *) info->par;
-	if (user) {
-		par->open--;
-		mdelay(1);
-		wait_for_idle(par);
-		if (!par->open) {
 #ifdef __sparc__
-			int was_mmaped = par->mmaped;
+	int was_mmaped;
+#endif
 
-			par->mmaped = 0;
+	if (!user)
+		return 0;
 
-			if (was_mmaped) {
-				struct fb_var_screeninfo var;
+	par->open--;
+	mdelay(1);
+	wait_for_idle(par);
 
-				/* Now reset the default display config, we have no
-				 * idea what the program(s) which mmap'd the chip did
-				 * to the configuration, nor whether it restored it
-				 * correctly.
-				 */
-				var = default_var;
-				if (noaccel)
-					var.accel_flags &= ~FB_ACCELF_TEXT;
-				else
-					var.accel_flags |= FB_ACCELF_TEXT;
-				if (var.yres == var.yres_virtual) {
-					u32 videoram = (info->fix.smem_len - (PAGE_SIZE << 2));
-					var.yres_virtual = ((videoram * 8) / var.bits_per_pixel) / var.xres_virtual;
-					if (var.yres_virtual < var.yres)
-						var.yres_virtual = var.yres;
-				}
-			}
-#endif
-			aty_disable_irq(par);
+	if (par->open)
+		return 0;
+
+#ifdef __sparc__
+	was_mmaped = par->mmaped;
+
+	par->mmaped = 0;
+
+	if (was_mmaped) {
+		struct fb_var_screeninfo var;
+
+		/*
+		 * Now reset the default display config, we have
+		 * no idea what the program(s) which mmap'd the
+		 * chip did to the configuration, nor whether it
+		 * restored it correctly.
+		 */
+		var = default_var;
+		if (noaccel)
+			var.accel_flags &= ~FB_ACCELF_TEXT;
+		else
+			var.accel_flags |= FB_ACCELF_TEXT;
+		if (var.yres == var.yres_virtual) {
+			u32 videoram = (info->fix.smem_len - (PAGE_SIZE << 2));
+			var.yres_virtual =
+				((videoram * 8) / var.bits_per_pixel) /
+				var.xres_virtual;
+			if (var.yres_virtual < var.yres)
+				var.yres_virtual = var.yres;
 		}
 	}
-	return (0);
+#endif
+	aty_disable_irq(par);
+
+	return 0;
 }
 
-    /*
-     *  Pan or Wrap the Display
-     *
-     *  This call looks only at xoffset, yoffset and the FB_VMODE_YWRAP flag
-     */
+/*
+ * Pan or Wrap the Display
+ *
+ * This call looks only at xoffset, yoffset and the FB_VMODE_YWRAP flag
+ */
 
-static int atyfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+static int atyfb_pan_display(struct fb_var_screeninfo *var,
+			     struct fb_info *info)
 {
 	struct atyfb_par *par = (struct atyfb_par *) info->par;
 	u32 xres, yres, xoffset, yoffset;
@@ -1690,7 +1745,8 @@ static int atyfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info
 		yres >>= 1;
 	xoffset = (var->xoffset + 7) & ~7;
 	yoffset = var->yoffset;
-	if (xoffset + xres > par->crtc.vxres || yoffset + yres > par->crtc.vyres)
+	if (xoffset + xres > par->crtc.vxres ||
+	    yoffset + yres > par->crtc.vyres)
 		return -EINVAL;
 	info->var.xoffset = xoffset;
 	info->var.yoffset = yoffset;
@@ -1727,10 +1783,10 @@ static int aty_waitforvblank(struct atyfb_par *par, u32 crtc)
 		return ret;
 
 	count = vbl->count;
-	ret = wait_event_interruptible_timeout(vbl->wait, count != vbl->count, HZ/10);
-	if (ret < 0) {
+	ret = wait_event_interruptible_timeout(vbl->wait,
+					       count != vbl->count, HZ/10);
+	if (ret < 0)
 		return ret;
-	}
 	if (ret == 0) {
 		aty_enable_irq(par, 1);
 		return -ETIMEDOUT;
@@ -1784,7 +1840,8 @@ static int atyfb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 		fbtyp.fb_depth = info->var.bits_per_pixel;
 		fbtyp.fb_cmsize = info->cmap.len;
 		fbtyp.fb_size = info->fix.smem_len;
-		if (copy_to_user((struct fbtype __user *) arg, &fbtyp, sizeof(fbtyp)))
+		if (copy_to_user((struct fbtype __user *) arg, &fbtyp,
+				 sizeof(fbtyp)))
 			return -EFAULT;
 		break;
 #endif /* __sparc__ */
@@ -1804,7 +1861,7 @@ static int atyfb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 	case ATYIO_CLKR:
 		if (M64_HAS(INTEGRATED)) {
 			struct atyclk clk;
-			union aty_pll *pll = &(par->pll);
+			union aty_pll *pll = &par->pll;
 			u32 dsp_config = pll->ct.dsp_config;
 			u32 dsp_on_off = pll->ct.dsp_on_off;
 			clk.ref_clk_per = par->ref_clk_per;
@@ -1829,8 +1886,9 @@ static int atyfb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 	case ATYIO_CLKW:
 		if (M64_HAS(INTEGRATED)) {
 			struct atyclk clk;
-			union aty_pll *pll = &(par->pll);
-			if (copy_from_user(&clk, (struct atyclk __user *) arg, sizeof(clk)))
+			union aty_pll *pll = &par->pll;
+			if (copy_from_user(&clk, (struct atyclk __user *) arg,
+					   sizeof(clk)))
 				return -EFAULT;
 			par->ref_clk_per = clk.ref_clk_per;
 			pll->ct.pll_ref_div = clk.pll_ref_div;
@@ -1841,8 +1899,10 @@ static int atyfb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 			pll->ct.vclk_fb_div = clk.vclk_fb_div;
 			pll->ct.vclk_post_div_real = clk.vclk_post_div;
 			pll->ct.dsp_config = (clk.dsp_xclks_per_row & 0x3fff) |
-				((clk.dsp_loop_latency & 0xf)<<16)| ((clk.dsp_precision & 7)<<20);
-			pll->ct.dsp_on_off = (clk.dsp_off & 0x7ff) | ((clk.dsp_on & 0x7ff)<<16);
+				((clk.dsp_loop_latency & 0xf) << 16) |
+				((clk.dsp_precision & 7) << 20);
+			pll->ct.dsp_on_off = (clk.dsp_off & 0x7ff) |
+				((clk.dsp_on & 0x7ff) << 16);
 			/*aty_calc_pll_ct(info, &pll->ct);*/
 			aty_set_pll_ct(info, pll);
 		} else
@@ -1913,8 +1973,7 @@ static int atyfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 				continue;
 
 			map_size = par->mmap_map[i].size - (offset - start);
-			map_offset =
-			    par->mmap_map[i].poff + (offset - start);
+			map_offset = par->mmap_map[i].poff + (offset - start);
 			break;
 		}
 		if (!map_size) {
@@ -1924,8 +1983,7 @@ static int atyfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 		if (page + map_size > size)
 			map_size = size - page;
 
-		pgprot_val(vma->vm_page_prot) &=
-		    ~(par->mmap_map[i].prot_mask);
+		pgprot_val(vma->vm_page_prot) &= ~(par->mmap_map[i].prot_mask);
 		pgprot_val(vma->vm_page_prot) |= par->mmap_map[i].prot_flag;
 
 		if (remap_pfn_range(vma, vma->vm_start + page,
@@ -2029,7 +2087,8 @@ static int atyfb_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 	par->asleep = 1;
 	par->lock_blank = 1;
 
-	/* Because we may change PCI D state ourselves, we need to
+	/*
+	 * Because we may change PCI D state ourselves, we need to
 	 * first save the config space content so the core can
 	 * restore it properly on resume.
 	 */
@@ -2080,7 +2139,8 @@ static int atyfb_pci_resume(struct pci_dev *pdev)
 
 	acquire_console_sem();
 
-	/* PCI state will have been restored by the core, so
+	/*
+	 * PCI state will have been restored by the core, so
 	 * we should be in D0 now with our config space fully
 	 * restored
 	 */
@@ -2192,8 +2252,8 @@ static void aty_bl_init(struct atyfb_par *par)
 
 	info->bl_dev = bd;
 	fb_bl_default_curve(info, 0,
-		0x3F * FB_BACKLIGHT_MAX / MAX_LEVEL,
-		0xFF * FB_BACKLIGHT_MAX / MAX_LEVEL);
+			    0x3F * FB_BACKLIGHT_MAX / MAX_LEVEL,
+			    0xFF * FB_BACKLIGHT_MAX / MAX_LEVEL);
 
 	bd->props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
 	bd->props.brightness = bd->props.max_brightness;
@@ -2236,16 +2296,16 @@ static void __devinit aty_calc_mem_refresh(struct atyfb_par *par, int xclk)
 		size = ARRAY_SIZE(ragepro_tbl);
 	}
 
-	for (i=0; i < size; i++) {
+	for (i = 0; i < size; i++) {
 		if (xclk < refresh_tbl[i])
-		break;
+			break;
 	}
 	par->mem_refresh_rate = i;
 }
 
-    /*
-     *  Initialisation
-     */
+/*
+ * Initialisation
+ */
 
 static struct fb_info *fb_list = NULL;
 
@@ -2375,8 +2435,10 @@ static int __devinit aty_init(struct fb_info *info)
 	}
 #endif
 #ifdef CONFIG_PPC_PMAC
-	/* The Apple iBook1 uses non-standard memory frequencies. We detect it
-	 * and set the frequency manually. */
+	/*
+	 * The Apple iBook1 uses non-standard memory frequencies.
+	 * We detect it and set the frequency manually.
+	 */
 	if (machine_is_compatible("PowerBook2,1")) {
 		par->pll_limits.mclk = 70;
 		par->pll_limits.xclk = 53;
@@ -2421,13 +2483,14 @@ static int __devinit aty_init(struct fb_info *info)
 
 	/* save previous video mode */
 	aty_get_crtc(par, &par->saved_crtc);
-	if(par->pll_ops->get_pll)
+	if (par->pll_ops->get_pll)
 		par->pll_ops->get_pll(info, &par->saved_pll);
 
 	par->mem_cntl = aty_ld_le32(MEM_CNTL, par);
 	gtb_memsize = M64_HAS(GTB_DSP);
 	if (gtb_memsize)
-		switch (par->mem_cntl & 0xF) {	/* 0xF used instead of MEM_SIZE_ALIAS */
+		/* 0xF used instead of MEM_SIZE_ALIAS */
+		switch (par->mem_cntl & 0xF) {
 		case MEM_SIZE_512K:
 			info->fix.smem_len = 0x80000;
 			break;
@@ -2496,8 +2559,8 @@ static int __devinit aty_init(struct fb_info *info)
 	}
 
 	/*
-	 *  Reg Block 0 (CT-compatible block) is at mmio_start
-	 *  Reg Block 1 (multimedia extensions) is at mmio_start - 0x400
+	 * Reg Block 0 (CT-compatible block) is at mmio_start
+	 * Reg Block 1 (multimedia extensions) is at mmio_start - 0x400
 	 */
 	if (M64_HAS(GX)) {
 		info->fix.mmio_len = 0x400;
@@ -2516,84 +2579,98 @@ static int __devinit aty_init(struct fb_info *info)
 	}
 
 	PRINTKI("%d%c %s, %s MHz XTAL, %d MHz PLL, %d Mhz MCLK, %d MHz XCLK\n",
-	       info->fix.smem_len == 0x80000 ? 512 : (info->fix.smem_len >> 20),
-	       info->fix.smem_len == 0x80000 ? 'K' : 'M', ramname, xtal, par->pll_limits.pll_max,
-	       par->pll_limits.mclk, par->pll_limits.xclk);
+		info->fix.smem_len == 0x80000 ? 512 : (info->fix.smem_len>>20),
+		info->fix.smem_len == 0x80000 ? 'K' : 'M', ramname, xtal,
+		par->pll_limits.pll_max, par->pll_limits.mclk,
+		par->pll_limits.xclk);
 
 #if defined(DEBUG) && defined(CONFIG_FB_ATY_CT)
 	if (M64_HAS(INTEGRATED)) {
 		int i;
-		printk("debug atyfb: BUS_CNTL DAC_CNTL MEM_CNTL EXT_MEM_CNTL CRTC_GEN_CNTL "
-		       "DSP_CONFIG DSP_ON_OFF CLOCK_CNTL\n"
-		       "debug atyfb: %08x %08x %08x %08x     %08x      %08x   %08x   %08x\n"
+		printk("debug atyfb: BUS_CNTL DAC_CNTL MEM_CNTL "
+		       "EXT_MEM_CNTL CRTC_GEN_CNTL DSP_CONFIG "
+		       "DSP_ON_OFF CLOCK_CNTL\n"
+		       "debug atyfb: %08x %08x %08x "
+		       "%08x     %08x      %08x   "
+		       "%08x   %08x\n"
 		       "debug atyfb: PLL",
-			aty_ld_le32(BUS_CNTL, par), aty_ld_le32(DAC_CNTL, par),
-			aty_ld_le32(MEM_CNTL, par), aty_ld_le32(EXT_MEM_CNTL, par),
-			aty_ld_le32(CRTC_GEN_CNTL, par), aty_ld_le32(DSP_CONFIG, par),
-			aty_ld_le32(DSP_ON_OFF, par), aty_ld_le32(CLOCK_CNTL, par));
+		       aty_ld_le32(BUS_CNTL, par),
+		       aty_ld_le32(DAC_CNTL, par),
+		       aty_ld_le32(MEM_CNTL, par),
+		       aty_ld_le32(EXT_MEM_CNTL, par),
+		       aty_ld_le32(CRTC_GEN_CNTL, par),
+		       aty_ld_le32(DSP_CONFIG, par),
+		       aty_ld_le32(DSP_ON_OFF, par),
+		       aty_ld_le32(CLOCK_CNTL, par));
 		for (i = 0; i < 40; i++)
 			printk(" %02x", aty_ld_pll_ct(i, par));
 		printk("\n");
 	}
 #endif
-	if(par->pll_ops->init_pll)
+	if (par->pll_ops->init_pll)
 		par->pll_ops->init_pll(info, &par->pll);
 	if (par->pll_ops->resume_pll)
 		par->pll_ops->resume_pll(info, &par->pll);
 
 	/*
-	 *  Last page of 8 MB (4 MB on ISA) aperture is MMIO,
-	 *  unless the auxiliary register aperture is used.
+	 * Last page of 8 MB (4 MB on ISA) aperture is MMIO,
+	 * unless the auxiliary register aperture is used.
 	 */
-
 	if (!par->aux_start &&
-		(info->fix.smem_len == 0x800000 || (par->bus_type == ISA && info->fix.smem_len == 0x400000)))
+	    (info->fix.smem_len == 0x800000 ||
+	     (par->bus_type == ISA && info->fix.smem_len == 0x400000)))
 		info->fix.smem_len -= GUI_RESERVE;
 
 	/*
-	 *  Disable register access through the linear aperture
-	 *  if the auxiliary aperture is used so we can access
-	 *  the full 8 MB of video RAM on 8 MB boards.
+	 * Disable register access through the linear aperture
+	 * if the auxiliary aperture is used so we can access
+	 * the full 8 MB of video RAM on 8 MB boards.
 	 */
 	if (par->aux_start)
-		aty_st_le32(BUS_CNTL, aty_ld_le32(BUS_CNTL, par) | BUS_APER_REG_DIS, par);
+		aty_st_le32(BUS_CNTL, aty_ld_le32(BUS_CNTL, par) |
+			    BUS_APER_REG_DIS, par);
 
 #ifdef CONFIG_MTRR
 	par->mtrr_aper = -1;
 	par->mtrr_reg = -1;
 	if (!nomtrr) {
 		/* Cover the whole resource. */
-		 par->mtrr_aper = mtrr_add(par->res_start, par->res_size, MTRR_TYPE_WRCOMB, 1);
-		 if (par->mtrr_aper >= 0 && !par->aux_start) {
+		par->mtrr_aper = mtrr_add(par->res_start, par->res_size,
+					  MTRR_TYPE_WRCOMB, 1);
+		if (par->mtrr_aper >= 0 && !par->aux_start) {
 			/* Make a hole for mmio. */
-			par->mtrr_reg = mtrr_add(par->res_start + 0x800000 - GUI_RESERVE,
-				GUI_RESERVE, MTRR_TYPE_UNCACHABLE, 1);
+			par->mtrr_reg = mtrr_add(par->res_start + 0x800000 -
+						 GUI_RESERVE, GUI_RESERVE,
+						 MTRR_TYPE_UNCACHABLE, 1);
 			if (par->mtrr_reg < 0) {
 				mtrr_del(par->mtrr_aper, 0, 0);
 				par->mtrr_aper = -1;
 			}
-		 }
+		}
 	}
 #endif
 
 	info->fbops = &atyfb_ops;
 	info->pseudo_palette = par->pseudo_palette;
 	info->flags = FBINFO_DEFAULT           |
-	              FBINFO_HWACCEL_IMAGEBLIT |
-	              FBINFO_HWACCEL_FILLRECT  |
-	              FBINFO_HWACCEL_COPYAREA  |
-	              FBINFO_HWACCEL_YPAN;
+		      FBINFO_HWACCEL_IMAGEBLIT |
+		      FBINFO_HWACCEL_FILLRECT  |
+		      FBINFO_HWACCEL_COPYAREA  |
+		      FBINFO_HWACCEL_YPAN;
 
 #ifdef CONFIG_PMAC_BACKLIGHT
 	if (M64_HAS(G3_PB_1_1) && machine_is_compatible("PowerBook1,1")) {
-		/* these bits let the 101 powerbook wake up from sleep -- paulus */
-		aty_st_lcd(POWER_MANAGEMENT, aty_ld_lcd(POWER_MANAGEMENT, par)
-			   | (USE_F32KHZ | TRISTATE_MEM_EN), par);
+		/*
+		 * these bits let the 101 powerbook
+		 * wake up from sleep -- paulus
+		 */
+		aty_st_lcd(POWER_MANAGEMENT, aty_ld_lcd(POWER_MANAGEMENT, par) |
+			   USE_F32KHZ | TRISTATE_MEM_EN, par);
 	} else
 #endif
 	if (M64_HAS(MOBIL_BUS) && backlight) {
 #ifdef CONFIG_FB_ATY_BACKLIGHT
-		aty_bl_init (par);
+		aty_bl_init(par);
 #endif
 	}
 
@@ -2601,8 +2678,8 @@ static int __devinit aty_init(struct fb_info *info)
 #ifdef CONFIG_PPC
 	if (machine_is(powermac)) {
 		/*
-		 *  FIXME: The NVRAM stuff should be put in a Mac-specific file, as it
-		 *         applies to all Mac video cards
+		 * FIXME: The NVRAM stuff should be put in a Mac-specific file,
+		 *        as it applies to all Mac video cards
 		 */
 		if (mode) {
 			if (mac_find_mode(&var, info, mode, 8))
@@ -2615,8 +2692,7 @@ static int __devinit aty_init(struct fb_info *info)
 					default_vmode = VMODE_1024_768_60;
 				else if (machine_is_compatible("iMac"))
 					default_vmode = VMODE_1024_768_75;
-				else if (machine_is_compatible
-					 ("PowerBook2,1"))
+				else if (machine_is_compatible("PowerBook2,1"))
 					/* iBook with 800x600 LCD */
 					default_vmode = VMODE_800_600_60;
 				else
@@ -2630,7 +2706,7 @@ static int __devinit aty_init(struct fb_info *info)
 			if (default_cmode < CMODE_8 || default_cmode > CMODE_32)
 				default_cmode = CMODE_8;
 			if (!mac_vmode_to_var(default_vmode, default_cmode,
-					       &var))
+					      &var))
 				has_var = 1;
 		}
 	}
@@ -2702,12 +2778,12 @@ aty_init_exit:
 
 #ifdef CONFIG_MTRR
 	if (par->mtrr_reg >= 0) {
-	    mtrr_del(par->mtrr_reg, 0, 0);
-	    par->mtrr_reg = -1;
+		mtrr_del(par->mtrr_reg, 0, 0);
+		par->mtrr_reg = -1;
 	}
 	if (par->mtrr_aper >= 0) {
-	    mtrr_del(par->mtrr_aper, 0, 0);
-	    par->mtrr_aper = -1;
+		mtrr_del(par->mtrr_aper, 0, 0);
+		par->mtrr_aper = -1;
 	}
 #endif
 	return ret;
@@ -2735,18 +2811,18 @@ static int __devinit store_video_par(char *video_str, unsigned char m64_num)
 	phys_size[m64_num] = size;
 	phys_guiregbase[m64_num] = guiregbase;
 	PRINTKI("stored them all: $%08lX $%08lX $%08lX \n", vmembase, size,
-	       guiregbase);
+		guiregbase);
 	return 0;
 
-      mach64_invalid:
+ mach64_invalid:
 	phys_vmembase[m64_num] = 0;
 	return -1;
 }
 #endif /* CONFIG_ATARI */
 
-    /*
-     *  Blank the display.
-     */
+/*
+ * Blank the display.
+ */
 
 static int atyfb_blank(int blank, struct fb_info *info)
 {
@@ -2768,20 +2844,20 @@ static int atyfb_blank(int blank, struct fb_info *info)
 	gen_cntl = aty_ld_le32(CRTC_GEN_CNTL, par);
 	gen_cntl &= ~0x400004c;
 	switch (blank) {
-		case FB_BLANK_UNBLANK:
-			break;
-		case FB_BLANK_NORMAL:
-			gen_cntl |= 0x4000040;
-			break;
-		case FB_BLANK_VSYNC_SUSPEND:
-			gen_cntl |= 0x4000048;
-			break;
-		case FB_BLANK_HSYNC_SUSPEND:
-			gen_cntl |= 0x4000044;
-			break;
-		case FB_BLANK_POWERDOWN:
-			gen_cntl |= 0x400004c;
-			break;
+	case FB_BLANK_UNBLANK:
+		break;
+	case FB_BLANK_NORMAL:
+		gen_cntl |= 0x4000040;
+		break;
+	case FB_BLANK_VSYNC_SUSPEND:
+		gen_cntl |= 0x4000048;
+		break;
+	case FB_BLANK_HSYNC_SUSPEND:
+		gen_cntl |= 0x4000044;
+		break;
+	case FB_BLANK_POWERDOWN:
+		gen_cntl |= 0x400004c;
+		break;
 	}
 	aty_st_le32(CRTC_GEN_CNTL, gen_cntl, par);
 
@@ -2806,15 +2882,15 @@ static void aty_st_pal(u_int regno, u_int red, u_int green, u_int blue,
 	aty_st_8(DAC_DATA, blue, par);
 }
 
-    /*
-     *  Set a single color register. The values supplied are already
-     *  rounded down to the hardware's capabilities (according to the
-     *  entries in the var structure). Return != 0 for invalid regno.
-     *  !! 4 & 8 =  PSEUDO, > 8 = DIRECTCOLOR
-     */
+/*
+ * Set a single color register. The values supplied are already
+ * rounded down to the hardware's capabilities (according to the
+ * entries in the var structure). Return != 0 for invalid regno.
+ * !! 4 & 8 =  PSEUDO, > 8 = DIRECTCOLOR
+ */
 
 static int atyfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
-	u_int transp, struct fb_info *info)
+			   u_int transp, struct fb_info *info)
 {
 	struct atyfb_par *par = (struct atyfb_par *) info->par;
 	int i, depth;
@@ -2868,16 +2944,15 @@ static int atyfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		if (depth == 16) {
 			if (regno < 32)
 				aty_st_pal(regno << 3, red,
-					   par->palette[regno<<1].green,
+					   par->palette[regno << 1].green,
 					   blue, par);
-			red = par->palette[regno>>1].red;
-			blue = par->palette[regno>>1].blue;
+			red = par->palette[regno >> 1].red;
+			blue = par->palette[regno >> 1].blue;
 			regno <<= 2;
 		} else if (depth == 15) {
 			regno <<= 3;
-			for(i = 0; i < 8; i++) {
-			    aty_st_pal(regno + i, red, green, blue, par);
-			}
+			for (i = 0; i < 8; i++)
+				aty_st_pal(regno + i, red, green, blue, par);
 		}
 	}
 	aty_st_pal(regno, red, green, blue, par);
@@ -2890,7 +2965,8 @@ static int atyfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 #ifdef __sparc__
 
 static int __devinit atyfb_setup_sparc(struct pci_dev *pdev,
-			struct fb_info *info, unsigned long addr)
+				       struct fb_info *info,
+				       unsigned long addr)
 {
 	struct atyfb_par *par = info->par;
 	struct device_node *dp;
@@ -2978,7 +3054,8 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev,
 		j++;
 	}
 
-	if((ret = correct_chipset(par)))
+	ret = correct_chipset(par);
+	if (ret)
 		return ret;
 
 	if (IS_XL(pdev->device)) {
@@ -3108,28 +3185,28 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base)
 	u32 driv_inf_tab, sig;
 	u16 lcd_ofs;
 
-	/* To support an LCD panel, we should know it's dimensions and
+	/*
+	 * To support an LCD panel, we should know it's dimensions and
 	 *  it's desired pixel clock.
 	 * There are two ways to do it:
 	 *  - Check the startup video mode and calculate the panel
 	 *    size from it. This is unreliable.
 	 *  - Read it from the driver information table in the video BIOS.
-	*/
+	 */
 	/* Address of driver information table is at offset 0x78. */
 	driv_inf_tab = bios_base + *((u16 *)(bios_base+0x78));
 
 	/* Check for the driver information table signature. */
-	sig = (*(u32 *)driv_inf_tab);
+	sig = *(u32 *)driv_inf_tab;
 	if ((sig == 0x54504c24) || /* Rage LT pro */
-		(sig == 0x544d5224) || /* Rage mobility */
-		(sig == 0x54435824) || /* Rage XC */
-		(sig == 0x544c5824)) { /* Rage XL */
+	    (sig == 0x544d5224) || /* Rage mobility */
+	    (sig == 0x54435824) || /* Rage XC */
+	    (sig == 0x544c5824)) { /* Rage XL */
 		PRINTKI("BIOS contains driver information table.\n");
-		lcd_ofs = (*(u16 *)(driv_inf_tab + 10));
+		lcd_ofs = *(u16 *)(driv_inf_tab + 10);
 		par->lcd_table = 0;
-		if (lcd_ofs != 0) {
+		if (lcd_ofs != 0)
 			par->lcd_table = bios_base + lcd_ofs;
-		}
 	}
 
 	if (par->lcd_table != 0) {
@@ -3144,14 +3221,16 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base)
 		u16 width, height, panel_type, refresh_rates;
 		u16 *lcdmodeptr;
 		u32 format;
-		u8 lcd_refresh_rates[16] = {50,56,60,67,70,72,75,76,85,90,100,120,140,150,160,200};
-		/* The most important information is the panel size at
+		u8 lcd_refresh_rates[16] = { 50, 56, 60, 67, 70, 72, 75, 76, 85,
+					     90, 100, 120, 140, 150, 160, 200 };
+		/*
+		 * The most important information is the panel size at
 		 * offset 25 and 27, but there's some other nice information
 		 * which we print to the screen.
 		 */
 		id = *(u8 *)par->lcd_table;
-		strncpy(model,(char *)par->lcd_table+1,24);
-		model[23]=0;
+		strncpy(model, (char *)par->lcd_table+1, 24);
+		model[23] = 0;
 
 		width = par->lcd_width = *(u16 *)(par->lcd_table+25);
 		height = par->lcd_height = *(u16 *)(par->lcd_table+27);
@@ -3164,7 +3243,7 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base)
 			txtdual = "dual (split) ";
 		else
 			txtdual = "";
-		tech = (panel_type>>2) & 63;
+		tech = (panel_type >> 2) & 63;
 		switch (tech) {
 		case 0:
 			txtmonitor = "passive matrix";
@@ -3224,22 +3303,24 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base)
 			}
 		}
 		PRINTKI("%s%s %s monitor detected: %s\n",
-			txtdual ,txtcolour, txtmonitor, model);
+			txtdual, txtcolour, txtmonitor, model);
 		PRINTKI("       id=%d, %dx%d pixels, %s\n",
 			id, width, height, txtformat);
 		refresh_rates_buf[0] = 0;
 		refresh_rates = *(u16 *)(par->lcd_table+62);
 		m = 1;
 		f = 0;
-		for (i=0;i<16;i++) {
+		for (i = 0; i < 16; i++) {
 			if (refresh_rates & m) {
 				if (f == 0) {
-					sprintf(strbuf, "%d", lcd_refresh_rates[i]);
+					sprintf(strbuf, "%d",
+						lcd_refresh_rates[i]);
 					f++;
 				} else {
-					sprintf(strbuf, ",%d", lcd_refresh_rates[i]);
+					sprintf(strbuf, ",%d",
+						lcd_refresh_rates[i]);
 				}
-				strcat(refresh_rates_buf,strbuf);
+				strcat(refresh_rates_buf, strbuf);
 			}
 			m = m << 1;
 		}
@@ -3247,7 +3328,8 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base)
 		PRINTKI("       supports refresh rates [%s], default %d Hz\n",
 			refresh_rates_buf, lcd_refresh_rates[default_refresh_rate]);
 		par->lcd_refreshrate = lcd_refresh_rates[default_refresh_rate];
-		/* We now need to determine the crtc parameters for the
+		/*
+		 * We now need to determine the crtc parameters for the
 		 * LCD monitor. This is tricky, because they are not stored
 		 * individually in the BIOS. Instead, the BIOS contains a
 		 * table of display modes that work for this monitor.
@@ -3382,7 +3464,9 @@ static int __devinit init_from_bios(struct atyfb_par *par)
 }
 #endif /* __i386__ */
 
-static int __devinit atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *info, unsigned long addr)
+static int __devinit atyfb_setup_generic(struct pci_dev *pdev,
+					 struct fb_info *info,
+					 unsigned long addr)
 {
 	struct atyfb_par *par = info->par;
 	u16 tmp;
@@ -3429,10 +3513,12 @@ static int __devinit atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *i
 		goto atyfb_setup_generic_fail;
 	}
 
-	if((ret = correct_chipset(par)))
+	ret = correct_chipset(par);
+	if (ret)
 		goto atyfb_setup_generic_fail;
 #ifdef __i386__
-	if((ret = init_from_bios(par)))
+	ret = init_from_bios(par);
+	if (ret)
 		goto atyfb_setup_generic_fail;
 #endif
 	if (!(aty_ld_le32(CRTC_GEN_CNTL, par) & CRTC_EXT_DISP_EN))
@@ -3457,7 +3543,8 @@ atyfb_setup_generic_fail:
 
 #endif /* !__sparc__ */
 
-static int __devinit atyfb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int __devinit atyfb_pci_probe(struct pci_dev *pdev,
+				     const struct pci_device_id *ent)
 {
 	unsigned long addr, res_start, res_size;
 	struct fb_info *info;
@@ -3482,10 +3569,10 @@ static int __devinit atyfb_pci_probe(struct pci_dev *pdev, const struct pci_devi
 	/* Reserve space */
 	res_start = rp->start;
 	res_size = rp->end - rp->start + 1;
-	if (!request_mem_region (res_start, res_size, "atyfb"))
+	if (!request_mem_region(res_start, res_size, "atyfb"))
 		return -EBUSY;
 
-        /* Allocate framebuffer */
+	/* Allocate framebuffer */
 	info = framebuffer_alloc(sizeof(struct atyfb_par), &pdev->dev);
 	if (!info) {
 		PRINTKE("atyfb_pci_probe() can't alloc fb_info\n");
@@ -3573,7 +3660,8 @@ static int __init atyfb_atari_probe(void)
 	for (m64_num = 0; m64_num < mach64_count; m64_num++) {
 		if (!phys_vmembase[m64_num] || !phys_size[m64_num] ||
 		    !phys_guiregbase[m64_num]) {
-		    PRINTKI("phys_*[%d] parameters not set => returning early. \n", m64_num);
+			PRINTKI("phys_*[%d] parameters not set => "
+				"returning early. \n", m64_num);
 			continue;
 		}
 
@@ -3589,8 +3677,8 @@ static int __init atyfb_atari_probe(void)
 		par->irq = (unsigned int) -1; /* something invalid */
 
 		/*
-		 *  Map the video memory (physical address given) to somewhere in the
-		 *  kernel address space.
+		 * Map the video memory (physical address given)
+		 * to somewhere in the kernel address space.
 		 */
 		info->screen_base = ioremap(phys_vmembase[m64_num], phys_size[m64_num]);
 		info->fix.smem_start = (unsigned long)info->screen_base; /* Fake! */
@@ -3661,12 +3749,12 @@ static void __devexit atyfb_remove(struct fb_info *info)
 
 #ifdef CONFIG_MTRR
 	if (par->mtrr_reg >= 0) {
-	    mtrr_del(par->mtrr_reg, 0, 0);
-	    par->mtrr_reg = -1;
+		mtrr_del(par->mtrr_reg, 0, 0);
+		par->mtrr_reg = -1;
 	}
 	if (par->mtrr_aper >= 0) {
-	    mtrr_del(par->mtrr_aper, 0, 0);
-	    par->mtrr_aper = -1;
+		mtrr_del(par->mtrr_aper, 0, 0);
+		par->mtrr_aper = -1;
 	}
 #endif
 #ifndef __sparc__
@@ -3900,29 +3988,29 @@ static const struct dmi_system_id atyfb_reboot_ids[] = {
 
 static int __init atyfb_init(void)
 {
-    int err1 = 1, err2 = 1;
+	int err1 = 1, err2 = 1;
 #ifndef MODULE
-    char *option = NULL;
+	char *option = NULL;
 
-    if (fb_get_options("atyfb", &option))
-	return -ENODEV;
-    atyfb_setup(option);
+	if (fb_get_options("atyfb", &option))
+		return -ENODEV;
+	atyfb_setup(option);
 #endif
 
 #ifdef CONFIG_PCI
-    err1 = pci_register_driver(&atyfb_driver);
+	err1 = pci_register_driver(&atyfb_driver);
 #endif
 #ifdef CONFIG_ATARI
-    err2 = atyfb_atari_probe();
+	err2 = atyfb_atari_probe();
 #endif
 
-    if (err1 && err2)
-	return -ENODEV;
+	if (err1 && err2)
+		return -ENODEV;
 
-    if (dmi_check_system(atyfb_reboot_ids))
-	register_reboot_notifier(&atyfb_reboot_notifier);
+	if (dmi_check_system(atyfb_reboot_ids))
+		register_reboot_notifier(&atyfb_reboot_notifier);
 
-    return 0;
+	return 0;
 }
 
 static void __exit atyfb_exit(void)
@@ -3951,8 +4039,7 @@ MODULE_PARM_DESC(mclk, "int: override memory clock");
 module_param(xclk, int, 0);
 MODULE_PARM_DESC(xclk, "int: override accelerated engine clock");
 module_param(comp_sync, int, 0);
-MODULE_PARM_DESC(comp_sync,
-		 "Set composite sync signal to low (0) or high (1)");
+MODULE_PARM_DESC(comp_sync, "Set composite sync signal to low (0) or high (1)");
 module_param(mode, charp, 0);
 MODULE_PARM_DESC(mode, "Specify resolution as \"<xres>x<yres>[-<bpp>][@<refresh>]\" ");
 #ifdef CONFIG_MTRR
-- 
cgit v0.10.2


From d480ace08d5b59133575e672a0bd1c97b0f8400f Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Tue, 22 Sep 2009 16:47:03 -0700
Subject: fbdev: framebuffer support for HTC Dream

Add a framebuffer driver for Qualcomm MSM/QSD SoCs, tested on HTC Dream
smartphone (aka T-Mobile G1, aka ADP1).

Brian said:

  I did the original quick and dirty version for bringup.  Rebecca took
  over and (re)wrote the bulk of the driver, getting things stable for
  production ship of Dream and Sapphire, and Dima is currently adding
  support for later Qualcomm chipsets (QSD8x50, etc).

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Cc: Brian Swetland <swetland@google.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: Rebecca Schultz Zavin <rebecca@android.com>
Cc: Dima Zavin <dima@android.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 11af4cb..05184a1 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2124,6 +2124,14 @@ config FB_PRE_INIT_FB
 	  Select this option if display contents should be inherited as set by
 	  the bootloader.
 
+config FB_MSM
+	tristate
+	depends on FB && ARCH_MSM
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	default y
+
 config FB_MX3
 	tristate "MX3 Framebuffer support"
 	depends on FB && MX3_IPU
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 01a819f..f4b6c15 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_FB_OMAP)             += omap/
 obj-$(CONFIG_XEN_FBDEV_FRONTEND)  += xen-fbfront.o
 obj-$(CONFIG_FB_CARMINE)          += carminefb.o
 obj-$(CONFIG_FB_MB862XX)	  += mb862xx/
+obj-$(CONFIG_FB_MSM)              += msm/
 
 # Platform or fallback drivers go here
 obj-$(CONFIG_FB_UVESA)            += uvesafb.o
diff --git a/drivers/video/msm/Makefile b/drivers/video/msm/Makefile
new file mode 100644
index 0000000..802d6ae
--- /dev/null
+++ b/drivers/video/msm/Makefile
@@ -0,0 +1,19 @@
+
+# core framebuffer
+#
+obj-y := msm_fb.o
+
+# MDP DMA/PPP engine
+#
+obj-y += mdp.o mdp_scale_tables.o mdp_ppp.o
+
+# MDDI interface
+#
+obj-y += mddi.o
+
+# MDDI client/panel drivers
+#
+obj-y += mddi_client_dummy.o
+obj-y += mddi_client_toshiba.o
+obj-y += mddi_client_nt35399.o
+
diff --git a/drivers/video/msm/mddi.c b/drivers/video/msm/mddi.c
new file mode 100644
index 0000000..f2de5a1
--- /dev/null
+++ b/drivers/video/msm/mddi.c
@@ -0,0 +1,828 @@
+/*
+ * MSM MDDI Transport
+ *
+ * Copyright (C) 2007 Google Incorporated
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <mach/msm_iomap.h>
+#include <mach/irqs.h>
+#include <mach/board.h>
+#include <linux/delay.h>
+
+#include <mach/msm_fb.h>
+#include "mddi_hw.h"
+
+#define FLAG_DISABLE_HIBERNATION 0x0001
+#define FLAG_HAVE_CAPS		 0x0002
+#define FLAG_HAS_VSYNC_IRQ	 0x0004
+#define FLAG_HAVE_STATUS	 0x0008
+
+#define CMD_GET_CLIENT_CAP     0x0601
+#define CMD_GET_CLIENT_STATUS  0x0602
+
+union mddi_rev {
+	unsigned char raw[MDDI_REV_BUFFER_SIZE];
+	struct mddi_rev_packet hdr;
+	struct mddi_client_status status;
+	struct mddi_client_caps caps;
+	struct mddi_register_access reg;
+};
+
+struct reg_read_info {
+	struct completion done;
+	uint32_t reg;
+	uint32_t status;
+	uint32_t result;
+};
+
+struct mddi_info {
+	uint16_t flags;
+	uint16_t version;
+	char __iomem *base;
+	int irq;
+	struct clk *clk;
+	struct msm_mddi_client_data client_data;
+
+	/* buffer for rev encap packets */
+	void *rev_data;
+	dma_addr_t rev_addr;
+	struct mddi_llentry *reg_write_data;
+	dma_addr_t reg_write_addr;
+	struct mddi_llentry *reg_read_data;
+	dma_addr_t reg_read_addr;
+	size_t rev_data_curr;
+
+	spinlock_t int_lock;
+	uint32_t int_enable;
+	uint32_t got_int;
+	wait_queue_head_t int_wait;
+
+	struct mutex reg_write_lock;
+	struct mutex reg_read_lock;
+	struct reg_read_info *reg_read;
+
+	struct mddi_client_caps caps;
+	struct mddi_client_status status;
+
+	void (*power_client)(struct msm_mddi_client_data *, int);
+
+	/* client device published to bind us to the
+	 * appropriate mddi_client driver
+	 */
+	char client_name[20];
+
+	struct platform_device client_pdev;
+};
+
+static void mddi_init_rev_encap(struct mddi_info *mddi);
+
+#define mddi_readl(r) readl(mddi->base + (MDDI_##r))
+#define mddi_writel(v, r) writel((v), mddi->base + (MDDI_##r))
+
+void mddi_activate_link(struct msm_mddi_client_data *cdata)
+{
+	struct mddi_info *mddi = container_of(cdata, struct mddi_info,
+					      client_data);
+
+	mddi_writel(MDDI_CMD_LINK_ACTIVE, CMD);
+}
+
+static void mddi_handle_link_list_done(struct mddi_info *mddi)
+{
+}
+
+static void mddi_reset_rev_encap_ptr(struct mddi_info *mddi)
+{
+	printk(KERN_INFO "mddi: resetting rev ptr\n");
+	mddi->rev_data_curr = 0;
+	mddi_writel(mddi->rev_addr, REV_PTR);
+	mddi_writel(mddi->rev_addr, REV_PTR);
+	mddi_writel(MDDI_CMD_FORCE_NEW_REV_PTR, CMD);
+}
+
+static void mddi_handle_rev_data(struct mddi_info *mddi, union mddi_rev *rev)
+{
+	int i;
+	struct reg_read_info *ri;
+
+	if ((rev->hdr.length <= MDDI_REV_BUFFER_SIZE - 2) &&
+	   (rev->hdr.length >= sizeof(struct mddi_rev_packet) - 2)) {
+
+		switch (rev->hdr.type) {
+		case TYPE_CLIENT_CAPS:
+			memcpy(&mddi->caps, &rev->caps,
+			       sizeof(struct mddi_client_caps));
+			mddi->flags |= FLAG_HAVE_CAPS;
+			wake_up(&mddi->int_wait);
+			break;
+		case TYPE_CLIENT_STATUS:
+			memcpy(&mddi->status, &rev->status,
+			       sizeof(struct mddi_client_status));
+			mddi->flags |= FLAG_HAVE_STATUS;
+			wake_up(&mddi->int_wait);
+			break;
+		case TYPE_REGISTER_ACCESS:
+			ri = mddi->reg_read;
+			if (ri == 0) {
+				printk(KERN_INFO "rev: got reg %x = %x without "
+						 " pending read\n",
+				       rev->reg.register_address,
+				       rev->reg.register_data_list);
+				break;
+			}
+			if (ri->reg != rev->reg.register_address) {
+				printk(KERN_INFO "rev: got reg %x = %x for "
+						 "wrong register, expected "
+						 "%x\n",
+				       rev->reg.register_address,
+				       rev->reg.register_data_list, ri->reg);
+				break;
+			}
+			mddi->reg_read = NULL;
+			ri->status = 0;
+			ri->result = rev->reg.register_data_list;
+			complete(&ri->done);
+			break;
+		default:
+			printk(KERN_INFO "rev: unknown reverse packet: "
+					 "len=%04x type=%04x CURR_REV_PTR=%x\n",
+			       rev->hdr.length, rev->hdr.type,
+			       mddi_readl(CURR_REV_PTR));
+			for (i = 0; i < rev->hdr.length + 2; i++) {
+				if ((i % 16) == 0)
+					printk(KERN_INFO "\n");
+				printk(KERN_INFO " %02x", rev->raw[i]);
+			}
+			printk(KERN_INFO "\n");
+			mddi_reset_rev_encap_ptr(mddi);
+		}
+	} else {
+		printk(KERN_INFO "bad rev length, %d, CURR_REV_PTR %x\n",
+		       rev->hdr.length, mddi_readl(CURR_REV_PTR));
+		mddi_reset_rev_encap_ptr(mddi);
+	}
+}
+
+static void mddi_wait_interrupt(struct mddi_info *mddi, uint32_t intmask);
+
+static void mddi_handle_rev_data_avail(struct mddi_info *mddi)
+{
+	union mddi_rev *rev = mddi->rev_data;
+	uint32_t rev_data_count;
+	uint32_t rev_crc_err_count;
+	int i;
+	struct reg_read_info *ri;
+	size_t prev_offset;
+	uint16_t length;
+
+	union mddi_rev *crev = mddi->rev_data + mddi->rev_data_curr;
+
+	/* clear the interrupt */
+	mddi_writel(MDDI_INT_REV_DATA_AVAIL, INT);
+	rev_data_count = mddi_readl(REV_PKT_CNT);
+	rev_crc_err_count = mddi_readl(REV_CRC_ERR);
+	if (rev_data_count > 1)
+		printk(KERN_INFO "rev_data_count %d\n", rev_data_count);
+
+	if (rev_crc_err_count) {
+		printk(KERN_INFO "rev_crc_err_count %d, INT %x\n",
+		       rev_crc_err_count,  mddi_readl(INT));
+		ri = mddi->reg_read;
+		if (ri == 0) {
+			printk(KERN_INFO "rev: got crc error without pending "
+			       "read\n");
+		} else {
+			mddi->reg_read = NULL;
+			ri->status = -EIO;
+			ri->result = -1;
+			complete(&ri->done);
+		}
+	}
+
+	if (rev_data_count == 0)
+		return;
+
+	prev_offset = mddi->rev_data_curr;
+
+	length = *((uint8_t *)mddi->rev_data + mddi->rev_data_curr);
+	mddi->rev_data_curr++;
+	if (mddi->rev_data_curr == MDDI_REV_BUFFER_SIZE)
+		mddi->rev_data_curr = 0;
+	length += *((uint8_t *)mddi->rev_data + mddi->rev_data_curr) << 8;
+	mddi->rev_data_curr += 1 + length;
+	if (mddi->rev_data_curr >= MDDI_REV_BUFFER_SIZE)
+		mddi->rev_data_curr =
+			mddi->rev_data_curr % MDDI_REV_BUFFER_SIZE;
+
+	if (length > MDDI_REV_BUFFER_SIZE - 2) {
+		printk(KERN_INFO "mddi: rev data length greater than buffer"
+			"size\n");
+		mddi_reset_rev_encap_ptr(mddi);
+		return;
+	}
+
+	if (prev_offset + 2 + length >= MDDI_REV_BUFFER_SIZE) {
+		union mddi_rev tmprev;
+		size_t rem = MDDI_REV_BUFFER_SIZE - prev_offset;
+		memcpy(&tmprev.raw[0], mddi->rev_data + prev_offset, rem);
+		memcpy(&tmprev.raw[rem], mddi->rev_data, 2 + length - rem);
+		mddi_handle_rev_data(mddi, &tmprev);
+	} else {
+		mddi_handle_rev_data(mddi, crev);
+	}
+
+	if (prev_offset < MDDI_REV_BUFFER_SIZE / 2 &&
+	    mddi->rev_data_curr >= MDDI_REV_BUFFER_SIZE / 2) {
+		mddi_writel(mddi->rev_addr, REV_PTR);
+	}
+}
+
+static irqreturn_t mddi_isr(int irq, void *data)
+{
+	struct msm_mddi_client_data *cdata = data;
+	struct mddi_info *mddi = container_of(cdata, struct mddi_info,
+					      client_data);
+	uint32_t active, status;
+
+	spin_lock(&mddi->int_lock);
+
+	active = mddi_readl(INT);
+	status = mddi_readl(STAT);
+
+	mddi_writel(active, INT);
+
+	/* ignore any interrupts we have disabled */
+	active &= mddi->int_enable;
+
+	mddi->got_int |= active;
+	wake_up(&mddi->int_wait);
+
+	if (active & MDDI_INT_PRI_LINK_LIST_DONE) {
+		mddi->int_enable &= (~MDDI_INT_PRI_LINK_LIST_DONE);
+		mddi_handle_link_list_done(mddi);
+	}
+	if (active & MDDI_INT_REV_DATA_AVAIL)
+		mddi_handle_rev_data_avail(mddi);
+
+	if (active & ~MDDI_INT_NEED_CLEAR)
+		mddi->int_enable &= ~(active & ~MDDI_INT_NEED_CLEAR);
+
+	if (active & MDDI_INT_LINK_ACTIVE) {
+		mddi->int_enable &= (~MDDI_INT_LINK_ACTIVE);
+		mddi->int_enable |= MDDI_INT_IN_HIBERNATION;
+	}
+
+	if (active & MDDI_INT_IN_HIBERNATION) {
+		mddi->int_enable &= (~MDDI_INT_IN_HIBERNATION);
+		mddi->int_enable |= MDDI_INT_LINK_ACTIVE;
+	}
+
+	mddi_writel(mddi->int_enable, INTEN);
+	spin_unlock(&mddi->int_lock);
+
+	return IRQ_HANDLED;
+}
+
+static long mddi_wait_interrupt_timeout(struct mddi_info *mddi,
+					uint32_t intmask, int timeout)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mddi->int_lock, irq_flags);
+	mddi->got_int &= ~intmask;
+	mddi->int_enable |= intmask;
+	mddi_writel(mddi->int_enable, INTEN);
+	spin_unlock_irqrestore(&mddi->int_lock, irq_flags);
+	return wait_event_timeout(mddi->int_wait, mddi->got_int & intmask,
+				  timeout);
+}
+
+static void mddi_wait_interrupt(struct mddi_info *mddi, uint32_t intmask)
+{
+	if (mddi_wait_interrupt_timeout(mddi, intmask, HZ/10) == 0)
+		printk(KERN_INFO KERN_ERR "mddi_wait_interrupt %d, timeout "
+		       "waiting for %x, INT = %x, STAT = %x gotint = %x\n",
+		       current->pid, intmask, mddi_readl(INT), mddi_readl(STAT),
+		       mddi->got_int);
+}
+
+static void mddi_init_rev_encap(struct mddi_info *mddi)
+{
+	memset(mddi->rev_data, 0xee, MDDI_REV_BUFFER_SIZE);
+	mddi_writel(mddi->rev_addr, REV_PTR);
+	mddi_writel(MDDI_CMD_FORCE_NEW_REV_PTR, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+}
+
+void mddi_set_auto_hibernate(struct msm_mddi_client_data *cdata, int on)
+{
+	struct mddi_info *mddi = container_of(cdata, struct mddi_info,
+					      client_data);
+	mddi_writel(MDDI_CMD_POWERDOWN, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_IN_HIBERNATION);
+	mddi_writel(MDDI_CMD_HIBERNATE | !!on, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+}
+
+
+static uint16_t mddi_init_registers(struct mddi_info *mddi)
+{
+	mddi_writel(0x0001, VERSION);
+	mddi_writel(MDDI_HOST_BYTES_PER_SUBFRAME, BPS);
+	mddi_writel(0x0003, SPM); /* subframes per media */
+	mddi_writel(0x0005, TA1_LEN);
+	mddi_writel(MDDI_HOST_TA2_LEN, TA2_LEN);
+	mddi_writel(0x0096, DRIVE_HI);
+	/* 0x32 normal, 0x50 for Toshiba display */
+	mddi_writel(0x0050, DRIVE_LO);
+	mddi_writel(0x003C, DISP_WAKE); /* wakeup counter */
+	mddi_writel(MDDI_HOST_REV_RATE_DIV, REV_RATE_DIV);
+
+	mddi_writel(MDDI_REV_BUFFER_SIZE, REV_SIZE);
+	mddi_writel(MDDI_MAX_REV_PKT_SIZE, REV_ENCAP_SZ);
+
+	/* disable periodic rev encap */
+	mddi_writel(MDDI_CMD_PERIODIC_REV_ENCAP, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+
+	if (mddi_readl(PAD_CTL) == 0) {
+		/* If we are turning on band gap, need to wait 5us before
+		 * turning on the rest of the PAD */
+		mddi_writel(0x08000, PAD_CTL);
+		udelay(5);
+	}
+
+	/* Recommendation from PAD hw team */
+	mddi_writel(0xa850f, PAD_CTL);
+
+
+	/* Need an even number for counts */
+	mddi_writel(0x60006, DRIVER_START_CNT);
+
+	mddi_set_auto_hibernate(&mddi->client_data, 0);
+
+	mddi_writel(MDDI_CMD_DISP_IGNORE, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+
+	mddi_init_rev_encap(mddi);
+	return mddi_readl(CORE_VER) & 0xffff;
+}
+
+static void mddi_suspend(struct msm_mddi_client_data *cdata)
+{
+	struct mddi_info *mddi = container_of(cdata, struct mddi_info,
+					      client_data);
+	/* turn off the client */
+	if (mddi->power_client)
+		mddi->power_client(&mddi->client_data, 0);
+	/* turn off the link */
+	mddi_writel(MDDI_CMD_RESET, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+	/* turn off the clock */
+	clk_disable(mddi->clk);
+}
+
+static void mddi_resume(struct msm_mddi_client_data *cdata)
+{
+	struct mddi_info *mddi = container_of(cdata, struct mddi_info,
+					      client_data);
+	mddi_set_auto_hibernate(&mddi->client_data, 0);
+	/* turn on the client */
+	if (mddi->power_client)
+		mddi->power_client(&mddi->client_data, 1);
+	/* turn on the clock */
+	clk_enable(mddi->clk);
+	/* set up the local registers */
+	mddi->rev_data_curr = 0;
+	mddi_init_registers(mddi);
+	mddi_writel(mddi->int_enable, INTEN);
+	mddi_writel(MDDI_CMD_LINK_ACTIVE, CMD);
+	mddi_writel(MDDI_CMD_SEND_RTD, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+	mddi_set_auto_hibernate(&mddi->client_data, 1);
+}
+
+static int __init mddi_get_client_caps(struct mddi_info *mddi)
+{
+	int i, j;
+
+	/* clear any stale interrupts */
+	mddi_writel(0xffffffff, INT);
+
+	mddi->int_enable = MDDI_INT_LINK_ACTIVE |
+			   MDDI_INT_IN_HIBERNATION |
+			   MDDI_INT_PRI_LINK_LIST_DONE |
+			   MDDI_INT_REV_DATA_AVAIL |
+			   MDDI_INT_REV_OVERFLOW |
+			   MDDI_INT_REV_OVERWRITE |
+			   MDDI_INT_RTD_FAILURE;
+	mddi_writel(mddi->int_enable, INTEN);
+
+	mddi_writel(MDDI_CMD_LINK_ACTIVE, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+
+	for (j = 0; j < 3; j++) {
+		/* the toshiba vga panel does not respond to get
+		 * caps unless you SEND_RTD, but the first SEND_RTD
+		 * will fail...
+		 */
+		for (i = 0; i < 4; i++) {
+			uint32_t stat;
+
+			mddi_writel(MDDI_CMD_SEND_RTD, CMD);
+			mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+			stat = mddi_readl(STAT);
+			printk(KERN_INFO "mddi cmd send rtd: int %x, stat %x, "
+					"rtd val %x\n", mddi_readl(INT), stat,
+					mddi_readl(RTD_VAL));
+			if ((stat & MDDI_STAT_RTD_MEAS_FAIL) == 0)
+				break;
+			msleep(1);
+		}
+
+		mddi_writel(CMD_GET_CLIENT_CAP, CMD);
+		mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+		wait_event_timeout(mddi->int_wait, mddi->flags & FLAG_HAVE_CAPS,
+				   HZ / 100);
+
+		if (mddi->flags & FLAG_HAVE_CAPS)
+			break;
+		printk(KERN_INFO KERN_ERR "mddi_init, timeout waiting for "
+				"caps\n");
+	}
+	return mddi->flags & FLAG_HAVE_CAPS;
+}
+
+/* link must be active when this is called */
+int mddi_check_status(struct mddi_info *mddi)
+{
+	int ret = -1, retry = 3;
+	mutex_lock(&mddi->reg_read_lock);
+	mddi_writel(MDDI_CMD_PERIODIC_REV_ENCAP | 1, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+
+	do {
+		mddi->flags &= ~FLAG_HAVE_STATUS;
+		mddi_writel(CMD_GET_CLIENT_STATUS, CMD);
+		mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+		wait_event_timeout(mddi->int_wait,
+				   mddi->flags & FLAG_HAVE_STATUS,
+				   HZ / 100);
+
+		if (mddi->flags & FLAG_HAVE_STATUS) {
+			if (mddi->status.crc_error_count)
+				printk(KERN_INFO "mddi status: crc_error "
+					"count: %d\n",
+					mddi->status.crc_error_count);
+			else
+				ret = 0;
+			break;
+		} else
+			printk(KERN_INFO "mddi status: failed to get client "
+				"status\n");
+		mddi_writel(MDDI_CMD_SEND_RTD, CMD);
+		mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+	} while (--retry);
+
+	mddi_writel(MDDI_CMD_PERIODIC_REV_ENCAP | 0, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+	mutex_unlock(&mddi->reg_read_lock);
+	return ret;
+}
+
+
+void mddi_remote_write(struct msm_mddi_client_data *cdata, uint32_t val,
+		       uint32_t reg)
+{
+	struct mddi_info *mddi = container_of(cdata, struct mddi_info,
+					      client_data);
+	struct mddi_llentry *ll;
+	struct mddi_register_access *ra;
+
+	mutex_lock(&mddi->reg_write_lock);
+
+	ll = mddi->reg_write_data;
+
+	ra = &(ll->u.r);
+	ra->length = 14 + 4;
+	ra->type = TYPE_REGISTER_ACCESS;
+	ra->client_id = 0;
+	ra->read_write_info = MDDI_WRITE | 1;
+	ra->crc16 = 0;
+
+	ra->register_address = reg;
+	ra->register_data_list = val;
+
+	ll->flags = 1;
+	ll->header_count = 14;
+	ll->data_count = 4;
+	ll->data = mddi->reg_write_addr + offsetof(struct mddi_llentry,
+						   u.r.register_data_list);
+	ll->next = 0;
+	ll->reserved = 0;
+
+	mddi_writel(mddi->reg_write_addr, PRI_PTR);
+
+	mddi_wait_interrupt(mddi, MDDI_INT_PRI_LINK_LIST_DONE);
+	mutex_unlock(&mddi->reg_write_lock);
+}
+
+uint32_t mddi_remote_read(struct msm_mddi_client_data *cdata, uint32_t reg)
+{
+	struct mddi_info *mddi = container_of(cdata, struct mddi_info,
+					      client_data);
+	struct mddi_llentry *ll;
+	struct mddi_register_access *ra;
+	struct reg_read_info ri;
+	unsigned s;
+	int retry_count = 2;
+	unsigned long irq_flags;
+
+	mutex_lock(&mddi->reg_read_lock);
+
+	ll = mddi->reg_read_data;
+
+	ra = &(ll->u.r);
+	ra->length = 14;
+	ra->type = TYPE_REGISTER_ACCESS;
+	ra->client_id = 0;
+	ra->read_write_info = MDDI_READ | 1;
+	ra->crc16 = 0;
+
+	ra->register_address = reg;
+
+	ll->flags = 0x11;
+	ll->header_count = 14;
+	ll->data_count = 0;
+	ll->data = 0;
+	ll->next = 0;
+	ll->reserved = 0;
+
+	s = mddi_readl(STAT);
+
+	ri.reg = reg;
+	ri.status = -1;
+
+	do {
+		init_completion(&ri.done);
+		mddi->reg_read = &ri;
+		mddi_writel(mddi->reg_read_addr, PRI_PTR);
+
+		mddi_wait_interrupt(mddi, MDDI_INT_PRI_LINK_LIST_DONE);
+
+		/* Enable Periodic Reverse Encapsulation. */
+		mddi_writel(MDDI_CMD_PERIODIC_REV_ENCAP | 1, CMD);
+		mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+		if (wait_for_completion_timeout(&ri.done, HZ/10) == 0 &&
+		    !ri.done.done) {
+			printk(KERN_INFO "mddi_remote_read(%x) timeout "
+					 "(%d %d %d)\n",
+			       reg, ri.status, ri.result, ri.done.done);
+			spin_lock_irqsave(&mddi->int_lock, irq_flags);
+			mddi->reg_read = NULL;
+			spin_unlock_irqrestore(&mddi->int_lock, irq_flags);
+			ri.status = -1;
+			ri.result = -1;
+		}
+		if (ri.status == 0)
+			break;
+
+		mddi_writel(MDDI_CMD_SEND_RTD, CMD);
+		mddi_writel(MDDI_CMD_LINK_ACTIVE, CMD);
+		mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+		printk(KERN_INFO "mddi_remote_read: failed, sent "
+		       "MDDI_CMD_SEND_RTD: int %x, stat %x, rtd val %x "
+		       "curr_rev_ptr %x\n", mddi_readl(INT), mddi_readl(STAT),
+		       mddi_readl(RTD_VAL), mddi_readl(CURR_REV_PTR));
+	} while (retry_count-- > 0);
+	/* Disable Periodic Reverse Encapsulation. */
+	mddi_writel(MDDI_CMD_PERIODIC_REV_ENCAP | 0, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+	mddi->reg_read = NULL;
+	mutex_unlock(&mddi->reg_read_lock);
+	return ri.result;
+}
+
+static struct mddi_info mddi_info[2];
+
+static int __init mddi_clk_setup(struct platform_device *pdev,
+				 struct mddi_info *mddi,
+				 unsigned long clk_rate)
+{
+	int ret;
+
+	/* set up the clocks */
+	mddi->clk = clk_get(&pdev->dev, "mddi_clk");
+	if (IS_ERR(mddi->clk)) {
+		printk(KERN_INFO "mddi: failed to get clock\n");
+		return PTR_ERR(mddi->clk);
+	}
+	ret =  clk_enable(mddi->clk);
+	if (ret)
+		goto fail;
+	ret = clk_set_rate(mddi->clk, clk_rate);
+	if (ret)
+		goto fail;
+	return 0;
+
+fail:
+	clk_put(mddi->clk);
+	return ret;
+}
+
+static int __init mddi_rev_data_setup(struct mddi_info *mddi)
+{
+	void *dma;
+	dma_addr_t dma_addr;
+
+	/* set up dma buffer */
+	dma = dma_alloc_coherent(NULL, 0x1000, &dma_addr, GFP_KERNEL);
+	if (dma == 0)
+		return -ENOMEM;
+	mddi->rev_data = dma;
+	mddi->rev_data_curr = 0;
+	mddi->rev_addr = dma_addr;
+	mddi->reg_write_data = dma + MDDI_REV_BUFFER_SIZE;
+	mddi->reg_write_addr = dma_addr + MDDI_REV_BUFFER_SIZE;
+	mddi->reg_read_data = mddi->reg_write_data + 1;
+	mddi->reg_read_addr = mddi->reg_write_addr +
+			      sizeof(*mddi->reg_write_data);
+	return 0;
+}
+
+static int __init mddi_probe(struct platform_device *pdev)
+{
+	struct msm_mddi_platform_data *pdata = pdev->dev.platform_data;
+	struct mddi_info *mddi = &mddi_info[pdev->id];
+	struct resource *resource;
+	int ret, i;
+
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!resource) {
+		printk(KERN_ERR "mddi: no associated mem resource!\n");
+		return -ENOMEM;
+	}
+	mddi->base = ioremap(resource->start, resource->end - resource->start);
+	if (!mddi->base) {
+		printk(KERN_ERR "mddi: failed to remap base!\n");
+		ret = -EINVAL;
+		goto error_ioremap;
+	}
+	resource = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!resource) {
+		printk(KERN_ERR "mddi: no associated irq resource!\n");
+		ret = -EINVAL;
+		goto error_get_irq_resource;
+	}
+	mddi->irq = resource->start;
+	printk(KERN_INFO "mddi: init() base=0x%p irq=%d\n", mddi->base,
+	       mddi->irq);
+	mddi->power_client = pdata->power_client;
+
+	mutex_init(&mddi->reg_write_lock);
+	mutex_init(&mddi->reg_read_lock);
+	spin_lock_init(&mddi->int_lock);
+	init_waitqueue_head(&mddi->int_wait);
+
+	ret = mddi_clk_setup(pdev, mddi, pdata->clk_rate);
+	if (ret) {
+		printk(KERN_ERR "mddi: failed to setup clock!\n");
+		goto error_clk_setup;
+	}
+
+	ret = mddi_rev_data_setup(mddi);
+	if (ret) {
+		printk(KERN_ERR "mddi: failed to setup rev data!\n");
+		goto error_rev_data;
+	}
+
+	mddi->int_enable = 0;
+	mddi_writel(mddi->int_enable, INTEN);
+	ret = request_irq(mddi->irq, mddi_isr, IRQF_DISABLED, "mddi",
+			  &mddi->client_data);
+	if (ret) {
+		printk(KERN_ERR "mddi: failed to request enable irq!\n");
+		goto error_request_irq;
+	}
+
+	/* turn on the mddi client bridge chip */
+	if (mddi->power_client)
+		mddi->power_client(&mddi->client_data, 1);
+
+	/* initialize the mddi registers */
+	mddi_set_auto_hibernate(&mddi->client_data, 0);
+	mddi_writel(MDDI_CMD_RESET, CMD);
+	mddi_wait_interrupt(mddi, MDDI_INT_NO_CMD_PKTS_PEND);
+	mddi->version = mddi_init_registers(mddi);
+	if (mddi->version < 0x20) {
+		printk(KERN_ERR "mddi: unsupported version 0x%x\n",
+		       mddi->version);
+		ret = -ENODEV;
+		goto error_mddi_version;
+	}
+
+	/* read the capabilities off the client */
+	if (!mddi_get_client_caps(mddi)) {
+		printk(KERN_INFO "mddi: no client found\n");
+		/* power down the panel */
+		mddi_writel(MDDI_CMD_POWERDOWN, CMD);
+		printk(KERN_INFO "mddi powerdown: stat %x\n", mddi_readl(STAT));
+		msleep(100);
+		printk(KERN_INFO "mddi powerdown: stat %x\n", mddi_readl(STAT));
+		return 0;
+	}
+	mddi_set_auto_hibernate(&mddi->client_data, 1);
+
+	if (mddi->caps.Mfr_Name == 0 && mddi->caps.Product_Code == 0)
+		pdata->fixup(&mddi->caps.Mfr_Name, &mddi->caps.Product_Code);
+
+	mddi->client_pdev.id = 0;
+	for (i = 0; i < pdata->num_clients; i++) {
+		if (pdata->client_platform_data[i].product_id ==
+		    (mddi->caps.Mfr_Name << 16 | mddi->caps.Product_Code)) {
+			mddi->client_data.private_client_data =
+				pdata->client_platform_data[i].client_data;
+			mddi->client_pdev.name =
+				pdata->client_platform_data[i].name;
+			mddi->client_pdev.id =
+				pdata->client_platform_data[i].id;
+			/* XXX: possibly set clock */
+			break;
+		}
+	}
+
+	if (i >= pdata->num_clients)
+		mddi->client_pdev.name = "mddi_c_dummy";
+	printk(KERN_INFO "mddi: registering panel %s\n",
+		mddi->client_pdev.name);
+
+	mddi->client_data.suspend = mddi_suspend;
+	mddi->client_data.resume = mddi_resume;
+	mddi->client_data.activate_link = mddi_activate_link;
+	mddi->client_data.remote_write = mddi_remote_write;
+	mddi->client_data.remote_read = mddi_remote_read;
+	mddi->client_data.auto_hibernate = mddi_set_auto_hibernate;
+	mddi->client_data.fb_resource = pdata->fb_resource;
+	if (pdev->id == 0)
+		mddi->client_data.interface_type = MSM_MDDI_PMDH_INTERFACE;
+	else if (pdev->id == 1)
+		mddi->client_data.interface_type = MSM_MDDI_EMDH_INTERFACE;
+	else {
+		printk(KERN_ERR "mddi: can not determine interface %d!\n",
+		       pdev->id);
+		ret = -EINVAL;
+		goto error_mddi_interface;
+	}
+
+	mddi->client_pdev.dev.platform_data = &mddi->client_data;
+	printk(KERN_INFO "mddi: publish: %s\n", mddi->client_name);
+	platform_device_register(&mddi->client_pdev);
+	return 0;
+
+error_mddi_interface:
+error_mddi_version:
+	free_irq(mddi->irq, 0);
+error_request_irq:
+	dma_free_coherent(NULL, 0x1000, mddi->rev_data, mddi->rev_addr);
+error_rev_data:
+error_clk_setup:
+error_get_irq_resource:
+	iounmap(mddi->base);
+error_ioremap:
+
+	printk(KERN_INFO "mddi: mddi_init() failed (%d)\n", ret);
+	return ret;
+}
+
+
+static struct platform_driver mddi_driver = {
+	.probe = mddi_probe,
+	.driver = { .name = "msm_mddi" },
+};
+
+static int __init _mddi_init(void)
+{
+	return platform_driver_register(&mddi_driver);
+}
+
+module_init(_mddi_init);
diff --git a/drivers/video/msm/mddi_client_dummy.c b/drivers/video/msm/mddi_client_dummy.c
new file mode 100644
index 0000000..ebbae87
--- /dev/null
+++ b/drivers/video/msm/mddi_client_dummy.c
@@ -0,0 +1,97 @@
+/* drivers/video/msm_fb/mddi_client_dummy.c
+ *
+ * Support for "dummy" mddi client devices which require no
+ * special initialization code.
+ *
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+#include <mach/msm_fb.h>
+
+struct panel_info {
+	struct platform_device pdev;
+	struct msm_panel_data panel_data;
+};
+
+static int mddi_dummy_suspend(struct msm_panel_data *panel_data)
+{
+	return 0;
+}
+
+static int mddi_dummy_resume(struct msm_panel_data *panel_data)
+{
+	return 0;
+}
+
+static int mddi_dummy_blank(struct msm_panel_data *panel_data)
+{
+	return 0;
+}
+
+static int mddi_dummy_unblank(struct msm_panel_data *panel_data)
+{
+	return 0;
+}
+
+static int mddi_dummy_probe(struct platform_device *pdev)
+{
+	struct msm_mddi_client_data *client_data = pdev->dev.platform_data;
+	struct panel_info *panel =
+		kzalloc(sizeof(struct panel_info), GFP_KERNEL);
+	int ret;
+	if (!panel)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, panel);
+	panel->panel_data.suspend = mddi_dummy_suspend;
+	panel->panel_data.resume = mddi_dummy_resume;
+	panel->panel_data.blank = mddi_dummy_blank;
+	panel->panel_data.unblank = mddi_dummy_unblank;
+	panel->panel_data.caps = MSMFB_CAP_PARTIAL_UPDATES;
+	panel->pdev.name = "msm_panel";
+	panel->pdev.id = pdev->id;
+	platform_device_add_resources(&panel->pdev,
+				      client_data->fb_resource, 1);
+	panel->panel_data.fb_data = client_data->private_client_data;
+	panel->pdev.dev.platform_data = &panel->panel_data;
+	ret = platform_device_register(&panel->pdev);
+	if (ret) {
+		kfree(panel);
+		return ret;
+	}
+	return 0;
+}
+
+static int mddi_dummy_remove(struct platform_device *pdev)
+{
+	struct panel_info *panel = platform_get_drvdata(pdev);
+	kfree(panel);
+	return 0;
+}
+
+static struct platform_driver mddi_client_dummy = {
+	.probe = mddi_dummy_probe,
+	.remove = mddi_dummy_remove,
+	.driver = { .name = "mddi_c_dummy" },
+};
+
+static int __init mddi_client_dummy_init(void)
+{
+	platform_driver_register(&mddi_client_dummy);
+	return 0;
+}
+
+module_init(mddi_client_dummy_init);
+
diff --git a/drivers/video/msm/mddi_client_nt35399.c b/drivers/video/msm/mddi_client_nt35399.c
new file mode 100644
index 0000000..9c78050
--- /dev/null
+++ b/drivers/video/msm/mddi_client_nt35399.c
@@ -0,0 +1,255 @@
+/* drivers/video/msm_fb/mddi_client_nt35399.c
+ *
+ * Support for Novatek NT35399 MDDI client of Sapphire
+ *
+ * Copyright (C) 2008 HTC Incorporated
+ * Author: Solomon Chiu (solomon_chiu@htc.com)
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <mach/msm_fb.h>
+
+static DECLARE_WAIT_QUEUE_HEAD(nt35399_vsync_wait);
+
+struct panel_info {
+	struct msm_mddi_client_data *client_data;
+	struct platform_device pdev;
+	struct msm_panel_data panel_data;
+	struct msmfb_callback *fb_callback;
+	struct work_struct panel_work;
+	struct workqueue_struct *fb_wq;
+	int nt35399_got_int;
+};
+
+static void
+nt35399_request_vsync(struct msm_panel_data *panel_data,
+		      struct msmfb_callback *callback)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	panel->fb_callback = callback;
+	if (panel->nt35399_got_int) {
+		panel->nt35399_got_int = 0;
+		client_data->activate_link(client_data); /* clears interrupt */
+	}
+}
+
+static void nt35399_wait_vsync(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	if (panel->nt35399_got_int) {
+		panel->nt35399_got_int = 0;
+		client_data->activate_link(client_data); /* clears interrupt */
+	}
+
+	if (wait_event_timeout(nt35399_vsync_wait, panel->nt35399_got_int,
+				HZ/2) == 0)
+		printk(KERN_ERR "timeout waiting for VSYNC\n");
+
+	panel->nt35399_got_int = 0;
+	/* interrupt clears when screen dma starts */
+}
+
+static int nt35399_suspend(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+	int ret;
+
+	ret = bridge_data->uninit(bridge_data, client_data);
+	if (ret) {
+		printk(KERN_INFO "mddi nt35399 client: non zero return from "
+			"uninit\n");
+		return ret;
+	}
+	client_data->suspend(client_data);
+	return 0;
+}
+
+static int nt35399_resume(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+	int ret;
+
+	client_data->resume(client_data);
+	ret = bridge_data->init(bridge_data, client_data);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+static int nt35399_blank(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+
+	return bridge_data->blank(bridge_data, client_data);
+}
+
+static int nt35399_unblank(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+
+	return bridge_data->unblank(bridge_data, client_data);
+}
+
+irqreturn_t nt35399_vsync_interrupt(int irq, void *data)
+{
+	struct panel_info *panel = data;
+
+	panel->nt35399_got_int = 1;
+
+	if (panel->fb_callback) {
+		panel->fb_callback->func(panel->fb_callback);
+		panel->fb_callback = NULL;
+	}
+
+	wake_up(&nt35399_vsync_wait);
+
+	return IRQ_HANDLED;
+}
+
+static int setup_vsync(struct panel_info *panel, int init)
+{
+	int ret;
+	int gpio = 97;
+	unsigned int irq;
+
+	if (!init) {
+		ret = 0;
+		goto uninit;
+	}
+	ret = gpio_request(gpio, "vsync");
+	if (ret)
+		goto err_request_gpio_failed;
+
+	ret = gpio_direction_input(gpio);
+	if (ret)
+		goto err_gpio_direction_input_failed;
+
+	ret = irq = gpio_to_irq(gpio);
+	if (ret < 0)
+		goto err_get_irq_num_failed;
+
+	ret = request_irq(irq, nt35399_vsync_interrupt, IRQF_TRIGGER_RISING,
+			  "vsync", panel);
+	if (ret)
+		goto err_request_irq_failed;
+
+	printk(KERN_INFO "vsync on gpio %d now %d\n",
+	       gpio, gpio_get_value(gpio));
+	return 0;
+
+uninit:
+	free_irq(gpio_to_irq(gpio), panel->client_data);
+err_request_irq_failed:
+err_get_irq_num_failed:
+err_gpio_direction_input_failed:
+	gpio_free(gpio);
+err_request_gpio_failed:
+	return ret;
+}
+
+static int mddi_nt35399_probe(struct platform_device *pdev)
+{
+	struct msm_mddi_client_data *client_data = pdev->dev.platform_data;
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+
+	int ret;
+
+	struct panel_info *panel = kzalloc(sizeof(struct panel_info),
+					   GFP_KERNEL);
+
+	printk(KERN_DEBUG "%s: enter.\n", __func__);
+
+	if (!panel)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, panel);
+
+	ret = setup_vsync(panel, 1);
+	if (ret) {
+		dev_err(&pdev->dev, "mddi_nt35399_setup_vsync failed\n");
+		return ret;
+	}
+
+	panel->client_data = client_data;
+	panel->panel_data.suspend = nt35399_suspend;
+	panel->panel_data.resume = nt35399_resume;
+	panel->panel_data.wait_vsync = nt35399_wait_vsync;
+	panel->panel_data.request_vsync = nt35399_request_vsync;
+	panel->panel_data.blank = nt35399_blank;
+	panel->panel_data.unblank = nt35399_unblank;
+	panel->panel_data.fb_data = &bridge_data->fb_data;
+	panel->panel_data.caps = 0;
+
+	panel->pdev.name = "msm_panel";
+	panel->pdev.id = pdev->id;
+	panel->pdev.resource = client_data->fb_resource;
+	panel->pdev.num_resources = 1;
+	panel->pdev.dev.platform_data = &panel->panel_data;
+
+	if (bridge_data->init)
+		bridge_data->init(bridge_data, client_data);
+
+	platform_device_register(&panel->pdev);
+
+	return 0;
+}
+
+static int mddi_nt35399_remove(struct platform_device *pdev)
+{
+	struct panel_info *panel = platform_get_drvdata(pdev);
+
+	setup_vsync(panel, 0);
+	kfree(panel);
+	return 0;
+}
+
+static struct platform_driver mddi_client_0bda_8a47 = {
+	.probe = mddi_nt35399_probe,
+	.remove = mddi_nt35399_remove,
+	.driver = { .name = "mddi_c_0bda_8a47" },
+};
+
+static int __init mddi_client_nt35399_init(void)
+{
+	return platform_driver_register(&mddi_client_0bda_8a47);
+}
+
+module_init(mddi_client_nt35399_init);
+
diff --git a/drivers/video/msm/mddi_client_toshiba.c b/drivers/video/msm/mddi_client_toshiba.c
new file mode 100644
index 0000000..80d0f5f
--- /dev/null
+++ b/drivers/video/msm/mddi_client_toshiba.c
@@ -0,0 +1,283 @@
+/* drivers/video/msm_fb/mddi_client_toshiba.c
+ *
+ * Support for Toshiba TC358720XBG mddi client devices which require no
+ * special initialization code.
+ *
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <mach/msm_fb.h>
+
+
+#define LCD_CONTROL_BLOCK_BASE 0x110000
+#define CMN         (LCD_CONTROL_BLOCK_BASE|0x10)
+#define INTFLG      (LCD_CONTROL_BLOCK_BASE|0x18)
+#define HCYCLE      (LCD_CONTROL_BLOCK_BASE|0x34)
+#define HDE_START   (LCD_CONTROL_BLOCK_BASE|0x3C)
+#define VPOS        (LCD_CONTROL_BLOCK_BASE|0xC0)
+#define MPLFBUF     (LCD_CONTROL_BLOCK_BASE|0x20)
+#define WAKEUP      (LCD_CONTROL_BLOCK_BASE|0x54)
+#define WSYN_DLY    (LCD_CONTROL_BLOCK_BASE|0x58)
+#define REGENB      (LCD_CONTROL_BLOCK_BASE|0x5C)
+
+#define BASE5 0x150000
+#define BASE6 0x160000
+#define BASE7 0x170000
+
+#define GPIOIEV     (BASE5 + 0x10)
+#define GPIOIE      (BASE5 + 0x14)
+#define GPIORIS     (BASE5 + 0x18)
+#define GPIOMIS     (BASE5 + 0x1C)
+#define GPIOIC      (BASE5 + 0x20)
+
+#define INTMASK     (BASE6 + 0x0C)
+#define INTMASK_VWAKEOUT (1U << 0)
+#define INTMASK_VWAKEOUT_ACTIVE_LOW (1U << 8)
+#define GPIOSEL     (BASE7 + 0x00)
+#define GPIOSEL_VWAKEINT (1U << 0)
+
+static DECLARE_WAIT_QUEUE_HEAD(toshiba_vsync_wait);
+
+struct panel_info {
+	struct msm_mddi_client_data *client_data;
+	struct platform_device pdev;
+	struct msm_panel_data panel_data;
+	struct msmfb_callback *toshiba_callback;
+	int toshiba_got_int;
+};
+
+
+static void toshiba_request_vsync(struct msm_panel_data *panel_data,
+				  struct msmfb_callback *callback)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	panel->toshiba_callback = callback;
+	if (panel->toshiba_got_int) {
+		panel->toshiba_got_int = 0;
+		client_data->activate_link(client_data);
+	}
+}
+
+static void toshiba_clear_vsync(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	client_data->activate_link(client_data);
+}
+
+static void toshiba_wait_vsync(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	if (panel->toshiba_got_int) {
+		panel->toshiba_got_int = 0;
+		client_data->activate_link(client_data); /* clears interrupt */
+	}
+	if (wait_event_timeout(toshiba_vsync_wait, panel->toshiba_got_int,
+				HZ/2) == 0)
+		printk(KERN_ERR "timeout waiting for VSYNC\n");
+	panel->toshiba_got_int = 0;
+	/* interrupt clears when screen dma starts */
+}
+
+static int toshiba_suspend(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+	int ret;
+
+	ret = bridge_data->uninit(bridge_data, client_data);
+	if (ret) {
+		printk(KERN_INFO "mddi toshiba client: non zero return from "
+			"uninit\n");
+		return ret;
+	}
+	client_data->suspend(client_data);
+	return 0;
+}
+
+static int toshiba_resume(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+	int ret;
+
+	client_data->resume(client_data);
+	ret = bridge_data->init(bridge_data, client_data);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+static int toshiba_blank(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+
+	return bridge_data->blank(bridge_data, client_data);
+}
+
+static int toshiba_unblank(struct msm_panel_data *panel_data)
+{
+	struct panel_info *panel = container_of(panel_data, struct panel_info,
+						panel_data);
+	struct msm_mddi_client_data *client_data = panel->client_data;
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+
+	return bridge_data->unblank(bridge_data, client_data);
+}
+
+irqreturn_t toshiba_vsync_interrupt(int irq, void *data)
+{
+	struct panel_info *panel = data;
+
+	panel->toshiba_got_int = 1;
+	if (panel->toshiba_callback) {
+		panel->toshiba_callback->func(panel->toshiba_callback);
+		panel->toshiba_callback = 0;
+	}
+	wake_up(&toshiba_vsync_wait);
+	return IRQ_HANDLED;
+}
+
+static int setup_vsync(struct panel_info *panel,
+		       int init)
+{
+	int ret;
+	int gpio = 97;
+	unsigned int irq;
+
+	if (!init) {
+		ret = 0;
+		goto uninit;
+	}
+	ret = gpio_request(gpio, "vsync");
+	if (ret)
+		goto err_request_gpio_failed;
+
+	ret = gpio_direction_input(gpio);
+	if (ret)
+		goto err_gpio_direction_input_failed;
+
+	ret = irq = gpio_to_irq(gpio);
+	if (ret < 0)
+		goto err_get_irq_num_failed;
+
+	ret = request_irq(irq, toshiba_vsync_interrupt, IRQF_TRIGGER_RISING,
+			  "vsync", panel);
+	if (ret)
+		goto err_request_irq_failed;
+	printk(KERN_INFO "vsync on gpio %d now %d\n",
+	       gpio, gpio_get_value(gpio));
+	return 0;
+
+uninit:
+	free_irq(gpio_to_irq(gpio), panel);
+err_request_irq_failed:
+err_get_irq_num_failed:
+err_gpio_direction_input_failed:
+	gpio_free(gpio);
+err_request_gpio_failed:
+	return ret;
+}
+
+static int mddi_toshiba_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct msm_mddi_client_data *client_data = pdev->dev.platform_data;
+	struct msm_mddi_bridge_platform_data *bridge_data =
+		client_data->private_client_data;
+	struct panel_info *panel =
+		kzalloc(sizeof(struct panel_info), GFP_KERNEL);
+	if (!panel)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, panel);
+
+	/* mddi_remote_write(mddi, 0, WAKEUP); */
+	client_data->remote_write(client_data, GPIOSEL_VWAKEINT, GPIOSEL);
+	client_data->remote_write(client_data, INTMASK_VWAKEOUT, INTMASK);
+
+	ret = setup_vsync(panel, 1);
+	if (ret) {
+		dev_err(&pdev->dev, "mddi_bridge_setup_vsync failed\n");
+		return ret;
+	}
+
+	panel->client_data = client_data;
+	panel->panel_data.suspend = toshiba_suspend;
+	panel->panel_data.resume = toshiba_resume;
+	panel->panel_data.wait_vsync = toshiba_wait_vsync;
+	panel->panel_data.request_vsync = toshiba_request_vsync;
+	panel->panel_data.clear_vsync = toshiba_clear_vsync;
+	panel->panel_data.blank = toshiba_blank;
+	panel->panel_data.unblank = toshiba_unblank;
+	panel->panel_data.fb_data =  &bridge_data->fb_data;
+	panel->panel_data.caps = MSMFB_CAP_PARTIAL_UPDATES;
+
+	panel->pdev.name = "msm_panel";
+	panel->pdev.id = pdev->id;
+	panel->pdev.resource = client_data->fb_resource;
+	panel->pdev.num_resources = 1;
+	panel->pdev.dev.platform_data = &panel->panel_data;
+	bridge_data->init(bridge_data, client_data);
+	platform_device_register(&panel->pdev);
+
+	return 0;
+}
+
+static int mddi_toshiba_remove(struct platform_device *pdev)
+{
+	struct panel_info *panel = platform_get_drvdata(pdev);
+
+	setup_vsync(panel, 0);
+	kfree(panel);
+	return 0;
+}
+
+static struct platform_driver mddi_client_d263_0000 = {
+	.probe = mddi_toshiba_probe,
+	.remove = mddi_toshiba_remove,
+	.driver = { .name = "mddi_c_d263_0000" },
+};
+
+static int __init mddi_client_toshiba_init(void)
+{
+	platform_driver_register(&mddi_client_d263_0000);
+	return 0;
+}
+
+module_init(mddi_client_toshiba_init);
+
diff --git a/drivers/video/msm/mddi_hw.h b/drivers/video/msm/mddi_hw.h
new file mode 100644
index 0000000..45cc01f
--- /dev/null
+++ b/drivers/video/msm/mddi_hw.h
@@ -0,0 +1,305 @@
+/* drivers/video/msm_fb/mddi_hw.h
+ *
+ * MSM MDDI Hardware Registers and Structures
+ *
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _MDDI_HW_H_
+#define _MDDI_HW_H_
+
+#include <linux/types.h>
+
+#define MDDI_CMD                0x0000
+#define MDDI_VERSION            0x0004
+#define MDDI_PRI_PTR            0x0008
+#define MDDI_SEC_PTR            0x000c
+#define MDDI_BPS                0x0010
+#define MDDI_SPM                0x0014
+#define MDDI_INT                0x0018
+#define MDDI_INTEN              0x001c
+#define MDDI_REV_PTR            0x0020
+#define MDDI_REV_SIZE           0x0024
+#define MDDI_STAT               0x0028
+#define MDDI_REV_RATE_DIV       0x002c
+#define MDDI_REV_CRC_ERR        0x0030
+#define MDDI_TA1_LEN            0x0034
+#define MDDI_TA2_LEN            0x0038
+#define MDDI_TEST_BUS           0x003c
+#define MDDI_TEST               0x0040
+#define MDDI_REV_PKT_CNT        0x0044
+#define MDDI_DRIVE_HI           0x0048
+#define MDDI_DRIVE_LO           0x004c
+#define MDDI_DISP_WAKE          0x0050
+#define MDDI_REV_ENCAP_SZ       0x0054
+#define MDDI_RTD_VAL            0x0058
+#define MDDI_PAD_CTL            0x0068
+#define MDDI_DRIVER_START_CNT   0x006c
+#define MDDI_NEXT_PRI_PTR       0x0070
+#define MDDI_NEXT_SEC_PTR       0x0074
+#define MDDI_MISR_CTL           0x0078
+#define MDDI_MISR_DATA          0x007c
+#define MDDI_SF_CNT             0x0080
+#define MDDI_MF_CNT             0x0084
+#define MDDI_CURR_REV_PTR       0x0088
+#define MDDI_CORE_VER           0x008c
+
+#define MDDI_INT_PRI_PTR_READ       0x0001
+#define MDDI_INT_SEC_PTR_READ       0x0002
+#define MDDI_INT_REV_DATA_AVAIL     0x0004
+#define MDDI_INT_DISP_REQ           0x0008
+#define MDDI_INT_PRI_UNDERFLOW      0x0010
+#define MDDI_INT_SEC_UNDERFLOW      0x0020
+#define MDDI_INT_REV_OVERFLOW       0x0040
+#define MDDI_INT_CRC_ERROR          0x0080
+#define MDDI_INT_MDDI_IN            0x0100
+#define MDDI_INT_PRI_OVERWRITE      0x0200
+#define MDDI_INT_SEC_OVERWRITE      0x0400
+#define MDDI_INT_REV_OVERWRITE      0x0800
+#define MDDI_INT_DMA_FAILURE        0x1000
+#define MDDI_INT_LINK_ACTIVE        0x2000
+#define MDDI_INT_IN_HIBERNATION     0x4000
+#define MDDI_INT_PRI_LINK_LIST_DONE 0x8000
+#define MDDI_INT_SEC_LINK_LIST_DONE 0x10000
+#define MDDI_INT_NO_CMD_PKTS_PEND   0x20000
+#define MDDI_INT_RTD_FAILURE        0x40000
+#define MDDI_INT_REV_PKT_RECEIVED   0x80000
+#define MDDI_INT_REV_PKTS_AVAIL     0x100000
+
+#define MDDI_INT_NEED_CLEAR ( \
+	MDDI_INT_REV_DATA_AVAIL | \
+	MDDI_INT_PRI_UNDERFLOW | \
+	MDDI_INT_SEC_UNDERFLOW | \
+	MDDI_INT_REV_OVERFLOW | \
+	MDDI_INT_CRC_ERROR | \
+	MDDI_INT_REV_PKT_RECEIVED)
+
+
+#define MDDI_STAT_LINK_ACTIVE        0x0001
+#define MDDI_STAT_NEW_REV_PTR        0x0002
+#define MDDI_STAT_NEW_PRI_PTR        0x0004
+#define MDDI_STAT_NEW_SEC_PTR        0x0008
+#define MDDI_STAT_IN_HIBERNATION     0x0010
+#define MDDI_STAT_PRI_LINK_LIST_DONE 0x0020
+#define MDDI_STAT_SEC_LINK_LIST_DONE 0x0040
+#define MDDI_STAT_PENDING_TIMING_PKT 0x0080
+#define MDDI_STAT_PENDING_REV_ENCAP  0x0100
+#define MDDI_STAT_PENDING_POWERDOWN  0x0200
+#define MDDI_STAT_RTD_MEAS_FAIL      0x0800
+#define MDDI_STAT_CLIENT_WAKEUP_REQ  0x1000
+
+
+#define MDDI_CMD_POWERDOWN           0x0100
+#define MDDI_CMD_POWERUP             0x0200
+#define MDDI_CMD_HIBERNATE           0x0300
+#define MDDI_CMD_RESET               0x0400
+#define MDDI_CMD_DISP_IGNORE         0x0501
+#define MDDI_CMD_DISP_LISTEN         0x0500
+#define MDDI_CMD_SEND_REV_ENCAP      0x0600
+#define MDDI_CMD_GET_CLIENT_CAP      0x0601
+#define MDDI_CMD_GET_CLIENT_STATUS   0x0602
+#define MDDI_CMD_SEND_RTD            0x0700
+#define MDDI_CMD_LINK_ACTIVE         0x0900
+#define MDDI_CMD_PERIODIC_REV_ENCAP  0x0A00
+#define MDDI_CMD_FORCE_NEW_REV_PTR   0x0C00
+
+
+
+#define MDDI_VIDEO_REV_PKT_SIZE              0x40
+#define MDDI_CLIENT_CAPABILITY_REV_PKT_SIZE  0x60
+#define MDDI_MAX_REV_PKT_SIZE                0x60
+
+/* #define MDDI_REV_BUFFER_SIZE 128 */
+#define MDDI_REV_BUFFER_SIZE (MDDI_MAX_REV_PKT_SIZE * 4)
+
+/* MDP sends 256 pixel packets, so lower value hibernates more without
+ * significantly increasing latency of waiting for next subframe */
+#define MDDI_HOST_BYTES_PER_SUBFRAME  0x3C00
+#define MDDI_HOST_TA2_LEN       0x000c
+#define MDDI_HOST_REV_RATE_DIV  0x0002
+
+
+struct __attribute__((packed)) mddi_rev_packet {
+	uint16_t length;
+	uint16_t type;
+	uint16_t client_id;
+};
+
+struct __attribute__((packed)) mddi_client_status {
+	uint16_t length;
+	uint16_t type;
+	uint16_t client_id;
+	uint16_t reverse_link_request;  /* bytes needed in rev encap message */
+	uint8_t  crc_error_count;
+	uint8_t  capability_change;
+	uint16_t graphics_busy_flags;
+	uint16_t crc16;
+};
+
+struct __attribute__((packed)) mddi_client_caps {
+	uint16_t length; /* length, exclusive of this field */
+	uint16_t type; /* 66 */
+	uint16_t client_id;
+
+	uint16_t Protocol_Version;
+	uint16_t Minimum_Protocol_Version;
+	uint16_t Data_Rate_Capability;
+	uint8_t  Interface_Type_Capability;
+	uint8_t  Number_of_Alt_Displays;
+	uint16_t PostCal_Data_Rate;
+	uint16_t Bitmap_Width;
+	uint16_t Bitmap_Height;
+	uint16_t Display_Window_Width;
+	uint16_t Display_Window_Height;
+	uint32_t Color_Map_Size;
+	uint16_t Color_Map_RGB_Width;
+	uint16_t RGB_Capability;
+	uint8_t  Monochrome_Capability;
+	uint8_t  Reserved_1;
+	uint16_t Y_Cb_Cr_Capability;
+	uint16_t Bayer_Capability;
+	uint16_t Alpha_Cursor_Image_Planes;
+	uint32_t Client_Feature_Capability_Indicators;
+	uint8_t  Maximum_Video_Frame_Rate_Capability;
+	uint8_t  Minimum_Video_Frame_Rate_Capability;
+	uint16_t Minimum_Sub_frame_Rate;
+	uint16_t Audio_Buffer_Depth;
+	uint16_t Audio_Channel_Capability;
+	uint16_t Audio_Sample_Rate_Capability;
+	uint8_t  Audio_Sample_Resolution;
+	uint8_t  Mic_Audio_Sample_Resolution;
+	uint16_t Mic_Sample_Rate_Capability;
+	uint8_t  Keyboard_Data_Format;
+	uint8_t  pointing_device_data_format;
+	uint16_t content_protection_type;
+	uint16_t Mfr_Name;
+	uint16_t Product_Code;
+	uint16_t Reserved_3;
+	uint32_t Serial_Number;
+	uint8_t  Week_of_Manufacture;
+	uint8_t  Year_of_Manufacture;
+
+	uint16_t crc16;
+} mddi_client_capability_type;
+
+
+struct __attribute__((packed)) mddi_video_stream {
+	uint16_t length;
+	uint16_t type; /* 16 */
+	uint16_t client_id; /* 0 */
+
+	uint16_t video_data_format_descriptor;
+/* format of each pixel in the Pixel Data in the present stream in the
+ * present packet.
+ * If bits [15:13] = 000 monochrome
+ * If bits [15:13] = 001 color pixels (palette).
+ * If bits [15:13] = 010 color pixels in raw RGB
+ * If bits [15:13] = 011 data in 4:2:2 Y Cb Cr format
+ * If bits [15:13] = 100 Bayer pixels
+ */
+
+	uint16_t pixel_data_attributes;
+/* interpreted as follows:
+ * Bits [1:0] = 11  pixel data is displayed to both eyes
+ * Bits [1:0] = 10  pixel data is routed to the left eye only.
+ * Bits [1:0] = 01  pixel data is routed to the right eye only.
+ * Bits [1:0] = 00  pixel data is routed to the alternate display.
+ * Bit 2 is 0  Pixel Data is in the standard progressive format.
+ * Bit 2 is 1  Pixel Data is in interlace format.
+ * Bit 3 is 0  Pixel Data is in the standard progressive format.
+ * Bit 3 is 1  Pixel Data is in alternate pixel format.
+ * Bit 4 is 0  Pixel Data is to or from the display frame buffer.
+ * Bit 4 is 1  Pixel Data is to or from the camera.
+ * Bit 5 is 0  pixel data contains the next consecutive row of pixels.
+ * Bit 5 is 1  X Left Edge, Y Top Edge, X Right Edge, Y Bottom Edge,
+ *             X Start, and Y Start parameters are not defined and
+ *             shall be ignored by the client.
+ * Bits [7:6] = 01  Pixel data is written to the offline image buffer.
+ * Bits [7:6] = 00  Pixel data is written to the buffer to refresh display.
+ * Bits [7:6] = 11  Pixel data is written to all image buffers.
+ * Bits [7:6] = 10  Invalid. Reserved for future use.
+ * Bits 8 through 11 alternate display number.
+ * Bits 12 through 14 are reserved for future use and shall be set to zero.
+ * Bit 15 is 1 the row of pixels is the last row of pixels in a frame.
+ */
+
+	uint16_t x_left_edge;
+	uint16_t y_top_edge;
+	/* X,Y coordinate of the top left edge of the screen window */
+
+	uint16_t x_right_edge;
+	uint16_t y_bottom_edge;
+	/* X,Y coordinate of the bottom right edge of the window being
+	 * updated. */
+
+	uint16_t x_start;
+	uint16_t y_start;
+	/* (X Start, Y Start) is the first pixel in the Pixel Data field
+	 * below. */
+
+	uint16_t pixel_count;
+	/* number of pixels in the Pixel Data field below. */
+
+	uint16_t parameter_CRC;
+	/* 16-bit CRC of all bytes from the Packet Length to the Pixel Count. */
+
+	uint16_t reserved;
+	/* 16-bit variable to make structure align on 4 byte boundary */
+};
+
+#define TYPE_VIDEO_STREAM      16
+#define TYPE_CLIENT_CAPS       66
+#define TYPE_REGISTER_ACCESS   146
+#define TYPE_CLIENT_STATUS     70
+
+struct __attribute__((packed)) mddi_register_access {
+	uint16_t length;
+	uint16_t type; /* 146 */
+	uint16_t client_id;
+
+	uint16_t read_write_info;
+	/* Bits 13:0  a 14-bit unsigned integer that specifies the number of
+	 *            32-bit Register Data List items to be transferred in the
+	 *            Register Data List field.
+	 * Bits[15:14] = 00  Write to register(s);
+	 * Bits[15:14] = 10  Read from register(s);
+	 * Bits[15:14] = 11  Response to a Read.
+	 * Bits[15:14] = 01  this value is reserved for future use. */
+#define MDDI_WRITE     (0 << 14)
+#define MDDI_READ      (2 << 14)
+#define MDDI_READ_RESP (3 << 14)
+
+	uint32_t register_address;
+	/* the register address that is to be written to or read from. */
+
+	uint16_t crc16;
+
+	uint32_t register_data_list;
+	/* list of 4-byte register data values for/from client registers */
+};
+
+struct __attribute__((packed)) mddi_llentry {
+	uint16_t flags;
+	uint16_t header_count;
+	uint16_t data_count;
+	dma_addr_t data; /* 32 bit */
+	struct mddi_llentry *next;
+	uint16_t reserved;
+	union {
+		struct mddi_video_stream v;
+		struct mddi_register_access r;
+		uint32_t _[12];
+	} u;
+};
+
+#endif
diff --git a/drivers/video/msm/mdp.c b/drivers/video/msm/mdp.c
new file mode 100644
index 0000000..99636a2
--- /dev/null
+++ b/drivers/video/msm/mdp.c
@@ -0,0 +1,538 @@
+/* drivers/video/msm_fb/mdp.c
+ *
+ * MSM MDP Interface (used by framebuffer core)
+ *
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fb.h>
+#include <linux/msm_mdp.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/clk.h>
+#include <linux/file.h>
+#ifdef CONFIG_ANDROID_PMEM
+#include <linux/android_pmem.h>
+#endif
+#include <linux/major.h>
+
+#include <mach/msm_iomap.h>
+#include <mach/msm_fb.h>
+#include <linux/platform_device.h>
+
+#include "mdp_hw.h"
+
+struct class *mdp_class;
+
+#define MDP_CMD_DEBUG_ACCESS_BASE (0x10000)
+
+static uint16_t mdp_default_ccs[] = {
+	0x254, 0x000, 0x331, 0x254, 0xF38, 0xE61, 0x254, 0x409, 0x000,
+	0x010, 0x080, 0x080
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(mdp_dma2_waitqueue);
+static DECLARE_WAIT_QUEUE_HEAD(mdp_ppp_waitqueue);
+static struct msmfb_callback *dma_callback;
+static struct clk *clk;
+static unsigned int mdp_irq_mask;
+static DEFINE_SPINLOCK(mdp_lock);
+DEFINE_MUTEX(mdp_mutex);
+
+static int enable_mdp_irq(struct mdp_info *mdp, uint32_t mask)
+{
+	unsigned long irq_flags;
+	int ret = 0;
+
+	BUG_ON(!mask);
+
+	spin_lock_irqsave(&mdp_lock, irq_flags);
+	/* if the mask bits are already set return an error, this interrupt
+	 * is already enabled */
+	if (mdp_irq_mask & mask) {
+		printk(KERN_ERR "mdp irq already on already on %x %x\n",
+		       mdp_irq_mask, mask);
+		ret = -1;
+	}
+	/* if the mdp irq is not already enabled enable it */
+	if (!mdp_irq_mask) {
+		if (clk)
+			clk_enable(clk);
+		enable_irq(mdp->irq);
+	}
+
+	/* update the irq mask to reflect the fact that the interrupt is
+	 * enabled */
+	mdp_irq_mask |= mask;
+	spin_unlock_irqrestore(&mdp_lock, irq_flags);
+	return ret;
+}
+
+static int locked_disable_mdp_irq(struct mdp_info *mdp, uint32_t mask)
+{
+	/* this interrupt is already disabled! */
+	if (!(mdp_irq_mask & mask)) {
+		printk(KERN_ERR "mdp irq already off %x %x\n",
+		       mdp_irq_mask, mask);
+		return -1;
+	}
+	/* update the irq mask to reflect the fact that the interrupt is
+	 * disabled */
+	mdp_irq_mask &= ~(mask);
+	/* if no one is waiting on the interrupt, disable it */
+	if (!mdp_irq_mask) {
+		disable_irq(mdp->irq);
+		if (clk)
+			clk_disable(clk);
+	}
+	return 0;
+}
+
+static int disable_mdp_irq(struct mdp_info *mdp, uint32_t mask)
+{
+	unsigned long irq_flags;
+	int ret;
+
+	spin_lock_irqsave(&mdp_lock, irq_flags);
+	ret = locked_disable_mdp_irq(mdp, mask);
+	spin_unlock_irqrestore(&mdp_lock, irq_flags);
+	return ret;
+}
+
+static irqreturn_t mdp_isr(int irq, void *data)
+{
+	uint32_t status;
+	unsigned long irq_flags;
+	struct mdp_info *mdp = data;
+
+	spin_lock_irqsave(&mdp_lock, irq_flags);
+
+	status = mdp_readl(mdp, MDP_INTR_STATUS);
+	mdp_writel(mdp, status, MDP_INTR_CLEAR);
+
+	status &= mdp_irq_mask;
+	if (status & DL0_DMA2_TERM_DONE) {
+		if (dma_callback) {
+			dma_callback->func(dma_callback);
+			dma_callback = NULL;
+		}
+		wake_up(&mdp_dma2_waitqueue);
+	}
+
+	if (status & DL0_ROI_DONE)
+		wake_up(&mdp_ppp_waitqueue);
+
+	if (status)
+		locked_disable_mdp_irq(mdp, status);
+
+	spin_unlock_irqrestore(&mdp_lock, irq_flags);
+	return IRQ_HANDLED;
+}
+
+static uint32_t mdp_check_mask(uint32_t mask)
+{
+	uint32_t ret;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&mdp_lock, irq_flags);
+	ret = mdp_irq_mask & mask;
+	spin_unlock_irqrestore(&mdp_lock, irq_flags);
+	return ret;
+}
+
+static int mdp_wait(struct mdp_info *mdp, uint32_t mask, wait_queue_head_t *wq)
+{
+	int ret = 0;
+	unsigned long irq_flags;
+
+	wait_event_timeout(*wq, !mdp_check_mask(mask), HZ);
+
+	spin_lock_irqsave(&mdp_lock, irq_flags);
+	if (mdp_irq_mask & mask) {
+		locked_disable_mdp_irq(mdp, mask);
+		printk(KERN_WARNING "timeout waiting for mdp to complete %x\n",
+		       mask);
+		ret = -ETIMEDOUT;
+	}
+	spin_unlock_irqrestore(&mdp_lock, irq_flags);
+
+	return ret;
+}
+
+void mdp_dma_wait(struct mdp_device *mdp_dev)
+{
+#define MDP_MAX_TIMEOUTS 20
+	static int timeout_count;
+	struct mdp_info *mdp = container_of(mdp_dev, struct mdp_info, mdp_dev);
+
+	if (mdp_wait(mdp, DL0_DMA2_TERM_DONE, &mdp_dma2_waitqueue) == -ETIMEDOUT)
+		timeout_count++;
+	else
+		timeout_count = 0;
+
+	if (timeout_count > MDP_MAX_TIMEOUTS) {
+		printk(KERN_ERR "mdp: dma failed %d times, somethings wrong!\n",
+		       MDP_MAX_TIMEOUTS);
+		BUG();
+	}
+}
+
+static int mdp_ppp_wait(struct mdp_info *mdp)
+{
+	return mdp_wait(mdp, DL0_ROI_DONE, &mdp_ppp_waitqueue);
+}
+
+void mdp_dma_to_mddi(struct mdp_info *mdp, uint32_t addr, uint32_t stride,
+		     uint32_t width, uint32_t height, uint32_t x, uint32_t y,
+		     struct msmfb_callback *callback)
+{
+	uint32_t dma2_cfg;
+	uint16_t ld_param = 0; /* 0=PRIM, 1=SECD, 2=EXT */
+
+	if (enable_mdp_irq(mdp, DL0_DMA2_TERM_DONE)) {
+		printk(KERN_ERR "mdp_dma_to_mddi: busy\n");
+		return;
+	}
+
+	dma_callback = callback;
+
+	dma2_cfg = DMA_PACK_TIGHT |
+		DMA_PACK_ALIGN_LSB |
+		DMA_PACK_PATTERN_RGB |
+		DMA_OUT_SEL_AHB |
+		DMA_IBUF_NONCONTIGUOUS;
+
+	dma2_cfg |= DMA_IBUF_FORMAT_RGB565;
+
+	dma2_cfg |= DMA_OUT_SEL_MDDI;
+
+	dma2_cfg |= DMA_MDDI_DMAOUT_LCD_SEL_PRIMARY;
+
+	dma2_cfg |= DMA_DITHER_EN;
+
+	/* setup size, address, and stride */
+	mdp_writel(mdp, (height << 16) | (width),
+		   MDP_CMD_DEBUG_ACCESS_BASE + 0x0184);
+	mdp_writel(mdp, addr, MDP_CMD_DEBUG_ACCESS_BASE + 0x0188);
+	mdp_writel(mdp, stride, MDP_CMD_DEBUG_ACCESS_BASE + 0x018C);
+
+	/* 666 18BPP */
+	dma2_cfg |= DMA_DSTC0G_6BITS | DMA_DSTC1B_6BITS | DMA_DSTC2R_6BITS;
+
+	/* set y & x offset and MDDI transaction parameters */
+	mdp_writel(mdp, (y << 16) | (x), MDP_CMD_DEBUG_ACCESS_BASE + 0x0194);
+	mdp_writel(mdp, ld_param, MDP_CMD_DEBUG_ACCESS_BASE + 0x01a0);
+	mdp_writel(mdp, (MDDI_VDO_PACKET_DESC << 16) | MDDI_VDO_PACKET_PRIM,
+		   MDP_CMD_DEBUG_ACCESS_BASE + 0x01a4);
+
+	mdp_writel(mdp, dma2_cfg, MDP_CMD_DEBUG_ACCESS_BASE + 0x0180);
+
+	/* start DMA2 */
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0044);
+}
+
+void mdp_dma(struct mdp_device *mdp_dev, uint32_t addr, uint32_t stride,
+	     uint32_t width, uint32_t height, uint32_t x, uint32_t y,
+	     struct msmfb_callback *callback, int interface)
+{
+	struct mdp_info *mdp = container_of(mdp_dev, struct mdp_info, mdp_dev);
+
+	if (interface == MSM_MDDI_PMDH_INTERFACE) {
+		mdp_dma_to_mddi(mdp, addr, stride, width, height, x, y,
+				callback);
+	}
+}
+
+int get_img(struct mdp_img *img, struct fb_info *info,
+	    unsigned long *start, unsigned long *len,
+	    struct file **filep)
+{
+	int put_needed, ret = 0;
+	struct file *file;
+	unsigned long vstart;
+
+#ifdef CONFIG_ANDROID_PMEM
+	if (!get_pmem_file(img->memory_id, start, &vstart, len, filep))
+		return 0;
+#endif
+
+	file = fget_light(img->memory_id, &put_needed);
+	if (file == NULL)
+		return -1;
+
+	if (MAJOR(file->f_dentry->d_inode->i_rdev) == FB_MAJOR) {
+		*start = info->fix.smem_start;
+		*len = info->fix.smem_len;
+	} else
+		ret = -1;
+	fput_light(file, put_needed);
+
+	return ret;
+}
+
+void put_img(struct file *src_file, struct file *dst_file)
+{
+#ifdef CONFIG_ANDROID_PMEM
+	if (src_file)
+		put_pmem_file(src_file);
+	if (dst_file)
+		put_pmem_file(dst_file);
+#endif
+}
+
+int mdp_blit(struct mdp_device *mdp_dev, struct fb_info *fb,
+	     struct mdp_blit_req *req)
+{
+	int ret;
+	unsigned long src_start = 0, src_len = 0, dst_start = 0, dst_len = 0;
+	struct mdp_info *mdp = container_of(mdp_dev, struct mdp_info, mdp_dev);
+	struct file *src_file = 0, *dst_file = 0;
+
+	/* WORKAROUND FOR HARDWARE BUG IN BG TILE FETCH */
+	if (unlikely(req->src_rect.h == 0 ||
+		     req->src_rect.w == 0)) {
+		printk(KERN_ERR "mpd_ppp: src img of zero size!\n");
+		return -EINVAL;
+	}
+	if (unlikely(req->dst_rect.h == 0 ||
+		     req->dst_rect.w == 0))
+		return -EINVAL;
+
+	/* do this first so that if this fails, the caller can always
+	 * safely call put_img */
+	if (unlikely(get_img(&req->src, fb, &src_start, &src_len, &src_file))) {
+		printk(KERN_ERR "mpd_ppp: could not retrieve src image from "
+				"memory\n");
+		return -EINVAL;
+	}
+
+	if (unlikely(get_img(&req->dst, fb, &dst_start, &dst_len, &dst_file))) {
+		printk(KERN_ERR "mpd_ppp: could not retrieve dst image from "
+				"memory\n");
+#ifdef CONFIG_ANDROID_PMEM
+		put_pmem_file(src_file);
+#endif
+		return -EINVAL;
+	}
+	mutex_lock(&mdp_mutex);
+
+	/* transp_masking unimplemented */
+	req->transp_mask = MDP_TRANSP_NOP;
+	if (unlikely((req->transp_mask != MDP_TRANSP_NOP ||
+		      req->alpha != MDP_ALPHA_NOP ||
+		      HAS_ALPHA(req->src.format)) &&
+		     (req->flags & MDP_ROT_90 &&
+		      req->dst_rect.w <= 16 && req->dst_rect.h >= 16))) {
+		int i;
+		unsigned int tiles = req->dst_rect.h / 16;
+		unsigned int remainder = req->dst_rect.h % 16;
+		req->src_rect.w = 16*req->src_rect.w / req->dst_rect.h;
+		req->dst_rect.h = 16;
+		for (i = 0; i < tiles; i++) {
+			enable_mdp_irq(mdp, DL0_ROI_DONE);
+			ret = mdp_ppp_blit(mdp, req, src_file, src_start,
+					   src_len, dst_file, dst_start,
+					   dst_len);
+			if (ret)
+				goto err_bad_blit;
+			ret = mdp_ppp_wait(mdp);
+			if (ret)
+				goto err_wait_failed;
+			req->dst_rect.y += 16;
+			req->src_rect.x += req->src_rect.w;
+		}
+		if (!remainder)
+			goto end;
+		req->src_rect.w = remainder*req->src_rect.w / req->dst_rect.h;
+		req->dst_rect.h = remainder;
+	}
+	enable_mdp_irq(mdp, DL0_ROI_DONE);
+	ret = mdp_ppp_blit(mdp, req, src_file, src_start, src_len, dst_file,
+			   dst_start,
+			   dst_len);
+	if (ret)
+		goto err_bad_blit;
+	ret = mdp_ppp_wait(mdp);
+	if (ret)
+		goto err_wait_failed;
+end:
+	put_img(src_file, dst_file);
+	mutex_unlock(&mdp_mutex);
+	return 0;
+err_bad_blit:
+	disable_mdp_irq(mdp, DL0_ROI_DONE);
+err_wait_failed:
+	put_img(src_file, dst_file);
+	mutex_unlock(&mdp_mutex);
+	return ret;
+}
+
+void mdp_set_grp_disp(struct mdp_device *mdp_dev, unsigned disp_id)
+{
+	struct mdp_info *mdp = container_of(mdp_dev, struct mdp_info, mdp_dev);
+
+	disp_id &= 0xf;
+	mdp_writel(mdp, disp_id, MDP_FULL_BYPASS_WORD43);
+}
+
+int register_mdp_client(struct class_interface *cint)
+{
+	if (!mdp_class) {
+		pr_err("mdp: no mdp_class when registering mdp client\n");
+		return -ENODEV;
+	}
+	cint->class = mdp_class;
+	return class_interface_register(cint);
+}
+
+#include "mdp_csc_table.h"
+#include "mdp_scale_tables.h"
+
+int mdp_probe(struct platform_device *pdev)
+{
+	struct resource *resource;
+	int ret;
+	int n;
+	struct mdp_info *mdp;
+
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!resource) {
+		pr_err("mdp: can not get mdp mem resource!\n");
+		return -ENOMEM;
+	}
+
+	mdp = kzalloc(sizeof(struct mdp_info), GFP_KERNEL);
+	if (!mdp)
+		return -ENOMEM;
+
+	mdp->irq = platform_get_irq(pdev, 0);
+	if (mdp->irq < 0) {
+		pr_err("mdp: can not get mdp irq\n");
+		ret = mdp->irq;
+		goto error_get_irq;
+	}
+
+	mdp->base = ioremap(resource->start,
+			    resource->end - resource->start);
+	if (mdp->base == 0) {
+		printk(KERN_ERR "msmfb: cannot allocate mdp regs!\n");
+		ret = -ENOMEM;
+		goto error_ioremap;
+	}
+
+	mdp->mdp_dev.dma = mdp_dma;
+	mdp->mdp_dev.dma_wait = mdp_dma_wait;
+	mdp->mdp_dev.blit = mdp_blit;
+	mdp->mdp_dev.set_grp_disp = mdp_set_grp_disp;
+
+	clk = clk_get(&pdev->dev, "mdp_clk");
+	if (IS_ERR(clk)) {
+		printk(KERN_INFO "mdp: failed to get mdp clk");
+		return PTR_ERR(clk);
+	}
+
+	ret = request_irq(mdp->irq, mdp_isr, IRQF_DISABLED, "msm_mdp", mdp);
+	if (ret)
+		goto error_request_irq;
+	disable_irq(mdp->irq);
+	mdp_irq_mask = 0;
+
+	/* debug interface write access */
+	mdp_writel(mdp, 1, 0x60);
+
+	mdp_writel(mdp, MDP_ANY_INTR_MASK, MDP_INTR_ENABLE);
+	mdp_writel(mdp, 1, MDP_EBI2_PORTMAP_MODE);
+
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x01f8);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x01fc);
+
+	for (n = 0; n < ARRAY_SIZE(csc_table); n++)
+		mdp_writel(mdp, csc_table[n].val, csc_table[n].reg);
+
+	/* clear up unused fg/main registers */
+	/* comp.plane 2&3 ystride */
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0120);
+
+	/* unpacked pattern */
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x012c);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0130);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0134);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0158);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x015c);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0160);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0170);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0174);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x017c);
+
+	/* comp.plane 2 & 3 */
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0114);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x0118);
+
+	/* clear unused bg registers */
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x01c8);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x01d0);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x01dc);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x01e0);
+	mdp_writel(mdp, 0, MDP_CMD_DEBUG_ACCESS_BASE + 0x01e4);
+
+	for (n = 0; n < ARRAY_SIZE(mdp_upscale_table); n++)
+		mdp_writel(mdp, mdp_upscale_table[n].val,
+		       mdp_upscale_table[n].reg);
+
+	for (n = 0; n < 9; n++)
+		mdp_writel(mdp, mdp_default_ccs[n], 0x40440 + 4 * n);
+	mdp_writel(mdp, mdp_default_ccs[9], 0x40500 + 4 * 0);
+	mdp_writel(mdp, mdp_default_ccs[10], 0x40500 + 4 * 0);
+	mdp_writel(mdp, mdp_default_ccs[11], 0x40500 + 4 * 0);
+
+	/* register mdp device */
+	mdp->mdp_dev.dev.parent = &pdev->dev;
+	mdp->mdp_dev.dev.class = mdp_class;
+	snprintf(mdp->mdp_dev.dev.bus_id, BUS_ID_SIZE, "mdp%d", pdev->id);
+
+	/* if you can remove the platform device you'd have to implement
+	 * this:
+	mdp_dev.release = mdp_class; */
+
+	ret = device_register(&mdp->mdp_dev.dev);
+	if (ret)
+		goto error_device_register;
+	return 0;
+
+error_device_register:
+	free_irq(mdp->irq, mdp);
+error_request_irq:
+	iounmap(mdp->base);
+error_get_irq:
+error_ioremap:
+	kfree(mdp);
+	return ret;
+}
+
+static struct platform_driver msm_mdp_driver = {
+	.probe = mdp_probe,
+	.driver = {.name = "msm_mdp"},
+};
+
+static int __init mdp_init(void)
+{
+	mdp_class = class_create(THIS_MODULE, "msm_mdp");
+	if (IS_ERR(mdp_class)) {
+		printk(KERN_ERR "Error creating mdp class\n");
+		return PTR_ERR(mdp_class);
+	}
+	return platform_driver_register(&msm_mdp_driver);
+}
+
+subsys_initcall(mdp_init);
diff --git a/drivers/video/msm/mdp_csc_table.h b/drivers/video/msm/mdp_csc_table.h
new file mode 100644
index 0000000..d1cde30
--- /dev/null
+++ b/drivers/video/msm/mdp_csc_table.h
@@ -0,0 +1,582 @@
+/* drivers/video/msm_fb/mdp_csc_table.h
+ *
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+static struct {
+	uint32_t reg;
+	uint32_t val;
+} csc_table[] = {
+	{ 0x40400, 0x83 },
+	{ 0x40404, 0x102 },
+	{ 0x40408, 0x32 },
+	{ 0x4040c, 0xffffffb5 },
+	{ 0x40410, 0xffffff6c },
+	{ 0x40414, 0xe1 },
+	{ 0x40418, 0xe1 },
+	{ 0x4041c, 0xffffff45 },
+	{ 0x40420, 0xffffffdc },
+	{ 0x40440, 0x254 },
+	{ 0x40444, 0x0 },
+	{ 0x40448, 0x331 },
+	{ 0x4044c, 0x254 },
+	{ 0x40450, 0xffffff38 },
+	{ 0x40454, 0xfffffe61 },
+	{ 0x40458, 0x254 },
+	{ 0x4045c, 0x409 },
+	{ 0x40460, 0x0 },
+	{ 0x40480, 0x5d },
+	{ 0x40484, 0x13a },
+	{ 0x40488, 0x20 },
+	{ 0x4048c, 0xffffffcd },
+	{ 0x40490, 0xffffff54 },
+	{ 0x40494, 0xe1 },
+	{ 0x40498, 0xe1 },
+	{ 0x4049c, 0xffffff35 },
+	{ 0x404a0, 0xffffffec },
+	{ 0x404c0, 0x254 },
+	{ 0x404c4, 0x0 },
+	{ 0x404c8, 0x396 },
+	{ 0x404cc, 0x254 },
+	{ 0x404d0, 0xffffff94 },
+	{ 0x404d4, 0xfffffef0 },
+	{ 0x404d8, 0x254 },
+	{ 0x404dc, 0x43a },
+	{ 0x404e0, 0x0 },
+	{ 0x40500, 0x10 },
+	{ 0x40504, 0x80 },
+	{ 0x40508, 0x80 },
+	{ 0x40540, 0x10 },
+	{ 0x40544, 0x80 },
+	{ 0x40548, 0x80 },
+	{ 0x40580, 0x10 },
+	{ 0x40584, 0xeb },
+	{ 0x40588, 0x10 },
+	{ 0x4058c, 0xf0 },
+	{ 0x405c0, 0x10 },
+	{ 0x405c4, 0xeb },
+	{ 0x405c8, 0x10 },
+	{ 0x405cc, 0xf0 },
+	{ 0x40800, 0x0 },
+	{ 0x40804, 0x151515 },
+	{ 0x40808, 0x1d1d1d },
+	{ 0x4080c, 0x232323 },
+	{ 0x40810, 0x272727 },
+	{ 0x40814, 0x2b2b2b },
+	{ 0x40818, 0x2f2f2f },
+	{ 0x4081c, 0x333333 },
+	{ 0x40820, 0x363636 },
+	{ 0x40824, 0x393939 },
+	{ 0x40828, 0x3b3b3b },
+	{ 0x4082c, 0x3e3e3e },
+	{ 0x40830, 0x404040 },
+	{ 0x40834, 0x434343 },
+	{ 0x40838, 0x454545 },
+	{ 0x4083c, 0x474747 },
+	{ 0x40840, 0x494949 },
+	{ 0x40844, 0x4b4b4b },
+	{ 0x40848, 0x4d4d4d },
+	{ 0x4084c, 0x4f4f4f },
+	{ 0x40850, 0x515151 },
+	{ 0x40854, 0x535353 },
+	{ 0x40858, 0x555555 },
+	{ 0x4085c, 0x565656 },
+	{ 0x40860, 0x585858 },
+	{ 0x40864, 0x5a5a5a },
+	{ 0x40868, 0x5b5b5b },
+	{ 0x4086c, 0x5d5d5d },
+	{ 0x40870, 0x5e5e5e },
+	{ 0x40874, 0x606060 },
+	{ 0x40878, 0x616161 },
+	{ 0x4087c, 0x636363 },
+	{ 0x40880, 0x646464 },
+	{ 0x40884, 0x666666 },
+	{ 0x40888, 0x676767 },
+	{ 0x4088c, 0x686868 },
+	{ 0x40890, 0x6a6a6a },
+	{ 0x40894, 0x6b6b6b },
+	{ 0x40898, 0x6c6c6c },
+	{ 0x4089c, 0x6e6e6e },
+	{ 0x408a0, 0x6f6f6f },
+	{ 0x408a4, 0x707070 },
+	{ 0x408a8, 0x717171 },
+	{ 0x408ac, 0x727272 },
+	{ 0x408b0, 0x747474 },
+	{ 0x408b4, 0x757575 },
+	{ 0x408b8, 0x767676 },
+	{ 0x408bc, 0x777777 },
+	{ 0x408c0, 0x787878 },
+	{ 0x408c4, 0x797979 },
+	{ 0x408c8, 0x7a7a7a },
+	{ 0x408cc, 0x7c7c7c },
+	{ 0x408d0, 0x7d7d7d },
+	{ 0x408d4, 0x7e7e7e },
+	{ 0x408d8, 0x7f7f7f },
+	{ 0x408dc, 0x808080 },
+	{ 0x408e0, 0x818181 },
+	{ 0x408e4, 0x828282 },
+	{ 0x408e8, 0x838383 },
+	{ 0x408ec, 0x848484 },
+	{ 0x408f0, 0x858585 },
+	{ 0x408f4, 0x868686 },
+	{ 0x408f8, 0x878787 },
+	{ 0x408fc, 0x888888 },
+	{ 0x40900, 0x898989 },
+	{ 0x40904, 0x8a8a8a },
+	{ 0x40908, 0x8b8b8b },
+	{ 0x4090c, 0x8c8c8c },
+	{ 0x40910, 0x8d8d8d },
+	{ 0x40914, 0x8e8e8e },
+	{ 0x40918, 0x8f8f8f },
+	{ 0x4091c, 0x8f8f8f },
+	{ 0x40920, 0x909090 },
+	{ 0x40924, 0x919191 },
+	{ 0x40928, 0x929292 },
+	{ 0x4092c, 0x939393 },
+	{ 0x40930, 0x949494 },
+	{ 0x40934, 0x959595 },
+	{ 0x40938, 0x969696 },
+	{ 0x4093c, 0x969696 },
+	{ 0x40940, 0x979797 },
+	{ 0x40944, 0x989898 },
+	{ 0x40948, 0x999999 },
+	{ 0x4094c, 0x9a9a9a },
+	{ 0x40950, 0x9b9b9b },
+	{ 0x40954, 0x9c9c9c },
+	{ 0x40958, 0x9c9c9c },
+	{ 0x4095c, 0x9d9d9d },
+	{ 0x40960, 0x9e9e9e },
+	{ 0x40964, 0x9f9f9f },
+	{ 0x40968, 0xa0a0a0 },
+	{ 0x4096c, 0xa0a0a0 },
+	{ 0x40970, 0xa1a1a1 },
+	{ 0x40974, 0xa2a2a2 },
+	{ 0x40978, 0xa3a3a3 },
+	{ 0x4097c, 0xa4a4a4 },
+	{ 0x40980, 0xa4a4a4 },
+	{ 0x40984, 0xa5a5a5 },
+	{ 0x40988, 0xa6a6a6 },
+	{ 0x4098c, 0xa7a7a7 },
+	{ 0x40990, 0xa7a7a7 },
+	{ 0x40994, 0xa8a8a8 },
+	{ 0x40998, 0xa9a9a9 },
+	{ 0x4099c, 0xaaaaaa },
+	{ 0x409a0, 0xaaaaaa },
+	{ 0x409a4, 0xababab },
+	{ 0x409a8, 0xacacac },
+	{ 0x409ac, 0xadadad },
+	{ 0x409b0, 0xadadad },
+	{ 0x409b4, 0xaeaeae },
+	{ 0x409b8, 0xafafaf },
+	{ 0x409bc, 0xafafaf },
+	{ 0x409c0, 0xb0b0b0 },
+	{ 0x409c4, 0xb1b1b1 },
+	{ 0x409c8, 0xb2b2b2 },
+	{ 0x409cc, 0xb2b2b2 },
+	{ 0x409d0, 0xb3b3b3 },
+	{ 0x409d4, 0xb4b4b4 },
+	{ 0x409d8, 0xb4b4b4 },
+	{ 0x409dc, 0xb5b5b5 },
+	{ 0x409e0, 0xb6b6b6 },
+	{ 0x409e4, 0xb6b6b6 },
+	{ 0x409e8, 0xb7b7b7 },
+	{ 0x409ec, 0xb8b8b8 },
+	{ 0x409f0, 0xb8b8b8 },
+	{ 0x409f4, 0xb9b9b9 },
+	{ 0x409f8, 0xbababa },
+	{ 0x409fc, 0xbababa },
+	{ 0x40a00, 0xbbbbbb },
+	{ 0x40a04, 0xbcbcbc },
+	{ 0x40a08, 0xbcbcbc },
+	{ 0x40a0c, 0xbdbdbd },
+	{ 0x40a10, 0xbebebe },
+	{ 0x40a14, 0xbebebe },
+	{ 0x40a18, 0xbfbfbf },
+	{ 0x40a1c, 0xc0c0c0 },
+	{ 0x40a20, 0xc0c0c0 },
+	{ 0x40a24, 0xc1c1c1 },
+	{ 0x40a28, 0xc1c1c1 },
+	{ 0x40a2c, 0xc2c2c2 },
+	{ 0x40a30, 0xc3c3c3 },
+	{ 0x40a34, 0xc3c3c3 },
+	{ 0x40a38, 0xc4c4c4 },
+	{ 0x40a3c, 0xc5c5c5 },
+	{ 0x40a40, 0xc5c5c5 },
+	{ 0x40a44, 0xc6c6c6 },
+	{ 0x40a48, 0xc6c6c6 },
+	{ 0x40a4c, 0xc7c7c7 },
+	{ 0x40a50, 0xc8c8c8 },
+	{ 0x40a54, 0xc8c8c8 },
+	{ 0x40a58, 0xc9c9c9 },
+	{ 0x40a5c, 0xc9c9c9 },
+	{ 0x40a60, 0xcacaca },
+	{ 0x40a64, 0xcbcbcb },
+	{ 0x40a68, 0xcbcbcb },
+	{ 0x40a6c, 0xcccccc },
+	{ 0x40a70, 0xcccccc },
+	{ 0x40a74, 0xcdcdcd },
+	{ 0x40a78, 0xcecece },
+	{ 0x40a7c, 0xcecece },
+	{ 0x40a80, 0xcfcfcf },
+	{ 0x40a84, 0xcfcfcf },
+	{ 0x40a88, 0xd0d0d0 },
+	{ 0x40a8c, 0xd0d0d0 },
+	{ 0x40a90, 0xd1d1d1 },
+	{ 0x40a94, 0xd2d2d2 },
+	{ 0x40a98, 0xd2d2d2 },
+	{ 0x40a9c, 0xd3d3d3 },
+	{ 0x40aa0, 0xd3d3d3 },
+	{ 0x40aa4, 0xd4d4d4 },
+	{ 0x40aa8, 0xd4d4d4 },
+	{ 0x40aac, 0xd5d5d5 },
+	{ 0x40ab0, 0xd6d6d6 },
+	{ 0x40ab4, 0xd6d6d6 },
+	{ 0x40ab8, 0xd7d7d7 },
+	{ 0x40abc, 0xd7d7d7 },
+	{ 0x40ac0, 0xd8d8d8 },
+	{ 0x40ac4, 0xd8d8d8 },
+	{ 0x40ac8, 0xd9d9d9 },
+	{ 0x40acc, 0xd9d9d9 },
+	{ 0x40ad0, 0xdadada },
+	{ 0x40ad4, 0xdbdbdb },
+	{ 0x40ad8, 0xdbdbdb },
+	{ 0x40adc, 0xdcdcdc },
+	{ 0x40ae0, 0xdcdcdc },
+	{ 0x40ae4, 0xdddddd },
+	{ 0x40ae8, 0xdddddd },
+	{ 0x40aec, 0xdedede },
+	{ 0x40af0, 0xdedede },
+	{ 0x40af4, 0xdfdfdf },
+	{ 0x40af8, 0xdfdfdf },
+	{ 0x40afc, 0xe0e0e0 },
+	{ 0x40b00, 0xe0e0e0 },
+	{ 0x40b04, 0xe1e1e1 },
+	{ 0x40b08, 0xe1e1e1 },
+	{ 0x40b0c, 0xe2e2e2 },
+	{ 0x40b10, 0xe3e3e3 },
+	{ 0x40b14, 0xe3e3e3 },
+	{ 0x40b18, 0xe4e4e4 },
+	{ 0x40b1c, 0xe4e4e4 },
+	{ 0x40b20, 0xe5e5e5 },
+	{ 0x40b24, 0xe5e5e5 },
+	{ 0x40b28, 0xe6e6e6 },
+	{ 0x40b2c, 0xe6e6e6 },
+	{ 0x40b30, 0xe7e7e7 },
+	{ 0x40b34, 0xe7e7e7 },
+	{ 0x40b38, 0xe8e8e8 },
+	{ 0x40b3c, 0xe8e8e8 },
+	{ 0x40b40, 0xe9e9e9 },
+	{ 0x40b44, 0xe9e9e9 },
+	{ 0x40b48, 0xeaeaea },
+	{ 0x40b4c, 0xeaeaea },
+	{ 0x40b50, 0xebebeb },
+	{ 0x40b54, 0xebebeb },
+	{ 0x40b58, 0xececec },
+	{ 0x40b5c, 0xececec },
+	{ 0x40b60, 0xededed },
+	{ 0x40b64, 0xededed },
+	{ 0x40b68, 0xeeeeee },
+	{ 0x40b6c, 0xeeeeee },
+	{ 0x40b70, 0xefefef },
+	{ 0x40b74, 0xefefef },
+	{ 0x40b78, 0xf0f0f0 },
+	{ 0x40b7c, 0xf0f0f0 },
+	{ 0x40b80, 0xf1f1f1 },
+	{ 0x40b84, 0xf1f1f1 },
+	{ 0x40b88, 0xf2f2f2 },
+	{ 0x40b8c, 0xf2f2f2 },
+	{ 0x40b90, 0xf2f2f2 },
+	{ 0x40b94, 0xf3f3f3 },
+	{ 0x40b98, 0xf3f3f3 },
+	{ 0x40b9c, 0xf4f4f4 },
+	{ 0x40ba0, 0xf4f4f4 },
+	{ 0x40ba4, 0xf5f5f5 },
+	{ 0x40ba8, 0xf5f5f5 },
+	{ 0x40bac, 0xf6f6f6 },
+	{ 0x40bb0, 0xf6f6f6 },
+	{ 0x40bb4, 0xf7f7f7 },
+	{ 0x40bb8, 0xf7f7f7 },
+	{ 0x40bbc, 0xf8f8f8 },
+	{ 0x40bc0, 0xf8f8f8 },
+	{ 0x40bc4, 0xf9f9f9 },
+	{ 0x40bc8, 0xf9f9f9 },
+	{ 0x40bcc, 0xfafafa },
+	{ 0x40bd0, 0xfafafa },
+	{ 0x40bd4, 0xfafafa },
+	{ 0x40bd8, 0xfbfbfb },
+	{ 0x40bdc, 0xfbfbfb },
+	{ 0x40be0, 0xfcfcfc },
+	{ 0x40be4, 0xfcfcfc },
+	{ 0x40be8, 0xfdfdfd },
+	{ 0x40bec, 0xfdfdfd },
+	{ 0x40bf0, 0xfefefe },
+	{ 0x40bf4, 0xfefefe },
+	{ 0x40bf8, 0xffffff },
+	{ 0x40bfc, 0xffffff },
+	{ 0x40c00, 0x0 },
+	{ 0x40c04, 0x0 },
+	{ 0x40c08, 0x0 },
+	{ 0x40c0c, 0x0 },
+	{ 0x40c10, 0x0 },
+	{ 0x40c14, 0x0 },
+	{ 0x40c18, 0x0 },
+	{ 0x40c1c, 0x0 },
+	{ 0x40c20, 0x0 },
+	{ 0x40c24, 0x0 },
+	{ 0x40c28, 0x0 },
+	{ 0x40c2c, 0x0 },
+	{ 0x40c30, 0x0 },
+	{ 0x40c34, 0x0 },
+	{ 0x40c38, 0x0 },
+	{ 0x40c3c, 0x0 },
+	{ 0x40c40, 0x10101 },
+	{ 0x40c44, 0x10101 },
+	{ 0x40c48, 0x10101 },
+	{ 0x40c4c, 0x10101 },
+	{ 0x40c50, 0x10101 },
+	{ 0x40c54, 0x10101 },
+	{ 0x40c58, 0x10101 },
+	{ 0x40c5c, 0x10101 },
+	{ 0x40c60, 0x10101 },
+	{ 0x40c64, 0x10101 },
+	{ 0x40c68, 0x20202 },
+	{ 0x40c6c, 0x20202 },
+	{ 0x40c70, 0x20202 },
+	{ 0x40c74, 0x20202 },
+	{ 0x40c78, 0x20202 },
+	{ 0x40c7c, 0x20202 },
+	{ 0x40c80, 0x30303 },
+	{ 0x40c84, 0x30303 },
+	{ 0x40c88, 0x30303 },
+	{ 0x40c8c, 0x30303 },
+	{ 0x40c90, 0x30303 },
+	{ 0x40c94, 0x40404 },
+	{ 0x40c98, 0x40404 },
+	{ 0x40c9c, 0x40404 },
+	{ 0x40ca0, 0x40404 },
+	{ 0x40ca4, 0x40404 },
+	{ 0x40ca8, 0x50505 },
+	{ 0x40cac, 0x50505 },
+	{ 0x40cb0, 0x50505 },
+	{ 0x40cb4, 0x50505 },
+	{ 0x40cb8, 0x60606 },
+	{ 0x40cbc, 0x60606 },
+	{ 0x40cc0, 0x60606 },
+	{ 0x40cc4, 0x70707 },
+	{ 0x40cc8, 0x70707 },
+	{ 0x40ccc, 0x70707 },
+	{ 0x40cd0, 0x70707 },
+	{ 0x40cd4, 0x80808 },
+	{ 0x40cd8, 0x80808 },
+	{ 0x40cdc, 0x80808 },
+	{ 0x40ce0, 0x90909 },
+	{ 0x40ce4, 0x90909 },
+	{ 0x40ce8, 0xa0a0a },
+	{ 0x40cec, 0xa0a0a },
+	{ 0x40cf0, 0xa0a0a },
+	{ 0x40cf4, 0xb0b0b },
+	{ 0x40cf8, 0xb0b0b },
+	{ 0x40cfc, 0xb0b0b },
+	{ 0x40d00, 0xc0c0c },
+	{ 0x40d04, 0xc0c0c },
+	{ 0x40d08, 0xd0d0d },
+	{ 0x40d0c, 0xd0d0d },
+	{ 0x40d10, 0xe0e0e },
+	{ 0x40d14, 0xe0e0e },
+	{ 0x40d18, 0xe0e0e },
+	{ 0x40d1c, 0xf0f0f },
+	{ 0x40d20, 0xf0f0f },
+	{ 0x40d24, 0x101010 },
+	{ 0x40d28, 0x101010 },
+	{ 0x40d2c, 0x111111 },
+	{ 0x40d30, 0x111111 },
+	{ 0x40d34, 0x121212 },
+	{ 0x40d38, 0x121212 },
+	{ 0x40d3c, 0x131313 },
+	{ 0x40d40, 0x131313 },
+	{ 0x40d44, 0x141414 },
+	{ 0x40d48, 0x151515 },
+	{ 0x40d4c, 0x151515 },
+	{ 0x40d50, 0x161616 },
+	{ 0x40d54, 0x161616 },
+	{ 0x40d58, 0x171717 },
+	{ 0x40d5c, 0x171717 },
+	{ 0x40d60, 0x181818 },
+	{ 0x40d64, 0x191919 },
+	{ 0x40d68, 0x191919 },
+	{ 0x40d6c, 0x1a1a1a },
+	{ 0x40d70, 0x1b1b1b },
+	{ 0x40d74, 0x1b1b1b },
+	{ 0x40d78, 0x1c1c1c },
+	{ 0x40d7c, 0x1c1c1c },
+	{ 0x40d80, 0x1d1d1d },
+	{ 0x40d84, 0x1e1e1e },
+	{ 0x40d88, 0x1f1f1f },
+	{ 0x40d8c, 0x1f1f1f },
+	{ 0x40d90, 0x202020 },
+	{ 0x40d94, 0x212121 },
+	{ 0x40d98, 0x212121 },
+	{ 0x40d9c, 0x222222 },
+	{ 0x40da0, 0x232323 },
+	{ 0x40da4, 0x242424 },
+	{ 0x40da8, 0x242424 },
+	{ 0x40dac, 0x252525 },
+	{ 0x40db0, 0x262626 },
+	{ 0x40db4, 0x272727 },
+	{ 0x40db8, 0x272727 },
+	{ 0x40dbc, 0x282828 },
+	{ 0x40dc0, 0x292929 },
+	{ 0x40dc4, 0x2a2a2a },
+	{ 0x40dc8, 0x2b2b2b },
+	{ 0x40dcc, 0x2c2c2c },
+	{ 0x40dd0, 0x2c2c2c },
+	{ 0x40dd4, 0x2d2d2d },
+	{ 0x40dd8, 0x2e2e2e },
+	{ 0x40ddc, 0x2f2f2f },
+	{ 0x40de0, 0x303030 },
+	{ 0x40de4, 0x313131 },
+	{ 0x40de8, 0x323232 },
+	{ 0x40dec, 0x333333 },
+	{ 0x40df0, 0x333333 },
+	{ 0x40df4, 0x343434 },
+	{ 0x40df8, 0x353535 },
+	{ 0x40dfc, 0x363636 },
+	{ 0x40e00, 0x373737 },
+	{ 0x40e04, 0x383838 },
+	{ 0x40e08, 0x393939 },
+	{ 0x40e0c, 0x3a3a3a },
+	{ 0x40e10, 0x3b3b3b },
+	{ 0x40e14, 0x3c3c3c },
+	{ 0x40e18, 0x3d3d3d },
+	{ 0x40e1c, 0x3e3e3e },
+	{ 0x40e20, 0x3f3f3f },
+	{ 0x40e24, 0x404040 },
+	{ 0x40e28, 0x414141 },
+	{ 0x40e2c, 0x424242 },
+	{ 0x40e30, 0x434343 },
+	{ 0x40e34, 0x444444 },
+	{ 0x40e38, 0x464646 },
+	{ 0x40e3c, 0x474747 },
+	{ 0x40e40, 0x484848 },
+	{ 0x40e44, 0x494949 },
+	{ 0x40e48, 0x4a4a4a },
+	{ 0x40e4c, 0x4b4b4b },
+	{ 0x40e50, 0x4c4c4c },
+	{ 0x40e54, 0x4d4d4d },
+	{ 0x40e58, 0x4f4f4f },
+	{ 0x40e5c, 0x505050 },
+	{ 0x40e60, 0x515151 },
+	{ 0x40e64, 0x525252 },
+	{ 0x40e68, 0x535353 },
+	{ 0x40e6c, 0x545454 },
+	{ 0x40e70, 0x565656 },
+	{ 0x40e74, 0x575757 },
+	{ 0x40e78, 0x585858 },
+	{ 0x40e7c, 0x595959 },
+	{ 0x40e80, 0x5b5b5b },
+	{ 0x40e84, 0x5c5c5c },
+	{ 0x40e88, 0x5d5d5d },
+	{ 0x40e8c, 0x5e5e5e },
+	{ 0x40e90, 0x606060 },
+	{ 0x40e94, 0x616161 },
+	{ 0x40e98, 0x626262 },
+	{ 0x40e9c, 0x646464 },
+	{ 0x40ea0, 0x656565 },
+	{ 0x40ea4, 0x666666 },
+	{ 0x40ea8, 0x686868 },
+	{ 0x40eac, 0x696969 },
+	{ 0x40eb0, 0x6a6a6a },
+	{ 0x40eb4, 0x6c6c6c },
+	{ 0x40eb8, 0x6d6d6d },
+	{ 0x40ebc, 0x6f6f6f },
+	{ 0x40ec0, 0x707070 },
+	{ 0x40ec4, 0x717171 },
+	{ 0x40ec8, 0x737373 },
+	{ 0x40ecc, 0x747474 },
+	{ 0x40ed0, 0x767676 },
+	{ 0x40ed4, 0x777777 },
+	{ 0x40ed8, 0x797979 },
+	{ 0x40edc, 0x7a7a7a },
+	{ 0x40ee0, 0x7c7c7c },
+	{ 0x40ee4, 0x7d7d7d },
+	{ 0x40ee8, 0x7f7f7f },
+	{ 0x40eec, 0x808080 },
+	{ 0x40ef0, 0x828282 },
+	{ 0x40ef4, 0x838383 },
+	{ 0x40ef8, 0x858585 },
+	{ 0x40efc, 0x868686 },
+	{ 0x40f00, 0x888888 },
+	{ 0x40f04, 0x898989 },
+	{ 0x40f08, 0x8b8b8b },
+	{ 0x40f0c, 0x8d8d8d },
+	{ 0x40f10, 0x8e8e8e },
+	{ 0x40f14, 0x909090 },
+	{ 0x40f18, 0x919191 },
+	{ 0x40f1c, 0x939393 },
+	{ 0x40f20, 0x959595 },
+	{ 0x40f24, 0x969696 },
+	{ 0x40f28, 0x989898 },
+	{ 0x40f2c, 0x9a9a9a },
+	{ 0x40f30, 0x9b9b9b },
+	{ 0x40f34, 0x9d9d9d },
+	{ 0x40f38, 0x9f9f9f },
+	{ 0x40f3c, 0xa1a1a1 },
+	{ 0x40f40, 0xa2a2a2 },
+	{ 0x40f44, 0xa4a4a4 },
+	{ 0x40f48, 0xa6a6a6 },
+	{ 0x40f4c, 0xa7a7a7 },
+	{ 0x40f50, 0xa9a9a9 },
+	{ 0x40f54, 0xababab },
+	{ 0x40f58, 0xadadad },
+	{ 0x40f5c, 0xafafaf },
+	{ 0x40f60, 0xb0b0b0 },
+	{ 0x40f64, 0xb2b2b2 },
+	{ 0x40f68, 0xb4b4b4 },
+	{ 0x40f6c, 0xb6b6b6 },
+	{ 0x40f70, 0xb8b8b8 },
+	{ 0x40f74, 0xbababa },
+	{ 0x40f78, 0xbbbbbb },
+	{ 0x40f7c, 0xbdbdbd },
+	{ 0x40f80, 0xbfbfbf },
+	{ 0x40f84, 0xc1c1c1 },
+	{ 0x40f88, 0xc3c3c3 },
+	{ 0x40f8c, 0xc5c5c5 },
+	{ 0x40f90, 0xc7c7c7 },
+	{ 0x40f94, 0xc9c9c9 },
+	{ 0x40f98, 0xcbcbcb },
+	{ 0x40f9c, 0xcdcdcd },
+	{ 0x40fa0, 0xcfcfcf },
+	{ 0x40fa4, 0xd1d1d1 },
+	{ 0x40fa8, 0xd3d3d3 },
+	{ 0x40fac, 0xd5d5d5 },
+	{ 0x40fb0, 0xd7d7d7 },
+	{ 0x40fb4, 0xd9d9d9 },
+	{ 0x40fb8, 0xdbdbdb },
+	{ 0x40fbc, 0xdddddd },
+	{ 0x40fc0, 0xdfdfdf },
+	{ 0x40fc4, 0xe1e1e1 },
+	{ 0x40fc8, 0xe3e3e3 },
+	{ 0x40fcc, 0xe5e5e5 },
+	{ 0x40fd0, 0xe7e7e7 },
+	{ 0x40fd4, 0xe9e9e9 },
+	{ 0x40fd8, 0xebebeb },
+	{ 0x40fdc, 0xeeeeee },
+	{ 0x40fe0, 0xf0f0f0 },
+	{ 0x40fe4, 0xf2f2f2 },
+	{ 0x40fe8, 0xf4f4f4 },
+	{ 0x40fec, 0xf6f6f6 },
+	{ 0x40ff0, 0xf8f8f8 },
+	{ 0x40ff4, 0xfbfbfb },
+	{ 0x40ff8, 0xfdfdfd },
+	{ 0x40ffc, 0xffffff },
+};
diff --git a/drivers/video/msm/mdp_hw.h b/drivers/video/msm/mdp_hw.h
new file mode 100644
index 0000000..4e3deb4
--- /dev/null
+++ b/drivers/video/msm/mdp_hw.h
@@ -0,0 +1,621 @@
+/* drivers/video/msm_fb/mdp_hw.h
+ *
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef _MDP_HW_H_
+#define _MDP_HW_H_
+
+#include <mach/msm_iomap.h>
+#include <mach/msm_fb.h>
+
+struct mdp_info {
+	struct mdp_device mdp_dev;
+	char * __iomem base;
+	int irq;
+};
+struct mdp_blit_req;
+struct mdp_device;
+int mdp_ppp_blit(const struct mdp_info *mdp, struct mdp_blit_req *req,
+		 struct file *src_file, unsigned long src_start,
+		 unsigned long src_len, struct file *dst_file,
+		 unsigned long dst_start, unsigned long dst_len);
+#define mdp_writel(mdp, value, offset) writel(value, mdp->base + offset)
+#define mdp_readl(mdp, offset) readl(mdp->base + offset)
+
+#define MDP_SYNC_CONFIG_0                (0x00000)
+#define MDP_SYNC_CONFIG_1                (0x00004)
+#define MDP_SYNC_CONFIG_2                (0x00008)
+#define MDP_SYNC_STATUS_0                (0x0000c)
+#define MDP_SYNC_STATUS_1                (0x00010)
+#define MDP_SYNC_STATUS_2                (0x00014)
+#define MDP_SYNC_THRESH_0                (0x00018)
+#define MDP_SYNC_THRESH_1                (0x0001c)
+#define MDP_INTR_ENABLE                  (0x00020)
+#define MDP_INTR_STATUS                  (0x00024)
+#define MDP_INTR_CLEAR                   (0x00028)
+#define MDP_DISPLAY0_START               (0x00030)
+#define MDP_DISPLAY1_START               (0x00034)
+#define MDP_DISPLAY_STATUS               (0x00038)
+#define MDP_EBI2_LCD0                    (0x0003c)
+#define MDP_EBI2_LCD1                    (0x00040)
+#define MDP_DISPLAY0_ADDR                (0x00054)
+#define MDP_DISPLAY1_ADDR                (0x00058)
+#define MDP_EBI2_PORTMAP_MODE            (0x0005c)
+#define MDP_MODE                         (0x00060)
+#define MDP_TV_OUT_STATUS                (0x00064)
+#define MDP_HW_VERSION                   (0x00070)
+#define MDP_SW_RESET                     (0x00074)
+#define MDP_AXI_ERROR_MASTER_STOP        (0x00078)
+#define MDP_SEL_CLK_OR_HCLK_TEST_BUS     (0x0007c)
+#define MDP_PRIMARY_VSYNC_OUT_CTRL       (0x00080)
+#define MDP_SECONDARY_VSYNC_OUT_CTRL     (0x00084)
+#define MDP_EXTERNAL_VSYNC_OUT_CTRL      (0x00088)
+#define MDP_VSYNC_CTRL                   (0x0008c)
+#define MDP_CGC_EN                       (0x00100)
+#define MDP_CMD_STATUS                   (0x10008)
+#define MDP_PROFILE_EN                   (0x10010)
+#define MDP_PROFILE_COUNT                (0x10014)
+#define MDP_DMA_START                    (0x10044)
+#define MDP_FULL_BYPASS_WORD0            (0x10100)
+#define MDP_FULL_BYPASS_WORD1            (0x10104)
+#define MDP_COMMAND_CONFIG               (0x10104)
+#define MDP_FULL_BYPASS_WORD2            (0x10108)
+#define MDP_FULL_BYPASS_WORD3            (0x1010c)
+#define MDP_FULL_BYPASS_WORD4            (0x10110)
+#define MDP_FULL_BYPASS_WORD6            (0x10118)
+#define MDP_FULL_BYPASS_WORD7            (0x1011c)
+#define MDP_FULL_BYPASS_WORD8            (0x10120)
+#define MDP_FULL_BYPASS_WORD9            (0x10124)
+#define MDP_PPP_SOURCE_CONFIG            (0x10124)
+#define MDP_FULL_BYPASS_WORD10           (0x10128)
+#define MDP_FULL_BYPASS_WORD11           (0x1012c)
+#define MDP_FULL_BYPASS_WORD12           (0x10130)
+#define MDP_FULL_BYPASS_WORD13           (0x10134)
+#define MDP_FULL_BYPASS_WORD14           (0x10138)
+#define MDP_PPP_OPERATION_CONFIG         (0x10138)
+#define MDP_FULL_BYPASS_WORD15           (0x1013c)
+#define MDP_FULL_BYPASS_WORD16           (0x10140)
+#define MDP_FULL_BYPASS_WORD17           (0x10144)
+#define MDP_FULL_BYPASS_WORD18           (0x10148)
+#define MDP_FULL_BYPASS_WORD19           (0x1014c)
+#define MDP_FULL_BYPASS_WORD20           (0x10150)
+#define MDP_PPP_DESTINATION_CONFIG       (0x10150)
+#define MDP_FULL_BYPASS_WORD21           (0x10154)
+#define MDP_FULL_BYPASS_WORD22           (0x10158)
+#define MDP_FULL_BYPASS_WORD23           (0x1015c)
+#define MDP_FULL_BYPASS_WORD24           (0x10160)
+#define MDP_FULL_BYPASS_WORD25           (0x10164)
+#define MDP_FULL_BYPASS_WORD26           (0x10168)
+#define MDP_FULL_BYPASS_WORD27           (0x1016c)
+#define MDP_FULL_BYPASS_WORD29           (0x10174)
+#define MDP_FULL_BYPASS_WORD30           (0x10178)
+#define MDP_FULL_BYPASS_WORD31           (0x1017c)
+#define MDP_FULL_BYPASS_WORD32           (0x10180)
+#define MDP_DMA_CONFIG                   (0x10180)
+#define MDP_FULL_BYPASS_WORD33           (0x10184)
+#define MDP_FULL_BYPASS_WORD34           (0x10188)
+#define MDP_FULL_BYPASS_WORD35           (0x1018c)
+#define MDP_FULL_BYPASS_WORD37           (0x10194)
+#define MDP_FULL_BYPASS_WORD39           (0x1019c)
+#define MDP_FULL_BYPASS_WORD40           (0x101a0)
+#define MDP_FULL_BYPASS_WORD41           (0x101a4)
+#define MDP_FULL_BYPASS_WORD43           (0x101ac)
+#define MDP_FULL_BYPASS_WORD46           (0x101b8)
+#define MDP_FULL_BYPASS_WORD47           (0x101bc)
+#define MDP_FULL_BYPASS_WORD48           (0x101c0)
+#define MDP_FULL_BYPASS_WORD49           (0x101c4)
+#define MDP_FULL_BYPASS_WORD50           (0x101c8)
+#define MDP_FULL_BYPASS_WORD51           (0x101cc)
+#define MDP_FULL_BYPASS_WORD52           (0x101d0)
+#define MDP_FULL_BYPASS_WORD53           (0x101d4)
+#define MDP_FULL_BYPASS_WORD54           (0x101d8)
+#define MDP_FULL_BYPASS_WORD55           (0x101dc)
+#define MDP_FULL_BYPASS_WORD56           (0x101e0)
+#define MDP_FULL_BYPASS_WORD57           (0x101e4)
+#define MDP_FULL_BYPASS_WORD58           (0x101e8)
+#define MDP_FULL_BYPASS_WORD59           (0x101ec)
+#define MDP_FULL_BYPASS_WORD60           (0x101f0)
+#define MDP_VSYNC_THRESHOLD              (0x101f0)
+#define MDP_FULL_BYPASS_WORD61           (0x101f4)
+#define MDP_FULL_BYPASS_WORD62           (0x101f8)
+#define MDP_FULL_BYPASS_WORD63           (0x101fc)
+#define MDP_TFETCH_TEST_MODE             (0x20004)
+#define MDP_TFETCH_STATUS                (0x20008)
+#define MDP_TFETCH_TILE_COUNT            (0x20010)
+#define MDP_TFETCH_FETCH_COUNT           (0x20014)
+#define MDP_TFETCH_CONSTANT_COLOR        (0x20040)
+#define MDP_CSC_BYPASS                   (0x40004)
+#define MDP_SCALE_COEFF_LSB              (0x5fffc)
+#define MDP_TV_OUT_CTL                   (0xc0000)
+#define MDP_TV_OUT_FIR_COEFF             (0xc0004)
+#define MDP_TV_OUT_BUF_ADDR              (0xc0008)
+#define MDP_TV_OUT_CC_DATA               (0xc000c)
+#define MDP_TV_OUT_SOBEL                 (0xc0010)
+#define MDP_TV_OUT_Y_CLAMP               (0xc0018)
+#define MDP_TV_OUT_CB_CLAMP              (0xc001c)
+#define MDP_TV_OUT_CR_CLAMP              (0xc0020)
+#define MDP_TEST_MODE_CLK                (0xd0000)
+#define MDP_TEST_MISR_RESET_CLK          (0xd0004)
+#define MDP_TEST_EXPORT_MISR_CLK         (0xd0008)
+#define MDP_TEST_MISR_CURR_VAL_CLK       (0xd000c)
+#define MDP_TEST_MODE_HCLK               (0xd0100)
+#define MDP_TEST_MISR_RESET_HCLK         (0xd0104)
+#define MDP_TEST_EXPORT_MISR_HCLK        (0xd0108)
+#define MDP_TEST_MISR_CURR_VAL_HCLK      (0xd010c)
+#define MDP_TEST_MODE_DCLK               (0xd0200)
+#define MDP_TEST_MISR_RESET_DCLK         (0xd0204)
+#define MDP_TEST_EXPORT_MISR_DCLK        (0xd0208)
+#define MDP_TEST_MISR_CURR_VAL_DCLK      (0xd020c)
+#define MDP_TEST_CAPTURED_DCLK           (0xd0210)
+#define MDP_TEST_MISR_CAPT_VAL_DCLK      (0xd0214)
+#define MDP_LCDC_CTL                     (0xe0000)
+#define MDP_LCDC_HSYNC_CTL               (0xe0004)
+#define MDP_LCDC_VSYNC_CTL               (0xe0008)
+#define MDP_LCDC_ACTIVE_HCTL             (0xe000c)
+#define MDP_LCDC_ACTIVE_VCTL             (0xe0010)
+#define MDP_LCDC_BORDER_CLR              (0xe0014)
+#define MDP_LCDC_H_BLANK                 (0xe0018)
+#define MDP_LCDC_V_BLANK                 (0xe001c)
+#define MDP_LCDC_UNDERFLOW_CLR           (0xe0020)
+#define MDP_LCDC_HSYNC_SKEW              (0xe0024)
+#define MDP_LCDC_TEST_CTL                (0xe0028)
+#define MDP_LCDC_LINE_IRQ                (0xe002c)
+#define MDP_LCDC_CTL_POLARITY            (0xe0030)
+#define MDP_LCDC_DMA_CONFIG              (0xe1000)
+#define MDP_LCDC_DMA_SIZE                (0xe1004)
+#define MDP_LCDC_DMA_IBUF_ADDR           (0xe1008)
+#define MDP_LCDC_DMA_IBUF_Y_STRIDE       (0xe100c)
+
+
+#define MDP_DMA2_TERM 0x1
+#define MDP_DMA3_TERM 0x2
+#define MDP_PPP_TERM 0x3
+
+/* MDP_INTR_ENABLE */
+#define DL0_ROI_DONE           (1<<0)
+#define DL1_ROI_DONE           (1<<1)
+#define DL0_DMA2_TERM_DONE     (1<<2)
+#define DL1_DMA2_TERM_DONE     (1<<3)
+#define DL0_PPP_TERM_DONE      (1<<4)
+#define DL1_PPP_TERM_DONE      (1<<5)
+#define TV_OUT_DMA3_DONE       (1<<6)
+#define TV_ENC_UNDERRUN        (1<<7)
+#define DL0_FETCH_DONE         (1<<11)
+#define DL1_FETCH_DONE         (1<<12)
+
+#define MDP_PPP_BUSY_STATUS (DL0_ROI_DONE| \
+			   DL1_ROI_DONE| \
+			   DL0_PPP_TERM_DONE| \
+			   DL1_PPP_TERM_DONE)
+
+#define MDP_ANY_INTR_MASK (DL0_ROI_DONE| \
+			   DL1_ROI_DONE| \
+			   DL0_DMA2_TERM_DONE| \
+			   DL1_DMA2_TERM_DONE| \
+			   DL0_PPP_TERM_DONE| \
+			   DL1_PPP_TERM_DONE| \
+			   DL0_FETCH_DONE| \
+			   DL1_FETCH_DONE| \
+			   TV_ENC_UNDERRUN)
+
+#define MDP_TOP_LUMA       16
+#define MDP_TOP_CHROMA     0
+#define MDP_BOTTOM_LUMA    19
+#define MDP_BOTTOM_CHROMA  3
+#define MDP_LEFT_LUMA      22
+#define MDP_LEFT_CHROMA    6
+#define MDP_RIGHT_LUMA     25
+#define MDP_RIGHT_CHROMA   9
+
+#define CLR_G 0x0
+#define CLR_B 0x1
+#define CLR_R 0x2
+#define CLR_ALPHA 0x3
+
+#define CLR_Y  CLR_G
+#define CLR_CB CLR_B
+#define CLR_CR CLR_R
+
+/* from lsb to msb */
+#define MDP_GET_PACK_PATTERN(a, x, y, z, bit) \
+	(((a)<<(bit*3))|((x)<<(bit*2))|((y)<<bit)|(z))
+
+/* MDP_SYNC_CONFIG_0/1/2 */
+#define MDP_SYNCFG_HGT_LOC 22
+#define MDP_SYNCFG_VSYNC_EXT_EN (1<<21)
+#define MDP_SYNCFG_VSYNC_INT_EN (1<<20)
+
+/* MDP_SYNC_THRESH_0 */
+#define MDP_PRIM_BELOW_LOC 0
+#define MDP_PRIM_ABOVE_LOC 8
+
+/* MDP_{PRIMARY,SECONDARY,EXTERNAL}_VSYNC_OUT_CRL */
+#define VSYNC_PULSE_EN (1<<31)
+#define VSYNC_PULSE_INV (1<<30)
+
+/* MDP_VSYNC_CTRL */
+#define DISP0_VSYNC_MAP_VSYNC0 0
+#define DISP0_VSYNC_MAP_VSYNC1 (1<<0)
+#define DISP0_VSYNC_MAP_VSYNC2 ((1<<0)|(1<<1))
+
+#define DISP1_VSYNC_MAP_VSYNC0 0
+#define DISP1_VSYNC_MAP_VSYNC1 (1<<2)
+#define DISP1_VSYNC_MAP_VSYNC2 ((1<<2)|(1<<3))
+
+#define PRIMARY_LCD_SYNC_EN (1<<4)
+#define PRIMARY_LCD_SYNC_DISABLE 0
+
+#define SECONDARY_LCD_SYNC_EN (1<<5)
+#define SECONDARY_LCD_SYNC_DISABLE 0
+
+#define EXTERNAL_LCD_SYNC_EN (1<<6)
+#define EXTERNAL_LCD_SYNC_DISABLE 0
+
+/* MDP_VSYNC_THRESHOLD / MDP_FULL_BYPASS_WORD60 */
+#define VSYNC_THRESHOLD_ABOVE_LOC 0
+#define VSYNC_THRESHOLD_BELOW_LOC 16
+#define VSYNC_ANTI_TEAR_EN (1<<31)
+
+/* MDP_COMMAND_CONFIG / MDP_FULL_BYPASS_WORD1 */
+#define MDP_CMD_DBGBUS_EN (1<<0)
+
+/* MDP_PPP_SOURCE_CONFIG / MDP_FULL_BYPASS_WORD9&53 */
+#define PPP_SRC_C0G_8BIT ((1<<1)|(1<<0))
+#define PPP_SRC_C1B_8BIT ((1<<3)|(1<<2))
+#define PPP_SRC_C2R_8BIT ((1<<5)|(1<<4))
+#define PPP_SRC_C3A_8BIT ((1<<7)|(1<<6))
+
+#define PPP_SRC_C0G_6BIT (1<<1)
+#define PPP_SRC_C1B_6BIT (1<<3)
+#define PPP_SRC_C2R_6BIT (1<<5)
+
+#define PPP_SRC_C0G_5BIT (1<<0)
+#define PPP_SRC_C1B_5BIT (1<<2)
+#define PPP_SRC_C2R_5BIT (1<<4)
+
+#define PPP_SRC_C3ALPHA_EN (1<<8)
+
+#define PPP_SRC_BPP_1BYTES 0
+#define PPP_SRC_BPP_2BYTES (1<<9)
+#define PPP_SRC_BPP_3BYTES (1<<10)
+#define PPP_SRC_BPP_4BYTES ((1<<10)|(1<<9))
+
+#define PPP_SRC_BPP_ROI_ODD_X (1<<11)
+#define PPP_SRC_BPP_ROI_ODD_Y (1<<12)
+#define PPP_SRC_INTERLVD_2COMPONENTS (1<<13)
+#define PPP_SRC_INTERLVD_3COMPONENTS (1<<14)
+#define PPP_SRC_INTERLVD_4COMPONENTS ((1<<14)|(1<<13))
+
+
+/* RGB666 unpack format
+** TIGHT means R6+G6+B6 together
+** LOOSE means R6+2 +G6+2+ B6+2 (with MSB)
+**          or 2+R6 +2+G6 +2+B6 (with LSB)
+*/
+#define PPP_SRC_PACK_TIGHT (1<<17)
+#define PPP_SRC_PACK_LOOSE 0
+#define PPP_SRC_PACK_ALIGN_LSB 0
+#define PPP_SRC_PACK_ALIGN_MSB (1<<18)
+
+#define PPP_SRC_PLANE_INTERLVD 0
+#define PPP_SRC_PLANE_PSEUDOPLNR (1<<20)
+
+#define PPP_SRC_WMV9_MODE (1<<21)
+
+/* MDP_PPP_OPERATION_CONFIG / MDP_FULL_BYPASS_WORD14 */
+#define PPP_OP_SCALE_X_ON (1<<0)
+#define PPP_OP_SCALE_Y_ON (1<<1)
+
+#define PPP_OP_CONVERT_RGB2YCBCR 0
+#define PPP_OP_CONVERT_YCBCR2RGB (1<<2)
+#define PPP_OP_CONVERT_ON (1<<3)
+
+#define PPP_OP_CONVERT_MATRIX_PRIMARY 0
+#define PPP_OP_CONVERT_MATRIX_SECONDARY (1<<4)
+
+#define PPP_OP_LUT_C0_ON (1<<5)
+#define PPP_OP_LUT_C1_ON (1<<6)
+#define PPP_OP_LUT_C2_ON (1<<7)
+
+/* rotate or blend enable */
+#define PPP_OP_ROT_ON (1<<8)
+
+#define PPP_OP_ROT_90 (1<<9)
+#define PPP_OP_FLIP_LR (1<<10)
+#define PPP_OP_FLIP_UD (1<<11)
+
+#define PPP_OP_BLEND_ON (1<<12)
+
+#define PPP_OP_BLEND_SRCPIXEL_ALPHA 0
+#define PPP_OP_BLEND_DSTPIXEL_ALPHA (1<<13)
+#define PPP_OP_BLEND_CONSTANT_ALPHA (1<<14)
+#define PPP_OP_BLEND_SRCPIXEL_TRANSP ((1<<13)|(1<<14))
+
+#define PPP_OP_BLEND_ALPHA_BLEND_NORMAL 0
+#define PPP_OP_BLEND_ALPHA_BLEND_REVERSE (1<<15)
+
+#define PPP_OP_DITHER_EN (1<<16)
+
+#define PPP_OP_COLOR_SPACE_RGB 0
+#define PPP_OP_COLOR_SPACE_YCBCR (1<<17)
+
+#define PPP_OP_SRC_CHROMA_RGB 0
+#define PPP_OP_SRC_CHROMA_H2V1 (1<<18)
+#define PPP_OP_SRC_CHROMA_H1V2 (1<<19)
+#define PPP_OP_SRC_CHROMA_420 ((1<<18)|(1<<19))
+#define PPP_OP_SRC_CHROMA_COSITE 0
+#define PPP_OP_SRC_CHROMA_OFFSITE (1<<20)
+
+#define PPP_OP_DST_CHROMA_RGB 0
+#define PPP_OP_DST_CHROMA_H2V1 (1<<21)
+#define PPP_OP_DST_CHROMA_H1V2 (1<<22)
+#define PPP_OP_DST_CHROMA_420 ((1<<21)|(1<<22))
+#define PPP_OP_DST_CHROMA_COSITE 0
+#define PPP_OP_DST_CHROMA_OFFSITE (1<<23)
+
+#define PPP_BLEND_ALPHA_TRANSP (1<<24)
+
+#define PPP_OP_BG_CHROMA_RGB 0
+#define PPP_OP_BG_CHROMA_H2V1 (1<<25)
+#define PPP_OP_BG_CHROMA_H1V2 (1<<26)
+#define PPP_OP_BG_CHROMA_420 ((1<<25)|(1<<26))
+#define PPP_OP_BG_CHROMA_SITE_COSITE 0
+#define PPP_OP_BG_CHROMA_SITE_OFFSITE (1<<27)
+
+/* MDP_PPP_DESTINATION_CONFIG / MDP_FULL_BYPASS_WORD20 */
+#define PPP_DST_C0G_8BIT ((1<<0)|(1<<1))
+#define PPP_DST_C1B_8BIT ((1<<3)|(1<<2))
+#define PPP_DST_C2R_8BIT ((1<<5)|(1<<4))
+#define PPP_DST_C3A_8BIT ((1<<7)|(1<<6))
+
+#define PPP_DST_C0G_6BIT (1<<1)
+#define PPP_DST_C1B_6BIT (1<<3)
+#define PPP_DST_C2R_6BIT (1<<5)
+
+#define PPP_DST_C0G_5BIT (1<<0)
+#define PPP_DST_C1B_5BIT (1<<2)
+#define PPP_DST_C2R_5BIT (1<<4)
+
+#define PPP_DST_C3A_8BIT ((1<<7)|(1<<6))
+#define PPP_DST_C3ALPHA_EN (1<<8)
+
+#define PPP_DST_INTERLVD_2COMPONENTS (1<<9)
+#define PPP_DST_INTERLVD_3COMPONENTS (1<<10)
+#define PPP_DST_INTERLVD_4COMPONENTS ((1<<10)|(1<<9))
+#define PPP_DST_INTERLVD_6COMPONENTS ((1<<11)|(1<<9))
+
+#define PPP_DST_PACK_LOOSE 0
+#define PPP_DST_PACK_TIGHT (1<<13)
+#define PPP_DST_PACK_ALIGN_LSB 0
+#define PPP_DST_PACK_ALIGN_MSB (1<<14)
+
+#define PPP_DST_OUT_SEL_AXI 0
+#define PPP_DST_OUT_SEL_MDDI (1<<15)
+
+#define PPP_DST_BPP_2BYTES (1<<16)
+#define PPP_DST_BPP_3BYTES (1<<17)
+#define PPP_DST_BPP_4BYTES ((1<<17)|(1<<16))
+
+#define PPP_DST_PLANE_INTERLVD 0
+#define PPP_DST_PLANE_PLANAR (1<<18)
+#define PPP_DST_PLANE_PSEUDOPLNR (1<<19)
+
+#define PPP_DST_TO_TV (1<<20)
+
+#define PPP_DST_MDDI_PRIMARY 0
+#define PPP_DST_MDDI_SECONDARY (1<<21)
+#define PPP_DST_MDDI_EXTERNAL (1<<22)
+
+/* image configurations by image type */
+#define PPP_CFG_MDP_RGB_565(dir)       (PPP_##dir##_C2R_5BIT | \
+					PPP_##dir##_C0G_6BIT | \
+					PPP_##dir##_C1B_5BIT | \
+					PPP_##dir##_BPP_2BYTES | \
+					PPP_##dir##_INTERLVD_3COMPONENTS | \
+					PPP_##dir##_PACK_TIGHT | \
+					PPP_##dir##_PACK_ALIGN_LSB | \
+					PPP_##dir##_PLANE_INTERLVD)
+
+#define PPP_CFG_MDP_RGB_888(dir)       (PPP_##dir##_C2R_8BIT | \
+					PPP_##dir##_C0G_8BIT | \
+					PPP_##dir##_C1B_8BIT | \
+					PPP_##dir##_BPP_3BYTES | \
+					PPP_##dir##_INTERLVD_3COMPONENTS | \
+					PPP_##dir##_PACK_TIGHT | \
+					PPP_##dir##_PACK_ALIGN_LSB | \
+					PPP_##dir##_PLANE_INTERLVD)
+
+#define PPP_CFG_MDP_ARGB_8888(dir)     (PPP_##dir##_C2R_8BIT | \
+					PPP_##dir##_C0G_8BIT | \
+					PPP_##dir##_C1B_8BIT | \
+					PPP_##dir##_C3A_8BIT | \
+					PPP_##dir##_C3ALPHA_EN | \
+					PPP_##dir##_BPP_4BYTES | \
+					PPP_##dir##_INTERLVD_4COMPONENTS | \
+					PPP_##dir##_PACK_TIGHT | \
+					PPP_##dir##_PACK_ALIGN_LSB | \
+					PPP_##dir##_PLANE_INTERLVD)
+
+#define PPP_CFG_MDP_XRGB_8888(dir) PPP_CFG_MDP_ARGB_8888(dir)
+#define PPP_CFG_MDP_RGBA_8888(dir) PPP_CFG_MDP_ARGB_8888(dir)
+#define PPP_CFG_MDP_BGRA_8888(dir) PPP_CFG_MDP_ARGB_8888(dir)
+
+#define PPP_CFG_MDP_Y_CBCR_H2V2(dir)   (PPP_##dir##_C2R_8BIT | \
+					PPP_##dir##_C0G_8BIT | \
+					PPP_##dir##_C1B_8BIT | \
+					PPP_##dir##_C3A_8BIT | \
+					PPP_##dir##_BPP_2BYTES | \
+					PPP_##dir##_INTERLVD_2COMPONENTS | \
+					PPP_##dir##_PACK_TIGHT | \
+					PPP_##dir##_PACK_ALIGN_LSB | \
+					PPP_##dir##_PLANE_PSEUDOPLNR)
+
+#define PPP_CFG_MDP_Y_CRCB_H2V2(dir)	PPP_CFG_MDP_Y_CBCR_H2V2(dir)
+
+#define PPP_CFG_MDP_YCRYCB_H2V1(dir)   (PPP_##dir##_C2R_8BIT | \
+					PPP_##dir##_C0G_8BIT | \
+					PPP_##dir##_C1B_8BIT | \
+					PPP_##dir##_C3A_8BIT | \
+					PPP_##dir##_BPP_2BYTES | \
+					PPP_##dir##_INTERLVD_4COMPONENTS | \
+					PPP_##dir##_PACK_TIGHT | \
+					PPP_##dir##_PACK_ALIGN_LSB |\
+					PPP_##dir##_PLANE_INTERLVD)
+
+#define PPP_CFG_MDP_Y_CBCR_H2V1(dir)   (PPP_##dir##_C2R_8BIT | \
+					PPP_##dir##_C0G_8BIT | \
+					PPP_##dir##_C1B_8BIT | \
+					PPP_##dir##_C3A_8BIT | \
+					PPP_##dir##_BPP_2BYTES |   \
+					PPP_##dir##_INTERLVD_2COMPONENTS |  \
+					PPP_##dir##_PACK_TIGHT | \
+					PPP_##dir##_PACK_ALIGN_LSB | \
+					PPP_##dir##_PLANE_PSEUDOPLNR)
+
+#define PPP_CFG_MDP_Y_CRCB_H2V1(dir)	PPP_CFG_MDP_Y_CBCR_H2V1(dir)
+
+#define PPP_PACK_PATTERN_MDP_RGB_565 \
+	MDP_GET_PACK_PATTERN(0, CLR_R, CLR_G, CLR_B, 8)
+#define PPP_PACK_PATTERN_MDP_RGB_888 PPP_PACK_PATTERN_MDP_RGB_565
+#define PPP_PACK_PATTERN_MDP_XRGB_8888 \
+	MDP_GET_PACK_PATTERN(CLR_ALPHA, CLR_R, CLR_G, CLR_B, 8)
+#define PPP_PACK_PATTERN_MDP_ARGB_8888 PPP_PACK_PATTERN_MDP_XRGB_8888
+#define PPP_PACK_PATTERN_MDP_RGBA_8888 \
+	MDP_GET_PACK_PATTERN(CLR_ALPHA, CLR_B, CLR_G, CLR_R, 8)
+#define PPP_PACK_PATTERN_MDP_BGRA_8888 \
+	MDP_GET_PACK_PATTERN(CLR_ALPHA, CLR_R, CLR_G, CLR_B, 8)
+#define PPP_PACK_PATTERN_MDP_Y_CBCR_H2V1 \
+	MDP_GET_PACK_PATTERN(0, 0, CLR_CB, CLR_CR, 8)
+#define PPP_PACK_PATTERN_MDP_Y_CBCR_H2V2 PPP_PACK_PATTERN_MDP_Y_CBCR_H2V1
+#define PPP_PACK_PATTERN_MDP_Y_CRCB_H2V1 \
+	MDP_GET_PACK_PATTERN(0, 0, CLR_CR, CLR_CB, 8)
+#define PPP_PACK_PATTERN_MDP_Y_CRCB_H2V2 PPP_PACK_PATTERN_MDP_Y_CRCB_H2V1
+#define PPP_PACK_PATTERN_MDP_YCRYCB_H2V1 \
+	MDP_GET_PACK_PATTERN(CLR_Y, CLR_R, CLR_Y, CLR_B, 8)
+
+#define PPP_CHROMA_SAMP_MDP_RGB_565(dir) PPP_OP_##dir##_CHROMA_RGB
+#define PPP_CHROMA_SAMP_MDP_RGB_888(dir) PPP_OP_##dir##_CHROMA_RGB
+#define PPP_CHROMA_SAMP_MDP_XRGB_8888(dir) PPP_OP_##dir##_CHROMA_RGB
+#define PPP_CHROMA_SAMP_MDP_ARGB_8888(dir) PPP_OP_##dir##_CHROMA_RGB
+#define PPP_CHROMA_SAMP_MDP_RGBA_8888(dir) PPP_OP_##dir##_CHROMA_RGB
+#define PPP_CHROMA_SAMP_MDP_BGRA_8888(dir) PPP_OP_##dir##_CHROMA_RGB
+#define PPP_CHROMA_SAMP_MDP_Y_CBCR_H2V1(dir) PPP_OP_##dir##_CHROMA_H2V1
+#define PPP_CHROMA_SAMP_MDP_Y_CBCR_H2V2(dir) PPP_OP_##dir##_CHROMA_420
+#define PPP_CHROMA_SAMP_MDP_Y_CRCB_H2V1(dir) PPP_OP_##dir##_CHROMA_H2V1
+#define PPP_CHROMA_SAMP_MDP_Y_CRCB_H2V2(dir) PPP_OP_##dir##_CHROMA_420
+#define PPP_CHROMA_SAMP_MDP_YCRYCB_H2V1(dir) PPP_OP_##dir##_CHROMA_H2V1
+
+/* Helpful array generation macros */
+#define PPP_ARRAY0(name) \
+	[MDP_RGB_565] = PPP_##name##_MDP_RGB_565,\
+	[MDP_RGB_888] = PPP_##name##_MDP_RGB_888,\
+	[MDP_XRGB_8888] = PPP_##name##_MDP_XRGB_8888,\
+	[MDP_ARGB_8888] = PPP_##name##_MDP_ARGB_8888,\
+	[MDP_RGBA_8888] = PPP_##name##_MDP_RGBA_8888,\
+	[MDP_BGRA_8888] = PPP_##name##_MDP_BGRA_8888,\
+	[MDP_Y_CBCR_H2V1] = PPP_##name##_MDP_Y_CBCR_H2V1,\
+	[MDP_Y_CBCR_H2V2] = PPP_##name##_MDP_Y_CBCR_H2V2,\
+	[MDP_Y_CRCB_H2V1] = PPP_##name##_MDP_Y_CRCB_H2V1,\
+	[MDP_Y_CRCB_H2V2] = PPP_##name##_MDP_Y_CRCB_H2V2,\
+	[MDP_YCRYCB_H2V1] = PPP_##name##_MDP_YCRYCB_H2V1
+
+#define PPP_ARRAY1(name, dir) \
+	[MDP_RGB_565] = PPP_##name##_MDP_RGB_565(dir),\
+	[MDP_RGB_888] = PPP_##name##_MDP_RGB_888(dir),\
+	[MDP_XRGB_8888] = PPP_##name##_MDP_XRGB_8888(dir),\
+	[MDP_ARGB_8888] = PPP_##name##_MDP_ARGB_8888(dir),\
+	[MDP_RGBA_8888] = PPP_##name##_MDP_RGBA_8888(dir),\
+	[MDP_BGRA_8888] = PPP_##name##_MDP_BGRA_8888(dir),\
+	[MDP_Y_CBCR_H2V1] = PPP_##name##_MDP_Y_CBCR_H2V1(dir),\
+	[MDP_Y_CBCR_H2V2] = PPP_##name##_MDP_Y_CBCR_H2V2(dir),\
+	[MDP_Y_CRCB_H2V1] = PPP_##name##_MDP_Y_CRCB_H2V1(dir),\
+	[MDP_Y_CRCB_H2V2] = PPP_##name##_MDP_Y_CRCB_H2V2(dir),\
+	[MDP_YCRYCB_H2V1] = PPP_##name##_MDP_YCRYCB_H2V1(dir)
+
+#define IS_YCRCB(img) ((img == MDP_Y_CRCB_H2V2) | (img == MDP_Y_CBCR_H2V2) | \
+		       (img == MDP_Y_CRCB_H2V1) | (img == MDP_Y_CBCR_H2V1) | \
+		       (img == MDP_YCRYCB_H2V1))
+#define IS_RGB(img) ((img == MDP_RGB_565) | (img == MDP_RGB_888) | \
+		     (img == MDP_ARGB_8888) | (img == MDP_RGBA_8888) | \
+		     (img == MDP_XRGB_8888) | (img == MDP_BGRA_8888))
+#define HAS_ALPHA(img) ((img == MDP_ARGB_8888) | (img == MDP_RGBA_8888) | \
+			(img == MDP_BGRA_8888))
+
+#define IS_PSEUDOPLNR(img) ((img == MDP_Y_CRCB_H2V2) | \
+			    (img == MDP_Y_CBCR_H2V2) | \
+			    (img == MDP_Y_CRCB_H2V1) | \
+			    (img == MDP_Y_CBCR_H2V1))
+
+/* Mappings from addr to purpose */
+#define PPP_ADDR_SRC_ROI		MDP_FULL_BYPASS_WORD2
+#define PPP_ADDR_SRC0			MDP_FULL_BYPASS_WORD3
+#define PPP_ADDR_SRC1			MDP_FULL_BYPASS_WORD4
+#define PPP_ADDR_SRC_YSTRIDE		MDP_FULL_BYPASS_WORD7
+#define PPP_ADDR_SRC_CFG		MDP_FULL_BYPASS_WORD9
+#define PPP_ADDR_SRC_PACK_PATTERN	MDP_FULL_BYPASS_WORD10
+#define PPP_ADDR_OPERATION		MDP_FULL_BYPASS_WORD14
+#define PPP_ADDR_PHASEX_INIT		MDP_FULL_BYPASS_WORD15
+#define PPP_ADDR_PHASEY_INIT		MDP_FULL_BYPASS_WORD16
+#define PPP_ADDR_PHASEX_STEP		MDP_FULL_BYPASS_WORD17
+#define PPP_ADDR_PHASEY_STEP		MDP_FULL_BYPASS_WORD18
+#define PPP_ADDR_ALPHA_TRANSP		MDP_FULL_BYPASS_WORD19
+#define PPP_ADDR_DST_CFG		MDP_FULL_BYPASS_WORD20
+#define PPP_ADDR_DST_PACK_PATTERN	MDP_FULL_BYPASS_WORD21
+#define PPP_ADDR_DST_ROI		MDP_FULL_BYPASS_WORD25
+#define PPP_ADDR_DST0			MDP_FULL_BYPASS_WORD26
+#define PPP_ADDR_DST1			MDP_FULL_BYPASS_WORD27
+#define PPP_ADDR_DST_YSTRIDE		MDP_FULL_BYPASS_WORD30
+#define PPP_ADDR_EDGE			MDP_FULL_BYPASS_WORD46
+#define PPP_ADDR_BG0			MDP_FULL_BYPASS_WORD48
+#define PPP_ADDR_BG1			MDP_FULL_BYPASS_WORD49
+#define PPP_ADDR_BG_YSTRIDE		MDP_FULL_BYPASS_WORD51
+#define PPP_ADDR_BG_CFG			MDP_FULL_BYPASS_WORD53
+#define PPP_ADDR_BG_PACK_PATTERN	MDP_FULL_BYPASS_WORD54
+
+/* MDP_DMA_CONFIG / MDP_FULL_BYPASS_WORD32 */
+#define DMA_DSTC0G_6BITS (1<<1)
+#define DMA_DSTC1B_6BITS (1<<3)
+#define DMA_DSTC2R_6BITS (1<<5)
+#define DMA_DSTC0G_5BITS (1<<0)
+#define DMA_DSTC1B_5BITS (1<<2)
+#define DMA_DSTC2R_5BITS (1<<4)
+
+#define DMA_PACK_TIGHT (1<<6)
+#define DMA_PACK_LOOSE 0
+#define DMA_PACK_ALIGN_LSB 0
+#define DMA_PACK_ALIGN_MSB (1<<7)
+#define DMA_PACK_PATTERN_RGB \
+	(MDP_GET_PACK_PATTERN(0, CLR_R, CLR_G, CLR_B, 2)<<8)
+
+#define DMA_OUT_SEL_AHB  0
+#define DMA_OUT_SEL_MDDI (1<<14)
+#define DMA_AHBM_LCD_SEL_PRIMARY 0
+#define DMA_AHBM_LCD_SEL_SECONDARY (1<<15)
+#define DMA_IBUF_C3ALPHA_EN (1<<16)
+#define DMA_DITHER_EN (1<<17)
+
+#define DMA_MDDI_DMAOUT_LCD_SEL_PRIMARY 0
+#define DMA_MDDI_DMAOUT_LCD_SEL_SECONDARY (1<<18)
+#define DMA_MDDI_DMAOUT_LCD_SEL_EXTERNAL (1<<19)
+
+#define DMA_IBUF_FORMAT_RGB565 (1<<20)
+#define DMA_IBUF_FORMAT_RGB888_OR_ARGB8888 0
+
+#define DMA_IBUF_NONCONTIGUOUS (1<<21)
+
+/* MDDI REGISTER ? */
+#define MDDI_VDO_PACKET_DESC  0x5666
+#define MDDI_VDO_PACKET_PRIM  0xC3
+#define MDDI_VDO_PACKET_SECD  0xC0
+
+#endif
diff --git a/drivers/video/msm/mdp_ppp.c b/drivers/video/msm/mdp_ppp.c
new file mode 100644
index 0000000..ba2c467
--- /dev/null
+++ b/drivers/video/msm/mdp_ppp.c
@@ -0,0 +1,750 @@
+/* drivers/video/msm/mdp_ppp.c
+ *
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/fb.h>
+#include <linux/file.h>
+#include <linux/delay.h>
+#include <linux/msm_mdp.h>
+#include <linux/android_pmem.h>
+#include <mach/msm_fb.h>
+
+#include "mdp_hw.h"
+#include "mdp_scale_tables.h"
+
+#define DLOG(x...) do {} while (0)
+
+#define MDP_DOWNSCALE_BLUR (MDP_DOWNSCALE_MAX + 1)
+static int downscale_y_table = MDP_DOWNSCALE_MAX;
+static int downscale_x_table = MDP_DOWNSCALE_MAX;
+
+struct mdp_regs {
+	uint32_t src0;
+	uint32_t src1;
+	uint32_t dst0;
+	uint32_t dst1;
+	uint32_t src_cfg;
+	uint32_t dst_cfg;
+	uint32_t src_pack;
+	uint32_t dst_pack;
+	uint32_t src_rect;
+	uint32_t dst_rect;
+	uint32_t src_ystride;
+	uint32_t dst_ystride;
+	uint32_t op;
+	uint32_t src_bpp;
+	uint32_t dst_bpp;
+	uint32_t edge;
+	uint32_t phasex_init;
+	uint32_t phasey_init;
+	uint32_t phasex_step;
+	uint32_t phasey_step;
+};
+
+static uint32_t pack_pattern[] = {
+	PPP_ARRAY0(PACK_PATTERN)
+};
+
+static uint32_t src_img_cfg[] = {
+	PPP_ARRAY1(CFG, SRC)
+};
+
+static uint32_t dst_img_cfg[] = {
+	PPP_ARRAY1(CFG, DST)
+};
+
+static uint32_t bytes_per_pixel[] = {
+	[MDP_RGB_565] = 2,
+	[MDP_RGB_888] = 3,
+	[MDP_XRGB_8888] = 4,
+	[MDP_ARGB_8888] = 4,
+	[MDP_RGBA_8888] = 4,
+	[MDP_BGRA_8888] = 4,
+	[MDP_Y_CBCR_H2V1] = 1,
+	[MDP_Y_CBCR_H2V2] = 1,
+	[MDP_Y_CRCB_H2V1] = 1,
+	[MDP_Y_CRCB_H2V2] = 1,
+	[MDP_YCRYCB_H2V1] = 2
+};
+
+static uint32_t dst_op_chroma[] = {
+	PPP_ARRAY1(CHROMA_SAMP, DST)
+};
+
+static uint32_t src_op_chroma[] = {
+	PPP_ARRAY1(CHROMA_SAMP, SRC)
+};
+
+static uint32_t bg_op_chroma[] = {
+	PPP_ARRAY1(CHROMA_SAMP, BG)
+};
+
+static void rotate_dst_addr_x(struct mdp_blit_req *req, struct mdp_regs *regs)
+{
+	regs->dst0 += (req->dst_rect.w -
+		       min((uint32_t)16, req->dst_rect.w)) * regs->dst_bpp;
+	regs->dst1 += (req->dst_rect.w -
+		       min((uint32_t)16, req->dst_rect.w)) * regs->dst_bpp;
+}
+
+static void rotate_dst_addr_y(struct mdp_blit_req *req, struct mdp_regs *regs)
+{
+	regs->dst0 += (req->dst_rect.h -
+		       min((uint32_t)16, req->dst_rect.h)) *
+		       regs->dst_ystride;
+	regs->dst1 += (req->dst_rect.h -
+		       min((uint32_t)16, req->dst_rect.h)) *
+		       regs->dst_ystride;
+}
+
+static void blit_rotate(struct mdp_blit_req *req,
+			struct mdp_regs *regs)
+{
+	if (req->flags == MDP_ROT_NOP)
+		return;
+
+	regs->op |= PPP_OP_ROT_ON;
+	if ((req->flags & MDP_ROT_90 || req->flags & MDP_FLIP_LR) &&
+	    !(req->flags & MDP_ROT_90 && req->flags & MDP_FLIP_LR))
+		rotate_dst_addr_x(req, regs);
+	if (req->flags & MDP_ROT_90)
+		regs->op |= PPP_OP_ROT_90;
+	if (req->flags & MDP_FLIP_UD) {
+		regs->op |= PPP_OP_FLIP_UD;
+		rotate_dst_addr_y(req, regs);
+	}
+	if (req->flags & MDP_FLIP_LR)
+		regs->op |= PPP_OP_FLIP_LR;
+}
+
+static void blit_convert(struct mdp_blit_req *req, struct mdp_regs *regs)
+{
+	if (req->src.format == req->dst.format)
+		return;
+	if (IS_RGB(req->src.format) && IS_YCRCB(req->dst.format)) {
+		regs->op |= PPP_OP_CONVERT_RGB2YCBCR | PPP_OP_CONVERT_ON;
+	} else if (IS_YCRCB(req->src.format) && IS_RGB(req->dst.format)) {
+		regs->op |= PPP_OP_CONVERT_YCBCR2RGB | PPP_OP_CONVERT_ON;
+		if (req->dst.format == MDP_RGB_565)
+			regs->op |= PPP_OP_CONVERT_MATRIX_SECONDARY;
+	}
+}
+
+#define GET_BIT_RANGE(value, high, low) \
+	(((1 << (high - low + 1)) - 1) & (value >> low))
+static uint32_t transp_convert(struct mdp_blit_req *req)
+{
+	uint32_t transp = 0;
+	if (req->src.format == MDP_RGB_565) {
+		/* pad each value to 8 bits by copying the high bits into the
+		 * low end, convert RGB to RBG by switching low 2 components */
+		transp |= ((GET_BIT_RANGE(req->transp_mask, 15, 11) << 3) |
+			   (GET_BIT_RANGE(req->transp_mask, 15, 13))) << 16;
+
+		transp |= ((GET_BIT_RANGE(req->transp_mask, 4, 0) << 3) |
+			   (GET_BIT_RANGE(req->transp_mask, 4, 2))) << 8;
+
+		transp |= (GET_BIT_RANGE(req->transp_mask, 10, 5) << 2) |
+			  (GET_BIT_RANGE(req->transp_mask, 10, 9));
+	} else {
+		/* convert RGB to RBG */
+		transp |= (GET_BIT_RANGE(req->transp_mask, 15, 8)) |
+			  (GET_BIT_RANGE(req->transp_mask, 23, 16) << 16) |
+			  (GET_BIT_RANGE(req->transp_mask, 7, 0) << 8);
+	}
+	return transp;
+}
+#undef GET_BIT_RANGE
+
+static void blit_blend(struct mdp_blit_req *req, struct mdp_regs *regs)
+{
+	/* TRANSP BLEND */
+	if (req->transp_mask != MDP_TRANSP_NOP) {
+		req->transp_mask = transp_convert(req);
+		if (req->alpha != MDP_ALPHA_NOP) {
+			/* use blended transparancy mode
+			 * pixel = (src == transp) ? dst : blend
+			 * blend is combo of blend_eq_sel and
+			 * blend_alpha_sel */
+			regs->op |= PPP_OP_ROT_ON | PPP_OP_BLEND_ON |
+				PPP_OP_BLEND_ALPHA_BLEND_NORMAL |
+				PPP_OP_BLEND_CONSTANT_ALPHA |
+				PPP_BLEND_ALPHA_TRANSP;
+		} else {
+			/* simple transparancy mode
+			 * pixel = (src == transp) ? dst : src */
+			regs->op |= PPP_OP_ROT_ON | PPP_OP_BLEND_ON |
+				PPP_OP_BLEND_SRCPIXEL_TRANSP;
+		}
+	}
+
+	req->alpha &= 0xff;
+	/* ALPHA BLEND */
+	if (HAS_ALPHA(req->src.format)) {
+		regs->op |= PPP_OP_ROT_ON | PPP_OP_BLEND_ON |
+			PPP_OP_BLEND_SRCPIXEL_ALPHA;
+	} else if (req->alpha < MDP_ALPHA_NOP) {
+		/* just blend by alpha */
+		regs->op |= PPP_OP_ROT_ON | PPP_OP_BLEND_ON |
+			PPP_OP_BLEND_ALPHA_BLEND_NORMAL |
+			PPP_OP_BLEND_CONSTANT_ALPHA;
+	}
+
+	regs->op |= bg_op_chroma[req->dst.format];
+}
+
+#define ONE_HALF	(1LL << 32)
+#define ONE		(1LL << 33)
+#define TWO		(2LL << 33)
+#define THREE		(3LL << 33)
+#define FRAC_MASK (ONE - 1)
+#define INT_MASK (~FRAC_MASK)
+
+static int scale_params(uint32_t dim_in, uint32_t dim_out, uint32_t origin,
+			uint32_t *phase_init, uint32_t *phase_step)
+{
+	/* to improve precicsion calculations are done in U31.33 and converted
+	 * to U3.29 at the end */
+	int64_t k1, k2, k3, k4, tmp;
+	uint64_t n, d, os, os_p, od, od_p, oreq;
+	unsigned rpa = 0;
+	int64_t ip64, delta;
+
+	if (dim_out % 3 == 0)
+		rpa = !(dim_in % (dim_out / 3));
+
+	n = ((uint64_t)dim_out) << 34;
+	d = dim_in;
+	if (!d)
+		return -1;
+	do_div(n, d);
+	k3 = (n + 1) >> 1;
+	if ((k3 >> 4) < (1LL << 27) || (k3 >> 4) > (1LL << 31)) {
+		DLOG("crap bad scale\n");
+		return -1;
+	}
+	n = ((uint64_t)dim_in) << 34;
+	d = (uint64_t)dim_out;
+	if (!d)
+		return -1;
+	do_div(n, d);
+	k1 = (n + 1) >> 1;
+	k2 = (k1 - ONE) >> 1;
+
+	*phase_init = (int)(k2 >> 4);
+	k4 = (k3 - ONE) >> 1;
+
+	if (rpa) {
+		os = ((uint64_t)origin << 33) - ONE_HALF;
+		tmp = (dim_out * os) + ONE_HALF;
+		if (!dim_in)
+			return -1;
+		do_div(tmp, dim_in);
+		od = tmp - ONE_HALF;
+	} else {
+		os = ((uint64_t)origin << 1) - 1;
+		od = (((k3 * os) >> 1) + k4);
+	}
+
+	od_p = od & INT_MASK;
+	if (od_p != od)
+		od_p += ONE;
+
+	if (rpa) {
+		tmp = (dim_in * od_p) + ONE_HALF;
+		if (!dim_in)
+			return -1;
+		do_div(tmp, dim_in);
+		os_p = tmp - ONE_HALF;
+	} else {
+		os_p = ((k1 * (od_p >> 33)) + k2);
+	}
+
+	oreq = (os_p & INT_MASK) - ONE;
+
+	ip64 = os_p - oreq;
+	delta = ((int64_t)(origin) << 33) - oreq;
+	ip64 -= delta;
+	/* limit to valid range before the left shift */
+	delta = (ip64 & (1LL << 63)) ? 4 : -4;
+	delta <<= 33;
+	while (abs((int)(ip64 >> 33)) > 4)
+		ip64 += delta;
+	*phase_init = (int)(ip64 >> 4);
+	*phase_step = (uint32_t)(k1 >> 4);
+	return 0;
+}
+
+static void load_scale_table(const struct mdp_info *mdp,
+			     struct mdp_table_entry *table, int len)
+{
+	int i;
+	for (i = 0; i < len; i++)
+		mdp_writel(mdp, table[i].val, table[i].reg);
+}
+
+enum {
+IMG_LEFT,
+IMG_RIGHT,
+IMG_TOP,
+IMG_BOTTOM,
+};
+
+static void get_edge_info(uint32_t src, uint32_t src_coord, uint32_t dst,
+			  uint32_t *interp1, uint32_t *interp2,
+			  uint32_t *repeat1, uint32_t *repeat2) {
+	if (src > 3 * dst) {
+		*interp1 = 0;
+		*interp2 = src - 1;
+		*repeat1 = 0;
+		*repeat2 = 0;
+	} else if (src == 3 * dst) {
+		*interp1 = 0;
+		*interp2 = src;
+		*repeat1 = 0;
+		*repeat2 = 1;
+	} else if (src > dst && src < 3 * dst) {
+		*interp1 = -1;
+		*interp2 = src;
+		*repeat1 = 1;
+		*repeat2 = 1;
+	} else if (src == dst) {
+		*interp1 = -1;
+		*interp2 = src + 1;
+		*repeat1 = 1;
+		*repeat2 = 2;
+	} else {
+		*interp1 = -2;
+		*interp2 = src + 1;
+		*repeat1 = 2;
+		*repeat2 = 2;
+	}
+	*interp1 += src_coord;
+	*interp2 += src_coord;
+}
+
+static int get_edge_cond(struct mdp_blit_req *req, struct mdp_regs *regs)
+{
+	int32_t luma_interp[4];
+	int32_t luma_repeat[4];
+	int32_t chroma_interp[4];
+	int32_t chroma_bound[4];
+	int32_t chroma_repeat[4];
+	uint32_t dst_w, dst_h;
+
+	memset(&luma_interp, 0, sizeof(int32_t) * 4);
+	memset(&luma_repeat, 0, sizeof(int32_t) * 4);
+	memset(&chroma_interp, 0, sizeof(int32_t) * 4);
+	memset(&chroma_bound, 0, sizeof(int32_t) * 4);
+	memset(&chroma_repeat, 0, sizeof(int32_t) * 4);
+	regs->edge = 0;
+
+	if (req->flags & MDP_ROT_90) {
+		dst_w = req->dst_rect.h;
+		dst_h = req->dst_rect.w;
+	} else {
+		dst_w = req->dst_rect.w;
+		dst_h = req->dst_rect.h;
+	}
+
+	if (regs->op & (PPP_OP_SCALE_Y_ON | PPP_OP_SCALE_X_ON)) {
+		get_edge_info(req->src_rect.h, req->src_rect.y, dst_h,
+			      &luma_interp[IMG_TOP], &luma_interp[IMG_BOTTOM],
+			      &luma_repeat[IMG_TOP], &luma_repeat[IMG_BOTTOM]);
+		get_edge_info(req->src_rect.w, req->src_rect.x, dst_w,
+			      &luma_interp[IMG_LEFT], &luma_interp[IMG_RIGHT],
+			      &luma_repeat[IMG_LEFT], &luma_repeat[IMG_RIGHT]);
+	} else {
+		luma_interp[IMG_LEFT] = req->src_rect.x;
+		luma_interp[IMG_RIGHT] = req->src_rect.x + req->src_rect.w - 1;
+		luma_interp[IMG_TOP] = req->src_rect.y;
+		luma_interp[IMG_BOTTOM] = req->src_rect.y + req->src_rect.h - 1;
+		luma_repeat[IMG_LEFT] = 0;
+		luma_repeat[IMG_TOP] = 0;
+		luma_repeat[IMG_RIGHT] = 0;
+		luma_repeat[IMG_BOTTOM] = 0;
+	}
+
+	chroma_interp[IMG_LEFT] = luma_interp[IMG_LEFT];
+	chroma_interp[IMG_RIGHT] = luma_interp[IMG_RIGHT];
+	chroma_interp[IMG_TOP] = luma_interp[IMG_TOP];
+	chroma_interp[IMG_BOTTOM] = luma_interp[IMG_BOTTOM];
+
+	chroma_bound[IMG_LEFT] = req->src_rect.x;
+	chroma_bound[IMG_RIGHT] = req->src_rect.x + req->src_rect.w - 1;
+	chroma_bound[IMG_TOP] = req->src_rect.y;
+	chroma_bound[IMG_BOTTOM] = req->src_rect.y + req->src_rect.h - 1;
+
+	if (IS_YCRCB(req->src.format)) {
+		chroma_interp[IMG_LEFT] = chroma_interp[IMG_LEFT] >> 1;
+		chroma_interp[IMG_RIGHT] = (chroma_interp[IMG_RIGHT] + 1) >> 1;
+
+		chroma_bound[IMG_LEFT] = chroma_bound[IMG_LEFT] >> 1;
+		chroma_bound[IMG_RIGHT] = chroma_bound[IMG_RIGHT] >> 1;
+	}
+
+	if (req->src.format == MDP_Y_CBCR_H2V2 ||
+	    req->src.format == MDP_Y_CRCB_H2V2) {
+		chroma_interp[IMG_TOP] = (chroma_interp[IMG_TOP] - 1) >> 1;
+		chroma_interp[IMG_BOTTOM] = (chroma_interp[IMG_BOTTOM] + 1)
+					    >> 1;
+		chroma_bound[IMG_TOP] = (chroma_bound[IMG_TOP] + 1) >> 1;
+		chroma_bound[IMG_BOTTOM] = chroma_bound[IMG_BOTTOM] >> 1;
+	}
+
+	chroma_repeat[IMG_LEFT] = chroma_bound[IMG_LEFT] -
+				  chroma_interp[IMG_LEFT];
+	chroma_repeat[IMG_RIGHT] = chroma_interp[IMG_RIGHT] -
+				  chroma_bound[IMG_RIGHT];
+	chroma_repeat[IMG_TOP] = chroma_bound[IMG_TOP] -
+				  chroma_interp[IMG_TOP];
+	chroma_repeat[IMG_BOTTOM] = chroma_interp[IMG_BOTTOM] -
+				  chroma_bound[IMG_BOTTOM];
+
+	if (chroma_repeat[IMG_LEFT] < 0 || chroma_repeat[IMG_LEFT] > 3 ||
+	    chroma_repeat[IMG_RIGHT] < 0 || chroma_repeat[IMG_RIGHT] > 3 ||
+	    chroma_repeat[IMG_TOP] < 0 || chroma_repeat[IMG_TOP] > 3 ||
+	    chroma_repeat[IMG_BOTTOM] < 0 || chroma_repeat[IMG_BOTTOM] > 3 ||
+	    luma_repeat[IMG_LEFT] < 0 || luma_repeat[IMG_LEFT] > 3 ||
+	    luma_repeat[IMG_RIGHT] < 0 || luma_repeat[IMG_RIGHT] > 3 ||
+	    luma_repeat[IMG_TOP] < 0 || luma_repeat[IMG_TOP] > 3 ||
+	    luma_repeat[IMG_BOTTOM] < 0 || luma_repeat[IMG_BOTTOM] > 3)
+		return -1;
+
+	regs->edge |= (chroma_repeat[IMG_LEFT] & 3) << MDP_LEFT_CHROMA;
+	regs->edge |= (chroma_repeat[IMG_RIGHT] & 3) << MDP_RIGHT_CHROMA;
+	regs->edge |= (chroma_repeat[IMG_TOP] & 3) << MDP_TOP_CHROMA;
+	regs->edge |= (chroma_repeat[IMG_BOTTOM] & 3) << MDP_BOTTOM_CHROMA;
+	regs->edge |= (luma_repeat[IMG_LEFT] & 3) << MDP_LEFT_LUMA;
+	regs->edge |= (luma_repeat[IMG_RIGHT] & 3) << MDP_RIGHT_LUMA;
+	regs->edge |= (luma_repeat[IMG_TOP] & 3) << MDP_TOP_LUMA;
+	regs->edge |= (luma_repeat[IMG_BOTTOM] & 3) << MDP_BOTTOM_LUMA;
+	return 0;
+}
+
+static int blit_scale(const struct mdp_info *mdp, struct mdp_blit_req *req,
+		      struct mdp_regs *regs)
+{
+	uint32_t phase_init_x, phase_init_y, phase_step_x, phase_step_y;
+	uint32_t scale_factor_x, scale_factor_y;
+	uint32_t downscale;
+	uint32_t dst_w, dst_h;
+
+	if (req->flags & MDP_ROT_90) {
+		dst_w = req->dst_rect.h;
+		dst_h = req->dst_rect.w;
+	} else {
+		dst_w = req->dst_rect.w;
+		dst_h = req->dst_rect.h;
+	}
+	if ((req->src_rect.w == dst_w)  && (req->src_rect.h == dst_h) &&
+	    !(req->flags & MDP_BLUR)) {
+		regs->phasex_init = 0;
+		regs->phasey_init = 0;
+		regs->phasex_step = 0;
+		regs->phasey_step = 0;
+		return 0;
+	}
+
+	if (scale_params(req->src_rect.w, dst_w, 1, &phase_init_x,
+			 &phase_step_x) ||
+	    scale_params(req->src_rect.h, dst_h, 1, &phase_init_y,
+			 &phase_step_y))
+		return -1;
+
+	scale_factor_x = (dst_w * 10) / req->src_rect.w;
+	scale_factor_y = (dst_h * 10) / req->src_rect.h;
+
+	if (scale_factor_x > 8)
+		downscale = MDP_DOWNSCALE_PT8TO1;
+	else if (scale_factor_x > 6)
+		downscale = MDP_DOWNSCALE_PT6TOPT8;
+	else if (scale_factor_x > 4)
+		downscale = MDP_DOWNSCALE_PT4TOPT6;
+	else
+		downscale = MDP_DOWNSCALE_PT2TOPT4;
+	if (downscale != downscale_x_table) {
+		load_scale_table(mdp, mdp_downscale_x_table[downscale], 64);
+		downscale_x_table = downscale;
+	}
+
+	if (scale_factor_y > 8)
+		downscale = MDP_DOWNSCALE_PT8TO1;
+	else if (scale_factor_y > 6)
+		downscale = MDP_DOWNSCALE_PT6TOPT8;
+	else if (scale_factor_y > 4)
+		downscale = MDP_DOWNSCALE_PT4TOPT6;
+	else
+		downscale = MDP_DOWNSCALE_PT2TOPT4;
+	if (downscale != downscale_y_table) {
+		load_scale_table(mdp, mdp_downscale_y_table[downscale], 64);
+		downscale_y_table = downscale;
+	}
+
+	regs->phasex_init = phase_init_x;
+	regs->phasey_init = phase_init_y;
+	regs->phasex_step = phase_step_x;
+	regs->phasey_step = phase_step_y;
+	regs->op |= (PPP_OP_SCALE_Y_ON | PPP_OP_SCALE_X_ON);
+	return 0;
+
+}
+
+static void blit_blur(const struct mdp_info *mdp, struct mdp_blit_req *req,
+		      struct mdp_regs *regs)
+{
+	if (!(req->flags & MDP_BLUR))
+		return;
+
+	if (!(downscale_x_table == MDP_DOWNSCALE_BLUR &&
+	      downscale_y_table == MDP_DOWNSCALE_BLUR)) {
+		load_scale_table(mdp, mdp_gaussian_blur_table, 128);
+		downscale_x_table = MDP_DOWNSCALE_BLUR;
+		downscale_y_table = MDP_DOWNSCALE_BLUR;
+	}
+
+	regs->op |= (PPP_OP_SCALE_Y_ON | PPP_OP_SCALE_X_ON);
+}
+
+
+#define IMG_LEN(rect_h, w, rect_w, bpp) (((rect_h) * w) * bpp)
+
+#define Y_TO_CRCB_RATIO(format) \
+	((format == MDP_Y_CBCR_H2V2 || format == MDP_Y_CRCB_H2V2) ?  2 :\
+	 (format == MDP_Y_CBCR_H2V1 || format == MDP_Y_CRCB_H2V1) ?  1 : 1)
+
+static void get_len(struct mdp_img *img, struct mdp_rect *rect, uint32_t bpp,
+		    uint32_t *len0, uint32_t *len1)
+{
+	*len0 = IMG_LEN(rect->h, img->width, rect->w, bpp);
+	if (IS_PSEUDOPLNR(img->format))
+		*len1 = *len0/Y_TO_CRCB_RATIO(img->format);
+	else
+		*len1 = 0;
+}
+
+static int valid_src_dst(unsigned long src_start, unsigned long src_len,
+			 unsigned long dst_start, unsigned long dst_len,
+			 struct mdp_blit_req *req, struct mdp_regs *regs)
+{
+	unsigned long src_min_ok = src_start;
+	unsigned long src_max_ok = src_start + src_len;
+	unsigned long dst_min_ok = dst_start;
+	unsigned long dst_max_ok = dst_start + dst_len;
+	uint32_t src0_len, src1_len, dst0_len, dst1_len;
+	get_len(&req->src, &req->src_rect, regs->src_bpp, &src0_len,
+		 &src1_len);
+	get_len(&req->dst, &req->dst_rect, regs->dst_bpp, &dst0_len,
+		 &dst1_len);
+
+	if (regs->src0 < src_min_ok || regs->src0 > src_max_ok ||
+	    regs->src0 + src0_len > src_max_ok) {
+		DLOG("invalid_src %x %x %lx %lx\n", regs->src0,
+		      src0_len, src_min_ok, src_max_ok);
+		return 0;
+	}
+	if (regs->src_cfg & PPP_SRC_PLANE_PSEUDOPLNR) {
+		if (regs->src1 < src_min_ok || regs->src1 > src_max_ok ||
+		    regs->src1 + src1_len > src_max_ok) {
+			DLOG("invalid_src1");
+			return 0;
+		}
+	}
+	if (regs->dst0 < dst_min_ok || regs->dst0 > dst_max_ok ||
+	    regs->dst0 + dst0_len > dst_max_ok) {
+		DLOG("invalid_dst");
+		return 0;
+	}
+	if (regs->dst_cfg & PPP_SRC_PLANE_PSEUDOPLNR) {
+		if (regs->dst1 < dst_min_ok || regs->dst1 > dst_max_ok ||
+		    regs->dst1 + dst1_len > dst_max_ok) {
+			DLOG("invalid_dst1");
+			return 0;
+		}
+	}
+	return 1;
+}
+
+
+static void flush_imgs(struct mdp_blit_req *req, struct mdp_regs *regs,
+		       struct file *src_file, struct file *dst_file)
+{
+#ifdef CONFIG_ANDROID_PMEM
+	uint32_t src0_len, src1_len, dst0_len, dst1_len;
+
+	/* flush src images to memory before dma to mdp */
+	get_len(&req->src, &req->src_rect, regs->src_bpp, &src0_len,
+		&src1_len);
+	flush_pmem_file(src_file, req->src.offset, src0_len);
+	if (IS_PSEUDOPLNR(req->src.format))
+		flush_pmem_file(src_file, req->src.offset + src0_len,
+				src1_len);
+
+	/* flush dst images */
+	get_len(&req->dst, &req->dst_rect, regs->dst_bpp, &dst0_len,
+		&dst1_len);
+	flush_pmem_file(dst_file, req->dst.offset, dst0_len);
+	if (IS_PSEUDOPLNR(req->dst.format))
+		flush_pmem_file(dst_file, req->dst.offset + dst0_len,
+				dst1_len);
+#endif
+}
+
+static void get_chroma_addr(struct mdp_img *img, struct mdp_rect *rect,
+			    uint32_t base, uint32_t bpp, uint32_t cfg,
+			    uint32_t *addr, uint32_t *ystride)
+{
+	uint32_t compress_v = Y_TO_CRCB_RATIO(img->format);
+	uint32_t compress_h = 2;
+	uint32_t  offset;
+
+	if (IS_PSEUDOPLNR(img->format)) {
+		offset = (rect->x / compress_h) * compress_h;
+		offset += rect->y == 0 ? 0 :
+			  ((rect->y + 1) / compress_v) * img->width;
+		*addr = base + (img->width * img->height * bpp);
+		*addr += offset * bpp;
+		*ystride |= *ystride << 16;
+	} else {
+		*addr = 0;
+	}
+}
+
+static int send_blit(const struct mdp_info *mdp, struct mdp_blit_req *req,
+		     struct mdp_regs *regs, struct file *src_file,
+		     struct file *dst_file)
+{
+	mdp_writel(mdp, 1, 0x060);
+	mdp_writel(mdp, regs->src_rect, PPP_ADDR_SRC_ROI);
+	mdp_writel(mdp, regs->src0, PPP_ADDR_SRC0);
+	mdp_writel(mdp, regs->src1, PPP_ADDR_SRC1);
+	mdp_writel(mdp, regs->src_ystride, PPP_ADDR_SRC_YSTRIDE);
+	mdp_writel(mdp, regs->src_cfg, PPP_ADDR_SRC_CFG);
+	mdp_writel(mdp, regs->src_pack, PPP_ADDR_SRC_PACK_PATTERN);
+
+	mdp_writel(mdp, regs->op, PPP_ADDR_OPERATION);
+	mdp_writel(mdp, regs->phasex_init, PPP_ADDR_PHASEX_INIT);
+	mdp_writel(mdp, regs->phasey_init, PPP_ADDR_PHASEY_INIT);
+	mdp_writel(mdp, regs->phasex_step, PPP_ADDR_PHASEX_STEP);
+	mdp_writel(mdp, regs->phasey_step, PPP_ADDR_PHASEY_STEP);
+
+	mdp_writel(mdp, (req->alpha << 24) | (req->transp_mask & 0xffffff),
+	       PPP_ADDR_ALPHA_TRANSP);
+
+	mdp_writel(mdp, regs->dst_cfg, PPP_ADDR_DST_CFG);
+	mdp_writel(mdp, regs->dst_pack, PPP_ADDR_DST_PACK_PATTERN);
+	mdp_writel(mdp, regs->dst_rect, PPP_ADDR_DST_ROI);
+	mdp_writel(mdp, regs->dst0, PPP_ADDR_DST0);
+	mdp_writel(mdp, regs->dst1, PPP_ADDR_DST1);
+	mdp_writel(mdp, regs->dst_ystride, PPP_ADDR_DST_YSTRIDE);
+
+	mdp_writel(mdp, regs->edge, PPP_ADDR_EDGE);
+	if (regs->op & PPP_OP_BLEND_ON) {
+		mdp_writel(mdp, regs->dst0, PPP_ADDR_BG0);
+		mdp_writel(mdp, regs->dst1, PPP_ADDR_BG1);
+		mdp_writel(mdp, regs->dst_ystride, PPP_ADDR_BG_YSTRIDE);
+		mdp_writel(mdp, src_img_cfg[req->dst.format], PPP_ADDR_BG_CFG);
+		mdp_writel(mdp, pack_pattern[req->dst.format],
+			   PPP_ADDR_BG_PACK_PATTERN);
+	}
+	flush_imgs(req, regs, src_file, dst_file);
+	mdp_writel(mdp, 0x1000, MDP_DISPLAY0_START);
+	return 0;
+}
+
+int mdp_ppp_blit(const struct mdp_info *mdp, struct mdp_blit_req *req,
+		 struct file *src_file, unsigned long src_start, unsigned long src_len,
+		 struct file *dst_file, unsigned long dst_start, unsigned long dst_len)
+{
+	struct mdp_regs regs = {0};
+
+	if (unlikely(req->src.format >= MDP_IMGTYPE_LIMIT ||
+		     req->dst.format >= MDP_IMGTYPE_LIMIT)) {
+		printk(KERN_ERR "mpd_ppp: img is of wrong format\n");
+		return -EINVAL;
+	}
+
+	if (unlikely(req->src_rect.x > req->src.width ||
+		     req->src_rect.y > req->src.height ||
+		     req->dst_rect.x > req->dst.width ||
+		     req->dst_rect.y > req->dst.height)) {
+		printk(KERN_ERR "mpd_ppp: img rect is outside of img!\n");
+		return -EINVAL;
+	}
+
+	/* set the src image configuration */
+	regs.src_cfg = src_img_cfg[req->src.format];
+	regs.src_cfg |= (req->src_rect.x & 0x1) ? PPP_SRC_BPP_ROI_ODD_X : 0;
+	regs.src_cfg |= (req->src_rect.y & 0x1) ? PPP_SRC_BPP_ROI_ODD_Y : 0;
+	regs.src_rect = (req->src_rect.h << 16) | req->src_rect.w;
+	regs.src_pack = pack_pattern[req->src.format];
+
+	/* set the dest image configuration */
+	regs.dst_cfg = dst_img_cfg[req->dst.format] | PPP_DST_OUT_SEL_AXI;
+	regs.dst_rect = (req->dst_rect.h << 16) | req->dst_rect.w;
+	regs.dst_pack = pack_pattern[req->dst.format];
+
+	/* set src, bpp, start pixel and ystride */
+	regs.src_bpp = bytes_per_pixel[req->src.format];
+	regs.src0 = src_start + req->src.offset;
+	regs.src_ystride = req->src.width * regs.src_bpp;
+	get_chroma_addr(&req->src, &req->src_rect, regs.src0, regs.src_bpp,
+			regs.src_cfg, &regs.src1, &regs.src_ystride);
+	regs.src0 += (req->src_rect.x + (req->src_rect.y * req->src.width)) *
+		      regs.src_bpp;
+
+	/* set dst, bpp, start pixel and ystride */
+	regs.dst_bpp = bytes_per_pixel[req->dst.format];
+	regs.dst0 = dst_start + req->dst.offset;
+	regs.dst_ystride = req->dst.width * regs.dst_bpp;
+	get_chroma_addr(&req->dst, &req->dst_rect, regs.dst0, regs.dst_bpp,
+			regs.dst_cfg, &regs.dst1, &regs.dst_ystride);
+	regs.dst0 += (req->dst_rect.x + (req->dst_rect.y * req->dst.width)) *
+		      regs.dst_bpp;
+
+	if (!valid_src_dst(src_start, src_len, dst_start, dst_len, req,
+			   &regs)) {
+		printk(KERN_ERR "mpd_ppp: final src or dst location is "
+			"invalid, are you trying to make an image too large "
+			"or to place it outside the screen?\n");
+		return -EINVAL;
+	}
+
+	/* set up operation register */
+	regs.op = 0;
+	blit_rotate(req, &regs);
+	blit_convert(req, &regs);
+	if (req->flags & MDP_DITHER)
+		regs.op |= PPP_OP_DITHER_EN;
+	blit_blend(req, &regs);
+	if (blit_scale(mdp, req, &regs)) {
+		printk(KERN_ERR "mpd_ppp: error computing scale for img.\n");
+		return -EINVAL;
+	}
+	blit_blur(mdp, req, &regs);
+	regs.op |= dst_op_chroma[req->dst.format] |
+		   src_op_chroma[req->src.format];
+
+	/* if the image is YCRYCB, the x and w must be even */
+	if (unlikely(req->src.format == MDP_YCRYCB_H2V1)) {
+		req->src_rect.x = req->src_rect.x & (~0x1);
+		req->src_rect.w = req->src_rect.w & (~0x1);
+		req->dst_rect.x = req->dst_rect.x & (~0x1);
+		req->dst_rect.w = req->dst_rect.w & (~0x1);
+	}
+	if (get_edge_cond(req, &regs))
+		return -EINVAL;
+
+	send_blit(mdp, req, &regs, src_file, dst_file);
+	return 0;
+}
diff --git a/drivers/video/msm/mdp_scale_tables.c b/drivers/video/msm/mdp_scale_tables.c
new file mode 100644
index 0000000..604783b
--- /dev/null
+++ b/drivers/video/msm/mdp_scale_tables.c
@@ -0,0 +1,766 @@
+/* drivers/video/msm_fb/mdp_scale_tables.c
+ *
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "mdp_scale_tables.h"
+#include "mdp_hw.h"
+
+struct mdp_table_entry mdp_upscale_table[] = {
+	{ 0x5fffc, 0x0 },
+	{ 0x50200, 0x7fc00000 },
+	{ 0x5fffc, 0xff80000d },
+	{ 0x50204, 0x7ec003f9 },
+	{ 0x5fffc, 0xfec0001c },
+	{ 0x50208, 0x7d4003f3 },
+	{ 0x5fffc, 0xfe40002b },
+	{ 0x5020c, 0x7b8003ed },
+	{ 0x5fffc, 0xfd80003c },
+	{ 0x50210, 0x794003e8 },
+	{ 0x5fffc, 0xfcc0004d },
+	{ 0x50214, 0x76c003e4 },
+	{ 0x5fffc, 0xfc40005f },
+	{ 0x50218, 0x73c003e0 },
+	{ 0x5fffc, 0xfb800071 },
+	{ 0x5021c, 0x708003de },
+	{ 0x5fffc, 0xfac00085 },
+	{ 0x50220, 0x6d0003db },
+	{ 0x5fffc, 0xfa000098 },
+	{ 0x50224, 0x698003d9 },
+	{ 0x5fffc, 0xf98000ac },
+	{ 0x50228, 0x654003d8 },
+	{ 0x5fffc, 0xf8c000c1 },
+	{ 0x5022c, 0x610003d7 },
+	{ 0x5fffc, 0xf84000d5 },
+	{ 0x50230, 0x5c8003d7 },
+	{ 0x5fffc, 0xf7c000e9 },
+	{ 0x50234, 0x580003d7 },
+	{ 0x5fffc, 0xf74000fd },
+	{ 0x50238, 0x534003d8 },
+	{ 0x5fffc, 0xf6c00112 },
+	{ 0x5023c, 0x4e8003d8 },
+	{ 0x5fffc, 0xf6800126 },
+	{ 0x50240, 0x494003da },
+	{ 0x5fffc, 0xf600013a },
+	{ 0x50244, 0x448003db },
+	{ 0x5fffc, 0xf600014d },
+	{ 0x50248, 0x3f4003dd },
+	{ 0x5fffc, 0xf5c00160 },
+	{ 0x5024c, 0x3a4003df },
+	{ 0x5fffc, 0xf5c00172 },
+	{ 0x50250, 0x354003e1 },
+	{ 0x5fffc, 0xf5c00184 },
+	{ 0x50254, 0x304003e3 },
+	{ 0x5fffc, 0xf6000195 },
+	{ 0x50258, 0x2b0003e6 },
+	{ 0x5fffc, 0xf64001a6 },
+	{ 0x5025c, 0x260003e8 },
+	{ 0x5fffc, 0xf6c001b4 },
+	{ 0x50260, 0x214003eb },
+	{ 0x5fffc, 0xf78001c2 },
+	{ 0x50264, 0x1c4003ee },
+	{ 0x5fffc, 0xf80001cf },
+	{ 0x50268, 0x17c003f1 },
+	{ 0x5fffc, 0xf90001db },
+	{ 0x5026c, 0x134003f3 },
+	{ 0x5fffc, 0xfa0001e5 },
+	{ 0x50270, 0xf0003f6 },
+	{ 0x5fffc, 0xfb4001ee },
+	{ 0x50274, 0xac003f9 },
+	{ 0x5fffc, 0xfcc001f5 },
+	{ 0x50278, 0x70003fb },
+	{ 0x5fffc, 0xfe4001fb },
+	{ 0x5027c, 0x34003fe },
+};
+
+static struct mdp_table_entry mdp_downscale_x_table_PT2TOPT4[] = {
+	{ 0x5fffc, 0x740008c },
+	{ 0x50280, 0x33800088 },
+	{ 0x5fffc, 0x800008e },
+	{ 0x50284, 0x33400084 },
+	{ 0x5fffc, 0x8400092 },
+	{ 0x50288, 0x33000080 },
+	{ 0x5fffc, 0x9000094 },
+	{ 0x5028c, 0x3300007b },
+	{ 0x5fffc, 0x9c00098 },
+	{ 0x50290, 0x32400077 },
+	{ 0x5fffc, 0xa40009b },
+	{ 0x50294, 0x32000073 },
+	{ 0x5fffc, 0xb00009d },
+	{ 0x50298,  0x31c0006f },
+	{ 0x5fffc,  0xbc000a0 },
+	{ 0x5029c,  0x3140006b },
+	{ 0x5fffc,  0xc8000a2 },
+	{ 0x502a0,  0x31000067 },
+	{ 0x5fffc,  0xd8000a5 },
+	{ 0x502a4,  0x30800062 },
+	{ 0x5fffc,  0xe4000a8 },
+	{ 0x502a8,  0x2fc0005f },
+	{ 0x5fffc,  0xec000aa },
+	{ 0x502ac,  0x2fc0005b },
+	{ 0x5fffc,  0xf8000ad },
+	{ 0x502b0,  0x2f400057 },
+	{ 0x5fffc,  0x108000b0 },
+	{ 0x502b4,  0x2e400054 },
+	{ 0x5fffc,  0x114000b2 },
+	{ 0x502b8,  0x2e000050 },
+	{ 0x5fffc,  0x124000b4 },
+	{ 0x502bc,  0x2d80004c },
+	{ 0x5fffc,  0x130000b6 },
+	{ 0x502c0,  0x2d000049 },
+	{ 0x5fffc,  0x140000b8 },
+	{ 0x502c4,  0x2c800045 },
+	{ 0x5fffc,  0x150000b9 },
+	{ 0x502c8,  0x2c000042 },
+	{ 0x5fffc,  0x15c000bd },
+	{ 0x502cc,  0x2b40003e },
+	{ 0x5fffc,  0x16c000bf },
+	{ 0x502d0,  0x2a80003b },
+	{ 0x5fffc,  0x17c000bf },
+	{ 0x502d4,  0x2a000039 },
+	{ 0x5fffc,  0x188000c2 },
+	{ 0x502d8,  0x29400036 },
+	{ 0x5fffc,  0x19c000c4 },
+	{ 0x502dc,  0x28800032 },
+	{ 0x5fffc,  0x1ac000c5 },
+	{ 0x502e0,  0x2800002f },
+	{ 0x5fffc,  0x1bc000c7 },
+	{ 0x502e4,  0x2740002c },
+	{ 0x5fffc,  0x1cc000c8 },
+	{ 0x502e8,  0x26c00029 },
+	{ 0x5fffc,  0x1dc000c9 },
+	{ 0x502ec,  0x26000027 },
+	{ 0x5fffc,  0x1ec000cc },
+	{ 0x502f0,  0x25000024 },
+	{ 0x5fffc,  0x200000cc },
+	{ 0x502f4,  0x24800021 },
+	{ 0x5fffc,  0x210000cd },
+	{ 0x502f8,  0x23800020 },
+	{ 0x5fffc,  0x220000ce },
+	{ 0x502fc,  0x2300001d },
+};
+
+static struct mdp_table_entry mdp_downscale_x_table_PT4TOPT6[] = {
+	{ 0x5fffc,  0x740008c },
+	{ 0x50280,  0x33800088 },
+	{ 0x5fffc,  0x800008e },
+	{ 0x50284,  0x33400084 },
+	{ 0x5fffc,  0x8400092 },
+	{ 0x50288,  0x33000080 },
+	{ 0x5fffc,  0x9000094 },
+	{ 0x5028c,  0x3300007b },
+	{ 0x5fffc,  0x9c00098 },
+	{ 0x50290,  0x32400077 },
+	{ 0x5fffc,  0xa40009b },
+	{ 0x50294,  0x32000073 },
+	{ 0x5fffc,  0xb00009d },
+	{ 0x50298,  0x31c0006f },
+	{ 0x5fffc,  0xbc000a0 },
+	{ 0x5029c,  0x3140006b },
+	{ 0x5fffc,  0xc8000a2 },
+	{ 0x502a0,  0x31000067 },
+	{ 0x5fffc,  0xd8000a5 },
+	{ 0x502a4,  0x30800062 },
+	{ 0x5fffc,  0xe4000a8 },
+	{ 0x502a8,  0x2fc0005f },
+	{ 0x5fffc,  0xec000aa },
+	{ 0x502ac,  0x2fc0005b },
+	{ 0x5fffc,  0xf8000ad },
+	{ 0x502b0,  0x2f400057 },
+	{ 0x5fffc,  0x108000b0 },
+	{ 0x502b4,  0x2e400054 },
+	{ 0x5fffc,  0x114000b2 },
+	{ 0x502b8,  0x2e000050 },
+	{ 0x5fffc,  0x124000b4 },
+	{ 0x502bc,  0x2d80004c },
+	{ 0x5fffc,  0x130000b6 },
+	{ 0x502c0,  0x2d000049 },
+	{ 0x5fffc,  0x140000b8 },
+	{ 0x502c4,  0x2c800045 },
+	{ 0x5fffc,  0x150000b9 },
+	{ 0x502c8,  0x2c000042 },
+	{ 0x5fffc,  0x15c000bd },
+	{ 0x502cc,  0x2b40003e },
+	{ 0x5fffc,  0x16c000bf },
+	{ 0x502d0,  0x2a80003b },
+	{ 0x5fffc,  0x17c000bf },
+	{ 0x502d4,  0x2a000039 },
+	{ 0x5fffc,  0x188000c2 },
+	{ 0x502d8,  0x29400036 },
+	{ 0x5fffc,  0x19c000c4 },
+	{ 0x502dc,  0x28800032 },
+	{ 0x5fffc,  0x1ac000c5 },
+	{ 0x502e0,  0x2800002f },
+	{ 0x5fffc,  0x1bc000c7 },
+	{ 0x502e4,  0x2740002c },
+	{ 0x5fffc,  0x1cc000c8 },
+	{ 0x502e8,  0x26c00029 },
+	{ 0x5fffc,  0x1dc000c9 },
+	{ 0x502ec,  0x26000027 },
+	{ 0x5fffc,  0x1ec000cc },
+	{ 0x502f0,  0x25000024 },
+	{ 0x5fffc,  0x200000cc },
+	{ 0x502f4,  0x24800021 },
+	{ 0x5fffc,  0x210000cd },
+	{ 0x502f8,  0x23800020 },
+	{ 0x5fffc,  0x220000ce },
+	{ 0x502fc,  0x2300001d },
+};
+
+static struct mdp_table_entry mdp_downscale_x_table_PT6TOPT8[] = {
+	{ 0x5fffc,  0xfe000070 },
+	{ 0x50280,  0x4bc00068 },
+	{ 0x5fffc,  0xfe000078 },
+	{ 0x50284,  0x4bc00060 },
+	{ 0x5fffc,  0xfe000080 },
+	{ 0x50288,  0x4b800059 },
+	{ 0x5fffc,  0xfe000089 },
+	{ 0x5028c,  0x4b000052 },
+	{ 0x5fffc,  0xfe400091 },
+	{ 0x50290,  0x4a80004b },
+	{ 0x5fffc,  0xfe40009a },
+	{ 0x50294,  0x4a000044 },
+	{ 0x5fffc,  0xfe8000a3 },
+	{ 0x50298,  0x4940003d },
+	{ 0x5fffc,  0xfec000ac },
+	{ 0x5029c,  0x48400037 },
+	{ 0x5fffc,  0xff0000b4 },
+	{ 0x502a0,  0x47800031 },
+	{ 0x5fffc,  0xff8000bd },
+	{ 0x502a4,  0x4640002b },
+	{ 0x5fffc,  0xc5 },
+	{ 0x502a8,  0x45000026 },
+	{ 0x5fffc,  0x8000ce },
+	{ 0x502ac,  0x43800021 },
+	{ 0x5fffc,  0x10000d6 },
+	{ 0x502b0,  0x4240001c },
+	{ 0x5fffc,  0x18000df },
+	{ 0x502b4,  0x40800018 },
+	{ 0x5fffc,  0x24000e6 },
+	{ 0x502b8,  0x3f000014 },
+	{ 0x5fffc,  0x30000ee },
+	{ 0x502bc,  0x3d400010 },
+	{ 0x5fffc,  0x40000f5 },
+	{ 0x502c0,  0x3b80000c },
+	{ 0x5fffc,  0x50000fc },
+	{ 0x502c4,  0x39800009 },
+	{ 0x5fffc,  0x6000102 },
+	{ 0x502c8,  0x37c00006 },
+	{ 0x5fffc,  0x7000109 },
+	{ 0x502cc,  0x35800004 },
+	{ 0x5fffc,  0x840010e },
+	{ 0x502d0,  0x33800002 },
+	{ 0x5fffc,  0x9800114 },
+	{ 0x502d4,  0x31400000 },
+	{ 0x5fffc,  0xac00119 },
+	{ 0x502d8,  0x2f4003fe },
+	{ 0x5fffc,  0xc40011e },
+	{ 0x502dc,  0x2d0003fc },
+	{ 0x5fffc,  0xdc00121 },
+	{ 0x502e0,  0x2b0003fb },
+	{ 0x5fffc,  0xf400125 },
+	{ 0x502e4,  0x28c003fa },
+	{ 0x5fffc,  0x11000128 },
+	{ 0x502e8,  0x268003f9 },
+	{ 0x5fffc,  0x12c0012a },
+	{ 0x502ec,  0x244003f9 },
+	{ 0x5fffc,  0x1480012c },
+	{ 0x502f0,  0x224003f8 },
+	{ 0x5fffc,  0x1640012e },
+	{ 0x502f4,  0x200003f8 },
+	{ 0x5fffc,  0x1800012f },
+	{ 0x502f8,  0x1e0003f8 },
+	{ 0x5fffc,  0x1a00012f },
+	{ 0x502fc,  0x1c0003f8 },
+};
+
+static struct mdp_table_entry mdp_downscale_x_table_PT8TO1[] = {
+	{ 0x5fffc,  0x0 },
+	{ 0x50280,  0x7fc00000 },
+	{ 0x5fffc,  0xff80000d },
+	{ 0x50284,  0x7ec003f9 },
+	{ 0x5fffc,  0xfec0001c },
+	{ 0x50288,  0x7d4003f3 },
+	{ 0x5fffc,  0xfe40002b },
+	{ 0x5028c,  0x7b8003ed },
+	{ 0x5fffc,  0xfd80003c },
+	{ 0x50290,  0x794003e8 },
+	{ 0x5fffc,  0xfcc0004d },
+	{ 0x50294,  0x76c003e4 },
+	{ 0x5fffc,  0xfc40005f },
+	{ 0x50298,  0x73c003e0 },
+	{ 0x5fffc,  0xfb800071 },
+	{ 0x5029c,  0x708003de },
+	{ 0x5fffc,  0xfac00085 },
+	{ 0x502a0,  0x6d0003db },
+	{ 0x5fffc,  0xfa000098 },
+	{ 0x502a4,  0x698003d9 },
+	{ 0x5fffc,  0xf98000ac },
+	{ 0x502a8,  0x654003d8 },
+	{ 0x5fffc,  0xf8c000c1 },
+	{ 0x502ac,  0x610003d7 },
+	{ 0x5fffc,  0xf84000d5 },
+	{ 0x502b0,  0x5c8003d7 },
+	{ 0x5fffc,  0xf7c000e9 },
+	{ 0x502b4,  0x580003d7 },
+	{ 0x5fffc,  0xf74000fd },
+	{ 0x502b8,  0x534003d8 },
+	{ 0x5fffc,  0xf6c00112 },
+	{ 0x502bc,  0x4e8003d8 },
+	{ 0x5fffc,  0xf6800126 },
+	{ 0x502c0,  0x494003da },
+	{ 0x5fffc,  0xf600013a },
+	{ 0x502c4,  0x448003db },
+	{ 0x5fffc,  0xf600014d },
+	{ 0x502c8,  0x3f4003dd },
+	{ 0x5fffc,  0xf5c00160 },
+	{ 0x502cc,  0x3a4003df },
+	{ 0x5fffc,  0xf5c00172 },
+	{ 0x502d0,  0x354003e1 },
+	{ 0x5fffc,  0xf5c00184 },
+	{ 0x502d4,  0x304003e3 },
+	{ 0x5fffc,  0xf6000195 },
+	{ 0x502d8,  0x2b0003e6 },
+	{ 0x5fffc,  0xf64001a6 },
+	{ 0x502dc,  0x260003e8 },
+	{ 0x5fffc,  0xf6c001b4 },
+	{ 0x502e0,  0x214003eb },
+	{ 0x5fffc,  0xf78001c2 },
+	{ 0x502e4,  0x1c4003ee },
+	{ 0x5fffc,  0xf80001cf },
+	{ 0x502e8,  0x17c003f1 },
+	{ 0x5fffc,  0xf90001db },
+	{ 0x502ec,  0x134003f3 },
+	{ 0x5fffc,  0xfa0001e5 },
+	{ 0x502f0,  0xf0003f6 },
+	{ 0x5fffc,  0xfb4001ee },
+	{ 0x502f4,  0xac003f9 },
+	{ 0x5fffc,  0xfcc001f5 },
+	{ 0x502f8,  0x70003fb },
+	{ 0x5fffc,  0xfe4001fb },
+	{ 0x502fc,  0x34003fe },
+};
+
+struct mdp_table_entry *mdp_downscale_x_table[MDP_DOWNSCALE_MAX] = {
+	[MDP_DOWNSCALE_PT2TOPT4] = mdp_downscale_x_table_PT2TOPT4,
+	[MDP_DOWNSCALE_PT4TOPT6] = mdp_downscale_x_table_PT4TOPT6,
+	[MDP_DOWNSCALE_PT6TOPT8] = mdp_downscale_x_table_PT6TOPT8,
+	[MDP_DOWNSCALE_PT8TO1]  = mdp_downscale_x_table_PT8TO1,
+};
+
+static struct mdp_table_entry mdp_downscale_y_table_PT2TOPT4[] = {
+	{ 0x5fffc,  0x740008c },
+	{ 0x50300,  0x33800088 },
+	{ 0x5fffc,  0x800008e },
+	{ 0x50304,  0x33400084 },
+	{ 0x5fffc,  0x8400092 },
+	{ 0x50308,  0x33000080 },
+	{ 0x5fffc,  0x9000094 },
+	{ 0x5030c,  0x3300007b },
+	{ 0x5fffc,  0x9c00098 },
+	{ 0x50310,  0x32400077 },
+	{ 0x5fffc,  0xa40009b },
+	{ 0x50314,  0x32000073 },
+	{ 0x5fffc,  0xb00009d },
+	{ 0x50318,  0x31c0006f },
+	{ 0x5fffc,  0xbc000a0 },
+	{ 0x5031c,  0x3140006b },
+	{ 0x5fffc,  0xc8000a2 },
+	{ 0x50320,  0x31000067 },
+	{ 0x5fffc,  0xd8000a5 },
+	{ 0x50324,  0x30800062 },
+	{ 0x5fffc,  0xe4000a8 },
+	{ 0x50328,  0x2fc0005f },
+	{ 0x5fffc,  0xec000aa },
+	{ 0x5032c,  0x2fc0005b },
+	{ 0x5fffc,  0xf8000ad },
+	{ 0x50330,  0x2f400057 },
+	{ 0x5fffc,  0x108000b0 },
+	{ 0x50334,  0x2e400054 },
+	{ 0x5fffc,  0x114000b2 },
+	{ 0x50338,  0x2e000050 },
+	{ 0x5fffc,  0x124000b4 },
+	{ 0x5033c,  0x2d80004c },
+	{ 0x5fffc,  0x130000b6 },
+	{ 0x50340,  0x2d000049 },
+	{ 0x5fffc,  0x140000b8 },
+	{ 0x50344,  0x2c800045 },
+	{ 0x5fffc,  0x150000b9 },
+	{ 0x50348,  0x2c000042 },
+	{ 0x5fffc,  0x15c000bd },
+	{ 0x5034c,  0x2b40003e },
+	{ 0x5fffc,  0x16c000bf },
+	{ 0x50350,  0x2a80003b },
+	{ 0x5fffc,  0x17c000bf },
+	{ 0x50354,  0x2a000039 },
+	{ 0x5fffc,  0x188000c2 },
+	{ 0x50358,  0x29400036 },
+	{ 0x5fffc,  0x19c000c4 },
+	{ 0x5035c,  0x28800032 },
+	{ 0x5fffc,  0x1ac000c5 },
+	{ 0x50360,  0x2800002f },
+	{ 0x5fffc,  0x1bc000c7 },
+	{ 0x50364,  0x2740002c },
+	{ 0x5fffc,  0x1cc000c8 },
+	{ 0x50368,  0x26c00029 },
+	{ 0x5fffc,  0x1dc000c9 },
+	{ 0x5036c,  0x26000027 },
+	{ 0x5fffc,  0x1ec000cc },
+	{ 0x50370,  0x25000024 },
+	{ 0x5fffc,  0x200000cc },
+	{ 0x50374,  0x24800021 },
+	{ 0x5fffc,  0x210000cd },
+	{ 0x50378,  0x23800020 },
+	{ 0x5fffc,  0x220000ce },
+	{ 0x5037c,  0x2300001d },
+};
+
+static struct mdp_table_entry mdp_downscale_y_table_PT4TOPT6[] = {
+	{ 0x5fffc,  0x740008c },
+	{ 0x50300,  0x33800088 },
+	{ 0x5fffc,  0x800008e },
+	{ 0x50304,  0x33400084 },
+	{ 0x5fffc,  0x8400092 },
+	{ 0x50308,  0x33000080 },
+	{ 0x5fffc,  0x9000094 },
+	{ 0x5030c,  0x3300007b },
+	{ 0x5fffc,  0x9c00098 },
+	{ 0x50310,  0x32400077 },
+	{ 0x5fffc,  0xa40009b },
+	{ 0x50314,  0x32000073 },
+	{ 0x5fffc,  0xb00009d },
+	{ 0x50318,  0x31c0006f },
+	{ 0x5fffc,  0xbc000a0 },
+	{ 0x5031c,  0x3140006b },
+	{ 0x5fffc,  0xc8000a2 },
+	{ 0x50320,  0x31000067 },
+	{ 0x5fffc,  0xd8000a5 },
+	{ 0x50324,  0x30800062 },
+	{ 0x5fffc,  0xe4000a8 },
+	{ 0x50328,  0x2fc0005f },
+	{ 0x5fffc,  0xec000aa },
+	{ 0x5032c,  0x2fc0005b },
+	{ 0x5fffc,  0xf8000ad },
+	{ 0x50330,  0x2f400057 },
+	{ 0x5fffc,  0x108000b0 },
+	{ 0x50334,  0x2e400054 },
+	{ 0x5fffc,  0x114000b2 },
+	{ 0x50338,  0x2e000050 },
+	{ 0x5fffc,  0x124000b4 },
+	{ 0x5033c,  0x2d80004c },
+	{ 0x5fffc,  0x130000b6 },
+	{ 0x50340,  0x2d000049 },
+	{ 0x5fffc,  0x140000b8 },
+	{ 0x50344,  0x2c800045 },
+	{ 0x5fffc,  0x150000b9 },
+	{ 0x50348,  0x2c000042 },
+	{ 0x5fffc,  0x15c000bd },
+	{ 0x5034c,  0x2b40003e },
+	{ 0x5fffc,  0x16c000bf },
+	{ 0x50350,  0x2a80003b },
+	{ 0x5fffc,  0x17c000bf },
+	{ 0x50354,  0x2a000039 },
+	{ 0x5fffc,  0x188000c2 },
+	{ 0x50358,  0x29400036 },
+	{ 0x5fffc,  0x19c000c4 },
+	{ 0x5035c,  0x28800032 },
+	{ 0x5fffc,  0x1ac000c5 },
+	{ 0x50360,  0x2800002f },
+	{ 0x5fffc,  0x1bc000c7 },
+	{ 0x50364,  0x2740002c },
+	{ 0x5fffc,  0x1cc000c8 },
+	{ 0x50368,  0x26c00029 },
+	{ 0x5fffc,  0x1dc000c9 },
+	{ 0x5036c,  0x26000027 },
+	{ 0x5fffc,  0x1ec000cc },
+	{ 0x50370,  0x25000024 },
+	{ 0x5fffc,  0x200000cc },
+	{ 0x50374,  0x24800021 },
+	{ 0x5fffc,  0x210000cd },
+	{ 0x50378,  0x23800020 },
+	{ 0x5fffc,  0x220000ce },
+	{ 0x5037c,  0x2300001d },
+};
+
+static struct mdp_table_entry mdp_downscale_y_table_PT6TOPT8[] = {
+	{ 0x5fffc,  0xfe000070 },
+	{ 0x50300,  0x4bc00068 },
+	{ 0x5fffc,  0xfe000078 },
+	{ 0x50304,  0x4bc00060 },
+	{ 0x5fffc,  0xfe000080 },
+	{ 0x50308,  0x4b800059 },
+	{ 0x5fffc,  0xfe000089 },
+	{ 0x5030c,  0x4b000052 },
+	{ 0x5fffc,  0xfe400091 },
+	{ 0x50310,  0x4a80004b },
+	{ 0x5fffc,  0xfe40009a },
+	{ 0x50314,  0x4a000044 },
+	{ 0x5fffc,  0xfe8000a3 },
+	{ 0x50318,  0x4940003d },
+	{ 0x5fffc,  0xfec000ac },
+	{ 0x5031c,  0x48400037 },
+	{ 0x5fffc,  0xff0000b4 },
+	{ 0x50320,  0x47800031 },
+	{ 0x5fffc,  0xff8000bd },
+	{ 0x50324,  0x4640002b },
+	{ 0x5fffc,  0xc5 },
+	{ 0x50328,  0x45000026 },
+	{ 0x5fffc,  0x8000ce },
+	{ 0x5032c,  0x43800021 },
+	{ 0x5fffc,  0x10000d6 },
+	{ 0x50330,  0x4240001c },
+	{ 0x5fffc,  0x18000df },
+	{ 0x50334,  0x40800018 },
+	{ 0x5fffc,  0x24000e6 },
+	{ 0x50338,  0x3f000014 },
+	{ 0x5fffc,  0x30000ee },
+	{ 0x5033c,  0x3d400010 },
+	{ 0x5fffc,  0x40000f5 },
+	{ 0x50340,  0x3b80000c },
+	{ 0x5fffc,  0x50000fc },
+	{ 0x50344,  0x39800009 },
+	{ 0x5fffc,  0x6000102 },
+	{ 0x50348,  0x37c00006 },
+	{ 0x5fffc,  0x7000109 },
+	{ 0x5034c,  0x35800004 },
+	{ 0x5fffc,  0x840010e },
+	{ 0x50350,  0x33800002 },
+	{ 0x5fffc,  0x9800114 },
+	{ 0x50354,  0x31400000 },
+	{ 0x5fffc,  0xac00119 },
+	{ 0x50358,  0x2f4003fe },
+	{ 0x5fffc,  0xc40011e },
+	{ 0x5035c,  0x2d0003fc },
+	{ 0x5fffc,  0xdc00121 },
+	{ 0x50360,  0x2b0003fb },
+	{ 0x5fffc,  0xf400125 },
+	{ 0x50364,  0x28c003fa },
+	{ 0x5fffc,  0x11000128 },
+	{ 0x50368,  0x268003f9 },
+	{ 0x5fffc,  0x12c0012a },
+	{ 0x5036c,  0x244003f9 },
+	{ 0x5fffc,  0x1480012c },
+	{ 0x50370,  0x224003f8 },
+	{ 0x5fffc,  0x1640012e },
+	{ 0x50374,  0x200003f8 },
+	{ 0x5fffc,  0x1800012f },
+	{ 0x50378,  0x1e0003f8 },
+	{ 0x5fffc,  0x1a00012f },
+	{ 0x5037c,  0x1c0003f8 },
+};
+
+static struct mdp_table_entry mdp_downscale_y_table_PT8TO1[] = {
+	{ 0x5fffc,  0x0 },
+	{ 0x50300,  0x7fc00000 },
+	{ 0x5fffc,  0xff80000d },
+	{ 0x50304,  0x7ec003f9 },
+	{ 0x5fffc,  0xfec0001c },
+	{ 0x50308,  0x7d4003f3 },
+	{ 0x5fffc,  0xfe40002b },
+	{ 0x5030c,  0x7b8003ed },
+	{ 0x5fffc,  0xfd80003c },
+	{ 0x50310,  0x794003e8 },
+	{ 0x5fffc,  0xfcc0004d },
+	{ 0x50314,  0x76c003e4 },
+	{ 0x5fffc,  0xfc40005f },
+	{ 0x50318,  0x73c003e0 },
+	{ 0x5fffc,  0xfb800071 },
+	{ 0x5031c,  0x708003de },
+	{ 0x5fffc,  0xfac00085 },
+	{ 0x50320,  0x6d0003db },
+	{ 0x5fffc,  0xfa000098 },
+	{ 0x50324,  0x698003d9 },
+	{ 0x5fffc,  0xf98000ac },
+	{ 0x50328,  0x654003d8 },
+	{ 0x5fffc,  0xf8c000c1 },
+	{ 0x5032c,  0x610003d7 },
+	{ 0x5fffc,  0xf84000d5 },
+	{ 0x50330,  0x5c8003d7 },
+	{ 0x5fffc,  0xf7c000e9 },
+	{ 0x50334,  0x580003d7 },
+	{ 0x5fffc,  0xf74000fd },
+	{ 0x50338,  0x534003d8 },
+	{ 0x5fffc,  0xf6c00112 },
+	{ 0x5033c,  0x4e8003d8 },
+	{ 0x5fffc,  0xf6800126 },
+	{ 0x50340,  0x494003da },
+	{ 0x5fffc,  0xf600013a },
+	{ 0x50344,  0x448003db },
+	{ 0x5fffc,  0xf600014d },
+	{ 0x50348,  0x3f4003dd },
+	{ 0x5fffc,  0xf5c00160 },
+	{ 0x5034c,  0x3a4003df },
+	{ 0x5fffc,  0xf5c00172 },
+	{ 0x50350,  0x354003e1 },
+	{ 0x5fffc,  0xf5c00184 },
+	{ 0x50354,  0x304003e3 },
+	{ 0x5fffc,  0xf6000195 },
+	{ 0x50358,  0x2b0003e6 },
+	{ 0x5fffc,  0xf64001a6 },
+	{ 0x5035c,  0x260003e8 },
+	{ 0x5fffc,  0xf6c001b4 },
+	{ 0x50360,  0x214003eb },
+	{ 0x5fffc,  0xf78001c2 },
+	{ 0x50364,  0x1c4003ee },
+	{ 0x5fffc,  0xf80001cf },
+	{ 0x50368,  0x17c003f1 },
+	{ 0x5fffc,  0xf90001db },
+	{ 0x5036c,  0x134003f3 },
+	{ 0x5fffc,  0xfa0001e5 },
+	{ 0x50370,  0xf0003f6 },
+	{ 0x5fffc,  0xfb4001ee },
+	{ 0x50374,  0xac003f9 },
+	{ 0x5fffc,  0xfcc001f5 },
+	{ 0x50378,  0x70003fb },
+	{ 0x5fffc,  0xfe4001fb },
+	{ 0x5037c,  0x34003fe },
+};
+
+struct mdp_table_entry *mdp_downscale_y_table[MDP_DOWNSCALE_MAX] = {
+	[MDP_DOWNSCALE_PT2TOPT4] = mdp_downscale_y_table_PT2TOPT4,
+	[MDP_DOWNSCALE_PT4TOPT6] = mdp_downscale_y_table_PT4TOPT6,
+	[MDP_DOWNSCALE_PT6TOPT8] = mdp_downscale_y_table_PT6TOPT8,
+	[MDP_DOWNSCALE_PT8TO1]  = mdp_downscale_y_table_PT8TO1,
+};
+
+struct mdp_table_entry mdp_gaussian_blur_table[] = {
+	/* max variance */
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50280, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50284, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50288, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5028c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50290, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50294, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50298, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5029c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502a0, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502a4, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502a8, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502ac, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502b0, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502b4, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502b8, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502bc, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502c0, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502c4, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502c8, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502cc, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502d0, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502d4, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502d8, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502dc, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502e0, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502e4, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502e8, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502ec, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502f0, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502f4, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502f8, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x502fc, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50300, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50304, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50308, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5030c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50310, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50314, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50318, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5031c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50320, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50324, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50328, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5032c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50330, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50334, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50338, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5033c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50340, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50344, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50348, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5034c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50350, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50354, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50358, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5035c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50360, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50364, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50368, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5036c, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50370, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50374, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x50378, 0x20000080 },
+	{ 0x5fffc, 0x20000080 },
+	{ 0x5037c, 0x20000080 },
+};
diff --git a/drivers/video/msm/mdp_scale_tables.h b/drivers/video/msm/mdp_scale_tables.h
new file mode 100644
index 0000000..34077b1
--- /dev/null
+++ b/drivers/video/msm/mdp_scale_tables.h
@@ -0,0 +1,38 @@
+/* drivers/video/msm_fb/mdp_scale_tables.h
+ *
+ * Copyright (C) 2007 QUALCOMM Incorporated
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef _MDP_SCALE_TABLES_H_
+#define _MDP_SCALE_TABLES_H_
+
+#include <linux/types.h>
+struct mdp_table_entry {
+	uint32_t reg;
+	uint32_t val;
+};
+
+extern struct mdp_table_entry mdp_upscale_table[64];
+
+enum {
+	MDP_DOWNSCALE_PT2TOPT4,
+	MDP_DOWNSCALE_PT4TOPT6,
+	MDP_DOWNSCALE_PT6TOPT8,
+	MDP_DOWNSCALE_PT8TO1,
+	MDP_DOWNSCALE_MAX,
+};
+
+extern struct mdp_table_entry *mdp_downscale_x_table[MDP_DOWNSCALE_MAX];
+extern struct mdp_table_entry *mdp_downscale_y_table[MDP_DOWNSCALE_MAX];
+extern struct mdp_table_entry mdp_gaussian_blur_table[];
+
+#endif
diff --git a/drivers/video/msm/msm_fb.c b/drivers/video/msm/msm_fb.c
new file mode 100644
index 0000000..49101dd
--- /dev/null
+++ b/drivers/video/msm/msm_fb.c
@@ -0,0 +1,636 @@
+/* drivers/video/msm/msm_fb.c
+ *
+ * Core MSM framebuffer driver.
+ *
+ * Copyright (C) 2007 Google Incorporated
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/fb.h>
+#include <linux/delay.h>
+
+#include <linux/freezer.h>
+#include <linux/wait.h>
+#include <linux/msm_mdp.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <mach/msm_fb.h>
+#include <mach/board.h>
+#include <linux/workqueue.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+
+#define PRINT_FPS 0
+#define PRINT_BLIT_TIME 0
+
+#define SLEEPING 0x4
+#define UPDATING 0x3
+#define FULL_UPDATE_DONE 0x2
+#define WAKING 0x1
+#define AWAKE 0x0
+
+#define NONE 0
+#define SUSPEND_RESUME 0x1
+#define FPS 0x2
+#define BLIT_TIME 0x4
+#define SHOW_UPDATES 0x8
+
+#define DLOG(mask, fmt, args...) \
+do { \
+	if (msmfb_debug_mask & mask) \
+		printk(KERN_INFO "msmfb: "fmt, ##args); \
+} while (0)
+
+static int msmfb_debug_mask;
+module_param_named(msmfb_debug_mask, msmfb_debug_mask, int,
+		   S_IRUGO | S_IWUSR | S_IWGRP);
+
+struct mdp_device *mdp;
+
+struct msmfb_info {
+	struct fb_info *fb;
+	struct msm_panel_data *panel;
+	int xres;
+	int yres;
+	unsigned output_format;
+	unsigned yoffset;
+	unsigned frame_requested;
+	unsigned frame_done;
+	int sleeping;
+	unsigned update_frame;
+	struct {
+		int left;
+		int top;
+		int eright; /* exclusive */
+		int ebottom; /* exclusive */
+	} update_info;
+	char *black;
+
+	spinlock_t update_lock;
+	struct mutex panel_init_lock;
+	wait_queue_head_t frame_wq;
+	struct workqueue_struct *resume_workqueue;
+	struct work_struct resume_work;
+	struct msmfb_callback dma_callback;
+	struct msmfb_callback vsync_callback;
+	struct hrtimer fake_vsync;
+	ktime_t vsync_request_time;
+};
+
+static int msmfb_open(struct fb_info *info, int user)
+{
+	return 0;
+}
+
+static int msmfb_release(struct fb_info *info, int user)
+{
+	return 0;
+}
+
+/* Called from dma interrupt handler, must not sleep */
+static void msmfb_handle_dma_interrupt(struct msmfb_callback *callback)
+{
+	unsigned long irq_flags;
+	struct msmfb_info *msmfb  = container_of(callback, struct msmfb_info,
+					       dma_callback);
+
+	spin_lock_irqsave(&msmfb->update_lock, irq_flags);
+	msmfb->frame_done = msmfb->frame_requested;
+	if (msmfb->sleeping == UPDATING &&
+	    msmfb->frame_done == msmfb->update_frame) {
+		DLOG(SUSPEND_RESUME, "full update completed\n");
+		queue_work(msmfb->resume_workqueue, &msmfb->resume_work);
+	}
+	spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+	wake_up(&msmfb->frame_wq);
+}
+
+static int msmfb_start_dma(struct msmfb_info *msmfb)
+{
+	uint32_t x, y, w, h;
+	unsigned addr;
+	unsigned long irq_flags;
+	uint32_t yoffset;
+	s64 time_since_request;
+	struct msm_panel_data *panel = msmfb->panel;
+
+	spin_lock_irqsave(&msmfb->update_lock, irq_flags);
+	time_since_request = ktime_to_ns(ktime_sub(ktime_get(),
+			     msmfb->vsync_request_time));
+	if (time_since_request > 20 * NSEC_PER_MSEC) {
+		uint32_t us;
+		us = do_div(time_since_request, NSEC_PER_MSEC) / NSEC_PER_USEC;
+		printk(KERN_WARNING "msmfb_start_dma %lld.%03u ms after vsync "
+			"request\n", time_since_request, us);
+	}
+	if (msmfb->frame_done == msmfb->frame_requested) {
+		spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+		return -1;
+	}
+	if (msmfb->sleeping == SLEEPING) {
+		DLOG(SUSPEND_RESUME, "tried to start dma while asleep\n");
+		spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+		return -1;
+	}
+	x = msmfb->update_info.left;
+	y = msmfb->update_info.top;
+	w = msmfb->update_info.eright - x;
+	h = msmfb->update_info.ebottom - y;
+	yoffset = msmfb->yoffset;
+	msmfb->update_info.left = msmfb->xres + 1;
+	msmfb->update_info.top = msmfb->yres + 1;
+	msmfb->update_info.eright = 0;
+	msmfb->update_info.ebottom = 0;
+	if (unlikely(w > msmfb->xres || h > msmfb->yres ||
+		     w == 0 || h == 0)) {
+		printk(KERN_INFO "invalid update: %d %d %d "
+				"%d\n", x, y, w, h);
+		msmfb->frame_done = msmfb->frame_requested;
+		goto error;
+	}
+	spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+
+	addr = ((msmfb->xres * (yoffset + y) + x) * 2);
+	mdp->dma(mdp, addr + msmfb->fb->fix.smem_start,
+		 msmfb->xres * 2, w, h, x, y, &msmfb->dma_callback,
+		 panel->interface_type);
+	return 0;
+error:
+	spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+	/* some clients need to clear their vsync interrupt */
+	if (panel->clear_vsync)
+		panel->clear_vsync(panel);
+	wake_up(&msmfb->frame_wq);
+	return 0;
+}
+
+/* Called from esync interrupt handler, must not sleep */
+static void msmfb_handle_vsync_interrupt(struct msmfb_callback *callback)
+{
+	struct msmfb_info *msmfb = container_of(callback, struct msmfb_info,
+					       vsync_callback);
+	msmfb_start_dma(msmfb);
+}
+
+static enum hrtimer_restart msmfb_fake_vsync(struct hrtimer *timer)
+{
+	struct msmfb_info *msmfb  = container_of(timer, struct msmfb_info,
+					       fake_vsync);
+	msmfb_start_dma(msmfb);
+	return HRTIMER_NORESTART;
+}
+
+static void msmfb_pan_update(struct fb_info *info, uint32_t left, uint32_t top,
+			     uint32_t eright, uint32_t ebottom,
+			     uint32_t yoffset, int pan_display)
+{
+	struct msmfb_info *msmfb = info->par;
+	struct msm_panel_data *panel = msmfb->panel;
+	unsigned long irq_flags;
+	int sleeping;
+	int retry = 1;
+
+	DLOG(SHOW_UPDATES, "update %d %d %d %d %d %d\n",
+		left, top, eright, ebottom, yoffset, pan_display);
+restart:
+	spin_lock_irqsave(&msmfb->update_lock, irq_flags);
+
+	/* if we are sleeping, on a pan_display wait 10ms (to throttle back
+	 * drawing otherwise return */
+	if (msmfb->sleeping == SLEEPING) {
+		DLOG(SUSPEND_RESUME, "drawing while asleep\n");
+		spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+		if (pan_display)
+			wait_event_interruptible_timeout(msmfb->frame_wq,
+				msmfb->sleeping != SLEEPING, HZ/10);
+		return;
+	}
+
+	sleeping = msmfb->sleeping;
+	/* on a full update, if the last frame has not completed, wait for it */
+	if (pan_display && (msmfb->frame_requested != msmfb->frame_done ||
+			    sleeping == UPDATING)) {
+		int ret;
+		spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+		ret = wait_event_interruptible_timeout(msmfb->frame_wq,
+			msmfb->frame_done == msmfb->frame_requested &&
+			msmfb->sleeping != UPDATING, 5 * HZ);
+		if (ret <= 0 && (msmfb->frame_requested != msmfb->frame_done ||
+				 msmfb->sleeping == UPDATING)) {
+			if (retry && panel->request_vsync &&
+			    (sleeping == AWAKE)) {
+				panel->request_vsync(panel,
+					&msmfb->vsync_callback);
+				retry = 0;
+				printk(KERN_WARNING "msmfb_pan_display timeout "
+					"rerequest vsync\n");
+			} else {
+				printk(KERN_WARNING "msmfb_pan_display timeout "
+					"waiting for frame start, %d %d\n",
+					msmfb->frame_requested,
+					msmfb->frame_done);
+				return;
+			}
+		}
+		goto restart;
+	}
+
+
+	msmfb->frame_requested++;
+	/* if necessary, update the y offset, if this is the
+	 * first full update on resume, set the sleeping state */
+	if (pan_display) {
+		msmfb->yoffset = yoffset;
+		if (left == 0 && top == 0 && eright == info->var.xres &&
+		    ebottom == info->var.yres) {
+			if (sleeping == WAKING) {
+				msmfb->update_frame = msmfb->frame_requested;
+				DLOG(SUSPEND_RESUME, "full update starting\n");
+				msmfb->sleeping = UPDATING;
+			}
+		}
+	}
+
+	/* set the update request */
+	if (left < msmfb->update_info.left)
+		msmfb->update_info.left = left;
+	if (top < msmfb->update_info.top)
+		msmfb->update_info.top = top;
+	if (eright > msmfb->update_info.eright)
+		msmfb->update_info.eright = eright;
+	if (ebottom > msmfb->update_info.ebottom)
+		msmfb->update_info.ebottom = ebottom;
+	DLOG(SHOW_UPDATES, "update queued %d %d %d %d %d\n",
+		msmfb->update_info.left, msmfb->update_info.top,
+		msmfb->update_info.eright, msmfb->update_info.ebottom,
+		msmfb->yoffset);
+	spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+
+	/* if the panel is all the way on wait for vsync, otherwise sleep
+	 * for 16 ms (long enough for the dma to panel) and then begin dma */
+	msmfb->vsync_request_time = ktime_get();
+	if (panel->request_vsync && (sleeping == AWAKE)) {
+		panel->request_vsync(panel, &msmfb->vsync_callback);
+	} else {
+		if (!hrtimer_active(&msmfb->fake_vsync)) {
+			hrtimer_start(&msmfb->fake_vsync,
+				      ktime_set(0, NSEC_PER_SEC/60),
+				      HRTIMER_MODE_REL);
+		}
+	}
+}
+
+static void msmfb_update(struct fb_info *info, uint32_t left, uint32_t top,
+			 uint32_t eright, uint32_t ebottom)
+{
+	msmfb_pan_update(info, left, top, eright, ebottom, 0, 0);
+}
+
+static void power_on_panel(struct work_struct *work)
+{
+	struct msmfb_info *msmfb =
+		container_of(work, struct msmfb_info, resume_work);
+	struct msm_panel_data *panel = msmfb->panel;
+	unsigned long irq_flags;
+
+	mutex_lock(&msmfb->panel_init_lock);
+	DLOG(SUSPEND_RESUME, "turning on panel\n");
+	if (msmfb->sleeping == UPDATING) {
+		if (panel->unblank(panel)) {
+			printk(KERN_INFO "msmfb: panel unblank failed,"
+			       "not starting drawing\n");
+			goto error;
+		}
+		spin_lock_irqsave(&msmfb->update_lock, irq_flags);
+		msmfb->sleeping = AWAKE;
+		wake_up(&msmfb->frame_wq);
+		spin_unlock_irqrestore(&msmfb->update_lock, irq_flags);
+	}
+error:
+	mutex_unlock(&msmfb->panel_init_lock);
+}
+
+
+static int msmfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+	if ((var->xres != info->var.xres) ||
+	    (var->yres != info->var.yres) ||
+	    (var->xres_virtual != info->var.xres_virtual) ||
+	    (var->yres_virtual != info->var.yres_virtual) ||
+	    (var->xoffset != info->var.xoffset) ||
+	    (var->bits_per_pixel != info->var.bits_per_pixel) ||
+	    (var->grayscale != info->var.grayscale))
+		 return -EINVAL;
+	return 0;
+}
+
+int msmfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+	struct msmfb_info *msmfb = info->par;
+	struct msm_panel_data *panel = msmfb->panel;
+
+	/* "UPDT" */
+	if ((panel->caps & MSMFB_CAP_PARTIAL_UPDATES) &&
+	    (var->reserved[0] == 0x54445055)) {
+		msmfb_pan_update(info, var->reserved[1] & 0xffff,
+				 var->reserved[1] >> 16,
+				 var->reserved[2] & 0xffff,
+				 var->reserved[2] >> 16, var->yoffset, 1);
+	} else {
+		msmfb_pan_update(info, 0, 0, info->var.xres, info->var.yres,
+				 var->yoffset, 1);
+	}
+	return 0;
+}
+
+static void msmfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
+{
+	cfb_fillrect(p, rect);
+	msmfb_update(p, rect->dx, rect->dy, rect->dx + rect->width,
+		     rect->dy + rect->height);
+}
+
+static void msmfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
+{
+	cfb_copyarea(p, area);
+	msmfb_update(p, area->dx, area->dy, area->dx + area->width,
+		     area->dy + area->height);
+}
+
+static void msmfb_imageblit(struct fb_info *p, const struct fb_image *image)
+{
+	cfb_imageblit(p, image);
+	msmfb_update(p, image->dx, image->dy, image->dx + image->width,
+		     image->dy + image->height);
+}
+
+
+static int msmfb_blit(struct fb_info *info,
+		      void __user *p)
+{
+	struct mdp_blit_req req;
+	struct mdp_blit_req_list req_list;
+	int i;
+	int ret;
+
+	if (copy_from_user(&req_list, p, sizeof(req_list)))
+		return -EFAULT;
+
+	for (i = 0; i < req_list.count; i++) {
+		struct mdp_blit_req_list *list =
+			(struct mdp_blit_req_list *)p;
+		if (copy_from_user(&req, &list->req[i], sizeof(req)))
+			return -EFAULT;
+		ret = mdp->blit(mdp, info, &req);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+
+DEFINE_MUTEX(mdp_ppp_lock);
+
+static int msmfb_ioctl(struct fb_info *p, unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	switch (cmd) {
+	case MSMFB_GRP_DISP:
+		mdp->set_grp_disp(mdp, arg);
+		break;
+	case MSMFB_BLIT:
+		ret = msmfb_blit(p, argp);
+		if (ret)
+			return ret;
+		break;
+	default:
+			printk(KERN_INFO "msmfb unknown ioctl: %d\n", cmd);
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static struct fb_ops msmfb_ops = {
+	.owner = THIS_MODULE,
+	.fb_open = msmfb_open,
+	.fb_release = msmfb_release,
+	.fb_check_var = msmfb_check_var,
+	.fb_pan_display = msmfb_pan_display,
+	.fb_fillrect = msmfb_fillrect,
+	.fb_copyarea = msmfb_copyarea,
+	.fb_imageblit = msmfb_imageblit,
+	.fb_ioctl = msmfb_ioctl,
+};
+
+static unsigned PP[16];
+
+
+
+#define BITS_PER_PIXEL 16
+
+static void setup_fb_info(struct msmfb_info *msmfb)
+{
+	struct fb_info *fb_info = msmfb->fb;
+	int r;
+
+	/* finish setting up the fb_info struct */
+	strncpy(fb_info->fix.id, "msmfb", 16);
+	fb_info->fix.ypanstep = 1;
+
+	fb_info->fbops = &msmfb_ops;
+	fb_info->flags = FBINFO_DEFAULT;
+
+	fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
+	fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
+	fb_info->fix.line_length = msmfb->xres * 2;
+
+	fb_info->var.xres = msmfb->xres;
+	fb_info->var.yres = msmfb->yres;
+	fb_info->var.width = msmfb->panel->fb_data->width;
+	fb_info->var.height = msmfb->panel->fb_data->height;
+	fb_info->var.xres_virtual = msmfb->xres;
+	fb_info->var.yres_virtual = msmfb->yres * 2;
+	fb_info->var.bits_per_pixel = BITS_PER_PIXEL;
+	fb_info->var.accel_flags = 0;
+
+	fb_info->var.yoffset = 0;
+
+	if (msmfb->panel->caps & MSMFB_CAP_PARTIAL_UPDATES) {
+		fb_info->var.reserved[0] = 0x54445055;
+		fb_info->var.reserved[1] = 0;
+		fb_info->var.reserved[2] = (uint16_t)msmfb->xres |
+					   ((uint32_t)msmfb->yres << 16);
+	}
+
+	fb_info->var.red.offset = 11;
+	fb_info->var.red.length = 5;
+	fb_info->var.red.msb_right = 0;
+	fb_info->var.green.offset = 5;
+	fb_info->var.green.length = 6;
+	fb_info->var.green.msb_right = 0;
+	fb_info->var.blue.offset = 0;
+	fb_info->var.blue.length = 5;
+	fb_info->var.blue.msb_right = 0;
+
+	r = fb_alloc_cmap(&fb_info->cmap, 16, 0);
+	fb_info->pseudo_palette = PP;
+
+	PP[0] = 0;
+	for (r = 1; r < 16; r++)
+		PP[r] = 0xffffffff;
+}
+
+static int setup_fbmem(struct msmfb_info *msmfb, struct platform_device *pdev)
+{
+	struct fb_info *fb = msmfb->fb;
+	struct resource *resource;
+	unsigned long size = msmfb->xres * msmfb->yres *
+			     (BITS_PER_PIXEL >> 3) * 2;
+	unsigned char *fbram;
+
+	/* board file might have attached a resource describing an fb */
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!resource)
+		return -EINVAL;
+
+	/* check the resource is large enough to fit the fb */
+	if (resource->end - resource->start < size) {
+		printk(KERN_ERR "allocated resource is too small for "
+				"fb\n");
+		return -ENOMEM;
+	}
+	fb->fix.smem_start = resource->start;
+	fb->fix.smem_len = resource->end - resource->start;
+	fbram = ioremap(resource->start,
+			resource->end - resource->start);
+	if (fbram == 0) {
+		printk(KERN_ERR "msmfb: cannot allocate fbram!\n");
+		return -ENOMEM;
+	}
+	fb->screen_base = fbram;
+	return 0;
+}
+
+static int msmfb_probe(struct platform_device *pdev)
+{
+	struct fb_info *fb;
+	struct msmfb_info *msmfb;
+	struct msm_panel_data *panel = pdev->dev.platform_data;
+	int ret;
+
+	if (!panel) {
+		pr_err("msmfb_probe: no platform data\n");
+		return -EINVAL;
+	}
+	if (!panel->fb_data) {
+		pr_err("msmfb_probe: no fb_data\n");
+		return -EINVAL;
+	}
+
+	fb = framebuffer_alloc(sizeof(struct msmfb_info), &pdev->dev);
+	if (!fb)
+		return -ENOMEM;
+	msmfb = fb->par;
+	msmfb->fb = fb;
+	msmfb->panel = panel;
+	msmfb->xres = panel->fb_data->xres;
+	msmfb->yres = panel->fb_data->yres;
+
+	ret = setup_fbmem(msmfb, pdev);
+	if (ret)
+		goto error_setup_fbmem;
+
+	setup_fb_info(msmfb);
+
+	spin_lock_init(&msmfb->update_lock);
+	mutex_init(&msmfb->panel_init_lock);
+	init_waitqueue_head(&msmfb->frame_wq);
+	msmfb->resume_workqueue = create_workqueue("panel_on");
+	if (msmfb->resume_workqueue == NULL) {
+		printk(KERN_ERR "failed to create panel_on workqueue\n");
+		ret = -ENOMEM;
+		goto error_create_workqueue;
+	}
+	INIT_WORK(&msmfb->resume_work, power_on_panel);
+	msmfb->black = kzalloc(msmfb->fb->var.bits_per_pixel*msmfb->xres,
+			       GFP_KERNEL);
+
+	printk(KERN_INFO "msmfb_probe() installing %d x %d panel\n",
+	       msmfb->xres, msmfb->yres);
+
+	msmfb->dma_callback.func = msmfb_handle_dma_interrupt;
+	msmfb->vsync_callback.func = msmfb_handle_vsync_interrupt;
+	hrtimer_init(&msmfb->fake_vsync, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+
+
+	msmfb->fake_vsync.function = msmfb_fake_vsync;
+
+	ret = register_framebuffer(fb);
+	if (ret)
+		goto error_register_framebuffer;
+
+	msmfb->sleeping = WAKING;
+
+	return 0;
+
+error_register_framebuffer:
+	destroy_workqueue(msmfb->resume_workqueue);
+error_create_workqueue:
+	iounmap(fb->screen_base);
+error_setup_fbmem:
+	framebuffer_release(msmfb->fb);
+	return ret;
+}
+
+static struct platform_driver msm_panel_driver = {
+	/* need to write remove */
+	.probe = msmfb_probe,
+	.driver = {.name = "msm_panel"},
+};
+
+
+static int msmfb_add_mdp_device(struct device *dev,
+				struct class_interface *class_intf)
+{
+	/* might need locking if mulitple mdp devices */
+	if (mdp)
+		return 0;
+	mdp = container_of(dev, struct mdp_device, dev);
+	return platform_driver_register(&msm_panel_driver);
+}
+
+static void msmfb_remove_mdp_device(struct device *dev,
+				struct class_interface *class_intf)
+{
+	/* might need locking if mulitple mdp devices */
+	if (dev != &mdp->dev)
+		return;
+	platform_driver_unregister(&msm_panel_driver);
+	mdp = NULL;
+}
+
+static struct class_interface msm_fb_interface = {
+	.add_dev = &msmfb_add_mdp_device,
+	.remove_dev = &msmfb_remove_mdp_device,
+};
+
+static int __init msmfb_init(void)
+{
+	return register_mdp_client(&msm_fb_interface);
+}
+
+module_init(msmfb_init);
-- 
cgit v0.10.2


From 6e3658f0df6f708202159302b4f3915d9b97b8dc Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 22 Sep 2009 16:47:04 -0700
Subject: platinumfb: fix misplaced parenthesis

Since `+' has a higher precedence than the trinary operator `?', this
added `hres * (1 << color_mode)' to the boolean testing videomode and
depth.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ville Syrjala <syrjala@sci.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/platinumfb.c b/drivers/video/platinumfb.c
index bacfabd..0a366d8 100644
--- a/drivers/video/platinumfb.c
+++ b/drivers/video/platinumfb.c
@@ -223,10 +223,14 @@ static int platinumfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 
 static inline int platinum_vram_reqd(int video_mode, int color_mode)
 {
-	return vmode_attrs[video_mode-1].vres *
-	       (vmode_attrs[video_mode-1].hres * (1<<color_mode) +
-		((video_mode == VMODE_832_624_75) &&
-		 (color_mode > CMODE_8)) ? 0x10 : 0x20) + 0x1000;
+	int baseval = vmode_attrs[video_mode-1].hres * (1<<color_mode);
+
+	if ((video_mode == VMODE_832_624_75) && (color_mode > CMODE_8))
+		baseval += 0x10;
+	else
+		baseval += 0x20;
+
+	return vmode_attrs[video_mode-1].vres * baseval + 0x1000;
 }
 
 #define STORE_D2(a, d) { \
-- 
cgit v0.10.2


From 4ed824d9aead77a6a4eb1e89c3b3d270ba386fad Mon Sep 17 00:00:00 2001
From: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Date: Tue, 22 Sep 2009 16:47:06 -0700
Subject: davinci: fb: Frame Buffer driver for TI DA8xx/OMAP-L1xx

Add LCD controller (LCDC) driver for TI's DA8xx/OMAP-L1xx architecture.
LCDC specifications can be found at http://www.ti.com/litv/pdf/sprufm0a.

LCDC on DA8xx consists of two independent controllers, the Raster
Controller and the LCD Interface Display Driver (LIDD) controller.  LIDD
further supports character and graphic displays.

This patch adds support for the graphic display (Sharp LQ035Q3DG01) found
on the DA830 based EVM.  The EVM details can be found at:
http://support.spectrumdigital.com/boards/dskda830/revc/.

Signed-off-by: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Signed-off-by: Pavel Kiryukhin <pkiryukhin@ru.mvista.com>
Signed-off-by: Steve Chen <schen@mvista.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
DESC
davinci-fb-frame-buffer-driver-for-ti-da8xx-omap-l1xx-fix
EDESC
From: Andrew Morton <akpm@linux-foundation.org>

fix kconfig indenting

Cc: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: Pavel Kiryukhin <pkiryukhin@ru.mvista.com>
Cc: Steve Chen <schen@mvista.com>
Cc: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 05184a1..a7944ef 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2041,6 +2041,17 @@ config FB_SH7760
 	  and 8, 15 or 16 bpp color; 90 degrees clockwise display rotation for
 	  panels <= 320 pixel horizontal resolution.
 
+config FB_DA8XX
+	tristate "DA8xx/OMAP-L1xx Framebuffer support"
+	depends on FB && ARCH_DAVINCI_DA8XX
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	---help---
+	  This is the frame buffer device driver for the TI LCD controller
+	  found on DA8xx/OMAP-L1xx SoCs.
+	  If unsure, say N.
+
 config FB_VIRTUAL
 	tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
 	depends on FB
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index f4b6c15..85b2d23 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -137,6 +137,7 @@ obj-$(CONFIG_FB_OF)               += offb.o
 obj-$(CONFIG_FB_BF54X_LQ043)	  += bf54x-lq043fb.o
 obj-$(CONFIG_FB_BFIN_T350MCQB)	  += bfin-t350mcqb-fb.o
 obj-$(CONFIG_FB_MX3)		  += mx3fb.o
+obj-$(CONFIG_FB_DA8XX)		  += da8xx-fb.o
 
 # the test framebuffer is last
 obj-$(CONFIG_FB_VIRTUAL)          += vfb.o
diff --git a/drivers/video/da8xx-fb.c b/drivers/video/da8xx-fb.c
new file mode 100644
index 0000000..e0bbc66
--- /dev/null
+++ b/drivers/video/da8xx-fb.c
@@ -0,0 +1,909 @@
+/*
+ * Copyright (C) 2008-2009 MontaVista Software Inc.
+ * Copyright (C) 2008-2009 Texas Instruments Inc
+ *
+ * Based on the LCD driver for TI Avalanche processors written by
+ * Ajay Singh and Shalom Hai.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option)any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fb.h>
+#include <linux/dma-mapping.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <video/da8xx-fb.h>
+
+#define DRIVER_NAME "da8xx_lcdc"
+
+/* LCD Status Register */
+#define LCD_END_OF_FRAME0		BIT(8)
+#define LCD_FIFO_UNDERFLOW		BIT(5)
+#define LCD_SYNC_LOST			BIT(2)
+
+/* LCD DMA Control Register */
+#define LCD_DMA_BURST_SIZE(x)		((x) << 4)
+#define LCD_DMA_BURST_1			0x0
+#define LCD_DMA_BURST_2			0x1
+#define LCD_DMA_BURST_4			0x2
+#define LCD_DMA_BURST_8			0x3
+#define LCD_DMA_BURST_16		0x4
+#define LCD_END_OF_FRAME_INT_ENA	BIT(2)
+#define LCD_DUAL_FRAME_BUFFER_ENABLE	BIT(0)
+
+/* LCD Control Register */
+#define LCD_CLK_DIVISOR(x)		((x) << 8)
+#define LCD_RASTER_MODE			0x01
+
+/* LCD Raster Control Register */
+#define LCD_PALETTE_LOAD_MODE(x)	((x) << 20)
+#define PALETTE_AND_DATA		0x00
+#define PALETTE_ONLY			0x01
+
+#define LCD_MONO_8BIT_MODE		BIT(9)
+#define LCD_RASTER_ORDER		BIT(8)
+#define LCD_TFT_MODE			BIT(7)
+#define LCD_UNDERFLOW_INT_ENA		BIT(6)
+#define LCD_MONOCHROME_MODE		BIT(1)
+#define LCD_RASTER_ENABLE		BIT(0)
+#define LCD_TFT_ALT_ENABLE		BIT(23)
+#define LCD_STN_565_ENABLE		BIT(24)
+
+/* LCD Raster Timing 2 Register */
+#define LCD_AC_BIAS_TRANSITIONS_PER_INT(x)	((x) << 16)
+#define LCD_AC_BIAS_FREQUENCY(x)		((x) << 8)
+#define LCD_SYNC_CTRL				BIT(25)
+#define LCD_SYNC_EDGE				BIT(24)
+#define LCD_INVERT_PIXEL_CLOCK			BIT(22)
+#define LCD_INVERT_LINE_CLOCK			BIT(21)
+#define LCD_INVERT_FRAME_CLOCK			BIT(20)
+
+/* LCD Block */
+#define  LCD_CTRL_REG				0x4
+#define  LCD_STAT_REG				0x8
+#define  LCD_RASTER_CTRL_REG			0x28
+#define  LCD_RASTER_TIMING_0_REG		0x2C
+#define  LCD_RASTER_TIMING_1_REG		0x30
+#define  LCD_RASTER_TIMING_2_REG		0x34
+#define  LCD_DMA_CTRL_REG			0x40
+#define  LCD_DMA_FRM_BUF_BASE_ADDR_0_REG	0x44
+#define  LCD_DMA_FRM_BUF_CEILING_ADDR_0_REG	0x48
+
+#define WSI_TIMEOUT	50
+#define PALETTE_SIZE	256
+#define LEFT_MARGIN	64
+#define RIGHT_MARGIN	64
+#define UPPER_MARGIN	32
+#define LOWER_MARGIN	32
+
+static resource_size_t da8xx_fb_reg_base;
+static struct resource *lcdc_regs;
+
+static inline unsigned int lcdc_read(unsigned int addr)
+{
+	return (unsigned int)__raw_readl(da8xx_fb_reg_base + (addr));
+}
+
+static inline void lcdc_write(unsigned int val, unsigned int addr)
+{
+	__raw_writel(val, da8xx_fb_reg_base + (addr));
+}
+
+struct da8xx_fb_par {
+	wait_queue_head_t da8xx_wq;
+	resource_size_t p_palette_base;
+	unsigned char *v_palette_base;
+	struct clk *lcdc_clk;
+	int irq;
+	unsigned short pseudo_palette[16];
+	unsigned int databuf_sz;
+	unsigned int palette_sz;
+};
+
+/* Variable Screen Information */
+static struct fb_var_screeninfo da8xx_fb_var __devinitdata = {
+	.xoffset = 0,
+	.yoffset = 0,
+	.transp = {0, 0, 0},
+	.nonstd = 0,
+	.activate = 0,
+	.height = -1,
+	.width = -1,
+	.pixclock = 46666,	/* 46us - AUO display */
+	.accel_flags = 0,
+	.left_margin = LEFT_MARGIN,
+	.right_margin = RIGHT_MARGIN,
+	.upper_margin = UPPER_MARGIN,
+	.lower_margin = LOWER_MARGIN,
+	.sync = 0,
+	.vmode = FB_VMODE_NONINTERLACED
+};
+
+static struct fb_fix_screeninfo da8xx_fb_fix __devinitdata = {
+	.id = "DA8xx FB Drv",
+	.type = FB_TYPE_PACKED_PIXELS,
+	.type_aux = 0,
+	.visual = FB_VISUAL_PSEUDOCOLOR,
+	.xpanstep = 1,
+	.ypanstep = 1,
+	.ywrapstep = 1,
+	.accel = FB_ACCEL_NONE
+};
+
+struct da8xx_panel {
+	const char	name[25];	/* Full name <vendor>_<model> */
+	unsigned short	width;
+	unsigned short	height;
+	int		hfp;		/* Horizontal front porch */
+	int		hbp;		/* Horizontal back porch */
+	int		hsw;		/* Horizontal Sync Pulse Width */
+	int		vfp;		/* Vertical front porch */
+	int		vbp;		/* Vertical back porch */
+	int		vsw;		/* Vertical Sync Pulse Width */
+	int		pxl_clk;	/* Pixel clock */
+};
+
+static struct da8xx_panel known_lcd_panels[] = {
+	/* Sharp LCD035Q3DG01 */
+	[0] = {
+		.name = "Sharp_LCD035Q3DG01",
+		.width = 320,
+		.height = 240,
+		.hfp = 8,
+		.hbp = 6,
+		.hsw = 0,
+		.vfp = 2,
+		.vbp = 2,
+		.vsw = 0,
+		.pxl_clk = 0x10,
+	},
+	/* Sharp LK043T1DG01 */
+	[1] = {
+		.name = "Sharp_LK043T1DG01",
+		.width = 480,
+		.height = 272,
+		.hfp = 2,
+		.hbp = 2,
+		.hsw = 41,
+		.vfp = 2,
+		.vbp = 2,
+		.vsw = 10,
+		.pxl_clk = 0x12,
+	},
+};
+
+/* Disable the Raster Engine of the LCD Controller */
+static int lcd_disable_raster(struct da8xx_fb_par *par)
+{
+	int ret = 0;
+	u32 reg;
+
+	reg = lcdc_read(LCD_RASTER_CTRL_REG);
+	if (reg & LCD_RASTER_ENABLE) {
+		lcdc_write(reg & ~LCD_RASTER_ENABLE, LCD_RASTER_CTRL_REG);
+		ret = wait_event_interruptible_timeout(par->da8xx_wq,
+						!lcdc_read(LCD_STAT_REG) &
+						LCD_END_OF_FRAME0, WSI_TIMEOUT);
+	}
+
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static void lcd_blit(int load_mode, struct da8xx_fb_par *par)
+{
+	u32 tmp = par->p_palette_base + par->databuf_sz - 4;
+	u32 reg;
+
+	/* Update the databuf in the hw. */
+	lcdc_write(par->p_palette_base, LCD_DMA_FRM_BUF_BASE_ADDR_0_REG);
+	lcdc_write(tmp, LCD_DMA_FRM_BUF_CEILING_ADDR_0_REG);
+
+	/* Start the DMA. */
+	reg = lcdc_read(LCD_RASTER_CTRL_REG);
+	reg &= ~(3 << 20);
+	if (load_mode == LOAD_DATA)
+		reg |= LCD_PALETTE_LOAD_MODE(PALETTE_AND_DATA);
+	else if (load_mode == LOAD_PALETTE)
+		reg |= LCD_PALETTE_LOAD_MODE(PALETTE_ONLY);
+
+	lcdc_write(reg, LCD_RASTER_CTRL_REG);
+}
+
+/* Configure the Burst Size of DMA */
+static int lcd_cfg_dma(int burst_size)
+{
+	u32 reg;
+
+	reg = lcdc_read(LCD_DMA_CTRL_REG) & 0x00000001;
+	switch (burst_size) {
+	case 1:
+		reg |= LCD_DMA_BURST_SIZE(LCD_DMA_BURST_1);
+		break;
+	case 2:
+		reg |= LCD_DMA_BURST_SIZE(LCD_DMA_BURST_2);
+		break;
+	case 4:
+		reg |= LCD_DMA_BURST_SIZE(LCD_DMA_BURST_4);
+		break;
+	case 8:
+		reg |= LCD_DMA_BURST_SIZE(LCD_DMA_BURST_8);
+		break;
+	case 16:
+		reg |= LCD_DMA_BURST_SIZE(LCD_DMA_BURST_16);
+		break;
+	default:
+		return -EINVAL;
+	}
+	lcdc_write(reg | LCD_END_OF_FRAME_INT_ENA, LCD_DMA_CTRL_REG);
+
+	return 0;
+}
+
+static void lcd_cfg_ac_bias(int period, int transitions_per_int)
+{
+	u32 reg;
+
+	/* Set the AC Bias Period and Number of Transisitons per Interrupt */
+	reg = lcdc_read(LCD_RASTER_TIMING_2_REG) & 0xFFF00000;
+	reg |= LCD_AC_BIAS_FREQUENCY(period) |
+		LCD_AC_BIAS_TRANSITIONS_PER_INT(transitions_per_int);
+	lcdc_write(reg, LCD_RASTER_TIMING_2_REG);
+}
+
+static void lcd_cfg_horizontal_sync(int back_porch, int pulse_width,
+		int front_porch)
+{
+	u32 reg;
+
+	reg = lcdc_read(LCD_RASTER_TIMING_0_REG) & 0xf;
+	reg |= ((back_porch & 0xff) << 24)
+	    | ((front_porch & 0xff) << 16)
+	    | ((pulse_width & 0x3f) << 10);
+	lcdc_write(reg, LCD_RASTER_TIMING_0_REG);
+}
+
+static void lcd_cfg_vertical_sync(int back_porch, int pulse_width,
+		int front_porch)
+{
+	u32 reg;
+
+	reg = lcdc_read(LCD_RASTER_TIMING_1_REG) & 0x3ff;
+	reg |= ((back_porch & 0xff) << 24)
+	    | ((front_porch & 0xff) << 16)
+	    | ((pulse_width & 0x3f) << 10);
+	lcdc_write(reg, LCD_RASTER_TIMING_1_REG);
+}
+
+static int lcd_cfg_display(const struct lcd_ctrl_config *cfg)
+{
+	u32 reg;
+
+	reg = lcdc_read(LCD_RASTER_CTRL_REG) & ~(LCD_TFT_MODE |
+						LCD_MONO_8BIT_MODE |
+						LCD_MONOCHROME_MODE);
+
+	switch (cfg->p_disp_panel->panel_shade) {
+	case MONOCHROME:
+		reg |= LCD_MONOCHROME_MODE;
+		if (cfg->mono_8bit_mode)
+			reg |= LCD_MONO_8BIT_MODE;
+		break;
+	case COLOR_ACTIVE:
+		reg |= LCD_TFT_MODE;
+		if (cfg->tft_alt_mode)
+			reg |= LCD_TFT_ALT_ENABLE;
+		break;
+
+	case COLOR_PASSIVE:
+		if (cfg->stn_565_mode)
+			reg |= LCD_STN_565_ENABLE;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* enable additional interrupts here */
+	reg |= LCD_UNDERFLOW_INT_ENA;
+
+	lcdc_write(reg, LCD_RASTER_CTRL_REG);
+
+	reg = lcdc_read(LCD_RASTER_TIMING_2_REG);
+
+	if (cfg->sync_ctrl)
+		reg |= LCD_SYNC_CTRL;
+	else
+		reg &= ~LCD_SYNC_CTRL;
+
+	if (cfg->sync_edge)
+		reg |= LCD_SYNC_EDGE;
+	else
+		reg &= ~LCD_SYNC_EDGE;
+
+	if (cfg->invert_pxl_clock)
+		reg |= LCD_INVERT_PIXEL_CLOCK;
+	else
+		reg &= ~LCD_INVERT_PIXEL_CLOCK;
+
+	if (cfg->invert_line_clock)
+		reg |= LCD_INVERT_LINE_CLOCK;
+	else
+		reg &= ~LCD_INVERT_LINE_CLOCK;
+
+	if (cfg->invert_frm_clock)
+		reg |= LCD_INVERT_FRAME_CLOCK;
+	else
+		reg &= ~LCD_INVERT_FRAME_CLOCK;
+
+	lcdc_write(reg, LCD_RASTER_TIMING_2_REG);
+
+	return 0;
+}
+
+static int lcd_cfg_frame_buffer(struct da8xx_fb_par *par, u32 width, u32 height,
+		u32 bpp, u32 raster_order)
+{
+	u32 bpl, reg;
+
+	/* Disable Dual Frame Buffer. */
+	reg = lcdc_read(LCD_DMA_CTRL_REG);
+	lcdc_write(reg & ~LCD_DUAL_FRAME_BUFFER_ENABLE,
+						LCD_DMA_CTRL_REG);
+	/* Set the Panel Width */
+	/* Pixels per line = (PPL + 1)*16 */
+	/*0x3F in bits 4..9 gives max horisontal resolution = 1024 pixels*/
+	width &= 0x3f0;
+	reg = lcdc_read(LCD_RASTER_TIMING_0_REG);
+	reg &= 0xfffffc00;
+	reg |= ((width >> 4) - 1) << 4;
+	lcdc_write(reg, LCD_RASTER_TIMING_0_REG);
+
+	/* Set the Panel Height */
+	reg = lcdc_read(LCD_RASTER_TIMING_1_REG);
+	reg = ((height - 1) & 0x3ff) | (reg & 0xfffffc00);
+	lcdc_write(reg, LCD_RASTER_TIMING_1_REG);
+
+	/* Set the Raster Order of the Frame Buffer */
+	reg = lcdc_read(LCD_RASTER_CTRL_REG) & ~(1 << 8);
+	if (raster_order)
+		reg |= LCD_RASTER_ORDER;
+	lcdc_write(reg, LCD_RASTER_CTRL_REG);
+
+	switch (bpp) {
+	case 1:
+	case 2:
+	case 4:
+	case 16:
+		par->palette_sz = 16 * 2;
+		break;
+
+	case 8:
+		par->palette_sz = 256 * 2;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	bpl = width * bpp / 8;
+	par->databuf_sz = height * bpl + par->palette_sz;
+
+	return 0;
+}
+
+static int fb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			      unsigned blue, unsigned transp,
+			      struct fb_info *info)
+{
+	struct da8xx_fb_par *par = info->par;
+	unsigned short *palette = (unsigned short *)par->v_palette_base;
+	u_short pal;
+
+	if (regno > 255)
+		return 1;
+
+	if (info->fix.visual == FB_VISUAL_DIRECTCOLOR)
+		return 1;
+
+	if (info->var.bits_per_pixel == 8) {
+		red >>= 4;
+		green >>= 8;
+		blue >>= 12;
+
+		pal = (red & 0x0f00);
+		pal |= (green & 0x00f0);
+		pal |= (blue & 0x000f);
+
+		palette[regno] = pal;
+
+	} else if ((info->var.bits_per_pixel == 16) && regno < 16) {
+		red >>= (16 - info->var.red.length);
+		red <<= info->var.red.offset;
+
+		green >>= (16 - info->var.green.length);
+		green <<= info->var.green.offset;
+
+		blue >>= (16 - info->var.blue.length);
+		blue <<= info->var.blue.offset;
+
+		par->pseudo_palette[regno] = red | green | blue;
+
+		palette[0] = 0x4000;
+	}
+
+	return 0;
+}
+
+static int lcd_reset(struct da8xx_fb_par *par)
+{
+	int ret = 0;
+
+	/* Disable the Raster if previously Enabled */
+	if (lcdc_read(LCD_RASTER_CTRL_REG) & LCD_RASTER_ENABLE)
+		ret = lcd_disable_raster(par);
+
+	/* DMA has to be disabled */
+	lcdc_write(0, LCD_DMA_CTRL_REG);
+	lcdc_write(0, LCD_RASTER_CTRL_REG);
+
+	return ret;
+}
+
+static int lcd_init(struct da8xx_fb_par *par, const struct lcd_ctrl_config *cfg,
+		struct da8xx_panel *panel)
+{
+	u32 bpp;
+	int ret = 0;
+
+	ret = lcd_reset(par);
+	if (ret != 0)
+		return ret;
+
+	/* Configure the LCD clock divisor. */
+	lcdc_write(LCD_CLK_DIVISOR(panel->pxl_clk) |
+			(LCD_RASTER_MODE & 0x1), LCD_CTRL_REG);
+
+	/* Configure the DMA burst size. */
+	ret = lcd_cfg_dma(cfg->dma_burst_sz);
+	if (ret < 0)
+		return ret;
+
+	/* Configure the AC bias properties. */
+	lcd_cfg_ac_bias(cfg->ac_bias, cfg->ac_bias_intrpt);
+
+	/* Configure the vertical and horizontal sync properties. */
+	lcd_cfg_vertical_sync(panel->vbp, panel->vsw, panel->vfp);
+	lcd_cfg_horizontal_sync(panel->hbp, panel->hsw, panel->hfp);
+
+	/* Configure for disply */
+	ret = lcd_cfg_display(cfg);
+	if (ret < 0)
+		return ret;
+
+	if (QVGA != cfg->p_disp_panel->panel_type)
+		return -EINVAL;
+
+	if (cfg->bpp <= cfg->p_disp_panel->max_bpp &&
+	    cfg->bpp >= cfg->p_disp_panel->min_bpp)
+		bpp = cfg->bpp;
+	else
+		bpp = cfg->p_disp_panel->max_bpp;
+	if (bpp == 12)
+		bpp = 16;
+	ret = lcd_cfg_frame_buffer(par, (unsigned int)panel->width,
+				(unsigned int)panel->height, bpp,
+				cfg->raster_order);
+	if (ret < 0)
+		return ret;
+
+	/* Configure FDD */
+	lcdc_write((lcdc_read(LCD_RASTER_CTRL_REG) & 0xfff00fff) |
+		       (cfg->fdd << 12), LCD_RASTER_CTRL_REG);
+
+	return 0;
+}
+
+static irqreturn_t lcdc_irq_handler(int irq, void *arg)
+{
+	u32 stat = lcdc_read(LCD_STAT_REG);
+	struct da8xx_fb_par *par = arg;
+	u32 reg;
+
+	if ((stat & LCD_SYNC_LOST) && (stat & LCD_FIFO_UNDERFLOW)) {
+		reg = lcdc_read(LCD_RASTER_CTRL_REG);
+		lcdc_write(reg & ~LCD_RASTER_ENABLE, LCD_RASTER_CTRL_REG);
+		lcdc_write(stat, LCD_STAT_REG);
+		lcdc_write(reg | LCD_RASTER_ENABLE, LCD_RASTER_CTRL_REG);
+	} else
+		lcdc_write(stat, LCD_STAT_REG);
+
+	wake_up_interruptible(&par->da8xx_wq);
+	return IRQ_HANDLED;
+}
+
+static int fb_check_var(struct fb_var_screeninfo *var,
+			struct fb_info *info)
+{
+	int err = 0;
+
+	switch (var->bits_per_pixel) {
+	case 1:
+	case 8:
+		var->red.offset = 0;
+		var->red.length = 8;
+		var->green.offset = 0;
+		var->green.length = 8;
+		var->blue.offset = 0;
+		var->blue.length = 8;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
+	case 4:
+		var->red.offset = 0;
+		var->red.length = 4;
+		var->green.offset = 0;
+		var->green.length = 4;
+		var->blue.offset = 0;
+		var->blue.length = 4;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
+	case 16:		/* RGB 565 */
+		var->red.offset = 0;
+		var->red.length = 5;
+		var->green.offset = 5;
+		var->green.length = 6;
+		var->blue.offset = 11;
+		var->blue.length = 5;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	var->red.msb_right = 0;
+	var->green.msb_right = 0;
+	var->blue.msb_right = 0;
+	var->transp.msb_right = 0;
+	return err;
+}
+
+static int __devexit fb_remove(struct platform_device *dev)
+{
+	struct fb_info *info = dev_get_drvdata(&dev->dev);
+	int ret = 0;
+
+	if (info) {
+		struct da8xx_fb_par *par = info->par;
+
+		if (lcdc_read(LCD_RASTER_CTRL_REG) & LCD_RASTER_ENABLE)
+			ret = lcd_disable_raster(par);
+		lcdc_write(0, LCD_RASTER_CTRL_REG);
+
+		/* disable DMA  */
+		lcdc_write(0, LCD_DMA_CTRL_REG);
+
+		unregister_framebuffer(info);
+		fb_dealloc_cmap(&info->cmap);
+		dma_free_coherent(NULL, par->databuf_sz + PAGE_SIZE,
+					info->screen_base,
+					info->fix.smem_start);
+		free_irq(par->irq, par);
+		clk_disable(par->lcdc_clk);
+		clk_put(par->lcdc_clk);
+		framebuffer_release(info);
+		iounmap((void __iomem *)da8xx_fb_reg_base);
+		release_mem_region(lcdc_regs->start, resource_size(lcdc_regs));
+
+	}
+	return ret;
+}
+
+static int fb_ioctl(struct fb_info *info, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct lcd_sync_arg sync_arg;
+
+	switch (cmd) {
+	case FBIOGET_CONTRAST:
+	case FBIOPUT_CONTRAST:
+	case FBIGET_BRIGHTNESS:
+	case FBIPUT_BRIGHTNESS:
+	case FBIGET_COLOR:
+	case FBIPUT_COLOR:
+		return -EINVAL;
+	case FBIPUT_HSYNC:
+		if (copy_from_user(&sync_arg, (char *)arg,
+				sizeof(struct lcd_sync_arg)))
+			return -EINVAL;
+		lcd_cfg_horizontal_sync(sync_arg.back_porch,
+					sync_arg.pulse_width,
+					sync_arg.front_porch);
+		break;
+	case FBIPUT_VSYNC:
+		if (copy_from_user(&sync_arg, (char *)arg,
+				sizeof(struct lcd_sync_arg)))
+			return -EINVAL;
+		lcd_cfg_vertical_sync(sync_arg.back_porch,
+					sync_arg.pulse_width,
+					sync_arg.front_porch);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct fb_ops da8xx_fb_ops = {
+	.owner = THIS_MODULE,
+	.fb_check_var = fb_check_var,
+	.fb_setcolreg = fb_setcolreg,
+	.fb_ioctl = fb_ioctl,
+	.fb_fillrect = cfb_fillrect,
+	.fb_copyarea = cfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+};
+
+static int __init fb_probe(struct platform_device *device)
+{
+	struct da8xx_lcdc_platform_data *fb_pdata =
+						device->dev.platform_data;
+	struct lcd_ctrl_config *lcd_cfg;
+	struct da8xx_panel *lcdc_info;
+	struct fb_info *da8xx_fb_info;
+	struct clk *fb_clk = NULL;
+	struct da8xx_fb_par *par;
+	resource_size_t len;
+	int ret, i;
+
+	if (fb_pdata == NULL) {
+		dev_err(&device->dev, "Can not get platform data\n");
+		return -ENOENT;
+	}
+
+	lcdc_regs = platform_get_resource(device, IORESOURCE_MEM, 0);
+	if (!lcdc_regs) {
+		dev_err(&device->dev,
+			"Can not get memory resource for LCD controller\n");
+		return -ENOENT;
+	}
+
+	len = resource_size(lcdc_regs);
+
+	lcdc_regs = request_mem_region(lcdc_regs->start, len, lcdc_regs->name);
+	if (!lcdc_regs)
+		return -EBUSY;
+
+	da8xx_fb_reg_base = (resource_size_t)ioremap(lcdc_regs->start, len);
+	if (!da8xx_fb_reg_base) {
+		ret = -EBUSY;
+		goto err_request_mem;
+	}
+
+	fb_clk = clk_get(&device->dev, NULL);
+	if (IS_ERR(fb_clk)) {
+		dev_err(&device->dev, "Can not get device clock\n");
+		ret = -ENODEV;
+		goto err_ioremap;
+	}
+	ret = clk_enable(fb_clk);
+	if (ret)
+		goto err_clk_put;
+
+	for (i = 0, lcdc_info = known_lcd_panels;
+		i < ARRAY_SIZE(known_lcd_panels);
+		i++, lcdc_info++) {
+		if (strcmp(fb_pdata->type, lcdc_info->name) == 0)
+			break;
+	}
+
+	if (i == ARRAY_SIZE(known_lcd_panels)) {
+		dev_err(&device->dev, "GLCD: No valid panel found\n");
+		ret = ENODEV;
+		goto err_clk_disable;
+	} else
+		dev_info(&device->dev, "GLCD: Found %s panel\n",
+					fb_pdata->type);
+
+	lcd_cfg = (struct lcd_ctrl_config *)fb_pdata->controller_data;
+
+	da8xx_fb_info = framebuffer_alloc(sizeof(struct da8xx_fb_par),
+					&device->dev);
+	if (!da8xx_fb_info) {
+		dev_dbg(&device->dev, "Memory allocation failed for fb_info\n");
+		ret = -ENOMEM;
+		goto err_clk_disable;
+	}
+
+	par = da8xx_fb_info->par;
+
+	if (lcd_init(par, lcd_cfg, lcdc_info) < 0) {
+		dev_err(&device->dev, "lcd_init failed\n");
+		ret = -EFAULT;
+		goto err_release_fb;
+	}
+
+	/* allocate frame buffer */
+	da8xx_fb_info->screen_base = dma_alloc_coherent(NULL,
+					par->databuf_sz + PAGE_SIZE,
+					(resource_size_t *)
+					&da8xx_fb_info->fix.smem_start,
+					GFP_KERNEL | GFP_DMA);
+
+	if (!da8xx_fb_info->screen_base) {
+		dev_err(&device->dev,
+			"GLCD: kmalloc for frame buffer failed\n");
+		ret = -EINVAL;
+		goto err_release_fb;
+	}
+
+	/* move palette base pointer by (PAGE_SIZE - palette_sz) bytes */
+	par->v_palette_base = da8xx_fb_info->screen_base +
+				(PAGE_SIZE - par->palette_sz);
+	par->p_palette_base = da8xx_fb_info->fix.smem_start +
+				(PAGE_SIZE - par->palette_sz);
+
+	/* the rest of the frame buffer is pixel data */
+	da8xx_fb_fix.smem_start = par->p_palette_base + par->palette_sz;
+	da8xx_fb_fix.smem_len = par->databuf_sz - par->palette_sz;
+	da8xx_fb_fix.line_length = (lcdc_info->width * lcd_cfg->bpp) / 8;
+
+	par->lcdc_clk = fb_clk;
+
+	init_waitqueue_head(&par->da8xx_wq);
+
+	par->irq = platform_get_irq(device, 0);
+	if (par->irq < 0) {
+		ret = -ENOENT;
+		goto err_release_fb_mem;
+	}
+
+	ret = request_irq(par->irq, lcdc_irq_handler, 0, DRIVER_NAME, par);
+	if (ret)
+		goto err_release_fb_mem;
+
+	/* Initialize par */
+	da8xx_fb_info->var.bits_per_pixel = lcd_cfg->bpp;
+
+	da8xx_fb_var.xres = lcdc_info->width;
+	da8xx_fb_var.xres_virtual = lcdc_info->width;
+
+	da8xx_fb_var.yres = lcdc_info->height;
+	da8xx_fb_var.yres_virtual = lcdc_info->height;
+
+	da8xx_fb_var.grayscale =
+	    lcd_cfg->p_disp_panel->panel_shade == MONOCHROME ? 1 : 0;
+	da8xx_fb_var.bits_per_pixel = lcd_cfg->bpp;
+
+	da8xx_fb_var.hsync_len = lcdc_info->hsw;
+	da8xx_fb_var.vsync_len = lcdc_info->vsw;
+
+	/* Initialize fbinfo */
+	da8xx_fb_info->flags = FBINFO_FLAG_DEFAULT;
+	da8xx_fb_info->fix = da8xx_fb_fix;
+	da8xx_fb_info->var = da8xx_fb_var;
+	da8xx_fb_info->fbops = &da8xx_fb_ops;
+	da8xx_fb_info->pseudo_palette = par->pseudo_palette;
+
+	ret = fb_alloc_cmap(&da8xx_fb_info->cmap, PALETTE_SIZE, 0);
+	if (ret)
+		goto err_free_irq;
+
+	/* First palette_sz byte of the frame buffer is the palette */
+	da8xx_fb_info->cmap.len = par->palette_sz;
+
+	/* Flush the buffer to the screen. */
+	lcd_blit(LOAD_DATA, par);
+
+	/* initialize var_screeninfo */
+	da8xx_fb_var.activate = FB_ACTIVATE_FORCE;
+	fb_set_var(da8xx_fb_info, &da8xx_fb_var);
+
+	dev_set_drvdata(&device->dev, da8xx_fb_info);
+	/* Register the Frame Buffer  */
+	if (register_framebuffer(da8xx_fb_info) < 0) {
+		dev_err(&device->dev,
+			"GLCD: Frame Buffer Registration Failed!\n");
+		ret = -EINVAL;
+		goto err_dealloc_cmap;
+	}
+
+	/* enable raster engine */
+	lcdc_write(lcdc_read(LCD_RASTER_CTRL_REG) |
+			LCD_RASTER_ENABLE, LCD_RASTER_CTRL_REG);
+
+	return 0;
+
+err_dealloc_cmap:
+	fb_dealloc_cmap(&da8xx_fb_info->cmap);
+
+err_free_irq:
+	free_irq(par->irq, par);
+
+err_release_fb_mem:
+	dma_free_coherent(NULL, par->databuf_sz + PAGE_SIZE,
+				da8xx_fb_info->screen_base,
+				da8xx_fb_info->fix.smem_start);
+
+err_release_fb:
+	framebuffer_release(da8xx_fb_info);
+
+err_clk_disable:
+	clk_disable(fb_clk);
+
+err_clk_put:
+	clk_put(fb_clk);
+
+err_ioremap:
+	iounmap((void __iomem *)da8xx_fb_reg_base);
+
+err_request_mem:
+	release_mem_region(lcdc_regs->start, len);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int fb_suspend(struct platform_device *dev, pm_message_t state)
+{
+	 return -EBUSY;
+}
+static int fb_resume(struct platform_device *dev)
+{
+	 return -EBUSY;
+}
+#else
+#define fb_suspend NULL
+#define fb_resume NULL
+#endif
+
+static struct platform_driver da8xx_fb_driver = {
+	.probe = fb_probe,
+	.remove = fb_remove,
+	.suspend = fb_suspend,
+	.resume = fb_resume,
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .owner = THIS_MODULE,
+		   },
+};
+
+static int __init da8xx_fb_init(void)
+{
+	return platform_driver_register(&da8xx_fb_driver);
+}
+
+static void __exit da8xx_fb_cleanup(void)
+{
+	platform_driver_unregister(&da8xx_fb_driver);
+}
+
+module_init(da8xx_fb_init);
+module_exit(da8xx_fb_cleanup);
+
+MODULE_DESCRIPTION("Framebuffer driver for TI da8xx/omap-l1xx");
+MODULE_AUTHOR("Texas Instruments");
+MODULE_LICENSE("GPL");
diff --git a/include/video/da8xx-fb.h b/include/video/da8xx-fb.h
new file mode 100644
index 0000000..5f77675
--- /dev/null
+++ b/include/video/da8xx-fb.h
@@ -0,0 +1,106 @@
+/*
+ * Header file for TI DA8XX LCD controller platform data.
+ *
+ * Copyright (C) 2008-2009 MontaVista Software Inc.
+ * Copyright (C) 2008-2009 Texas Instruments Inc
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef DA8XX_FB_H
+#define DA8XX_FB_H
+
+enum panel_type {
+	QVGA = 0
+};
+
+enum panel_shade {
+	MONOCHROME = 0,
+	COLOR_ACTIVE,
+	COLOR_PASSIVE,
+};
+
+enum raster_load_mode {
+	LOAD_DATA = 1,
+	LOAD_PALETTE,
+};
+
+struct display_panel {
+	enum panel_type panel_type; /* QVGA */
+	int max_bpp;
+	int min_bpp;
+	enum panel_shade panel_shade;
+};
+
+struct da8xx_lcdc_platform_data {
+	const char manu_name[10];
+	void *controller_data;
+	const char type[25];
+};
+
+struct lcd_ctrl_config {
+	const struct display_panel *p_disp_panel;
+
+	/* AC Bias Pin Frequency */
+	int ac_bias;
+
+	/* AC Bias Pin Transitions per Interrupt */
+	int ac_bias_intrpt;
+
+	/* DMA burst size */
+	int dma_burst_sz;
+
+	/* Bits per pixel */
+	int bpp;
+
+	/* FIFO DMA Request Delay */
+	int fdd;
+
+	/* TFT Alternative Signal Mapping (Only for active) */
+	unsigned char tft_alt_mode;
+
+	/* 12 Bit Per Pixel (5-6-5) Mode (Only for passive) */
+	unsigned char stn_565_mode;
+
+	/* Mono 8-bit Mode: 1=D0-D7 or 0=D0-D3 */
+	unsigned char mono_8bit_mode;
+
+	/* Invert pixel clock */
+	unsigned char invert_pxl_clock;
+
+	/* Invert line clock */
+	unsigned char invert_line_clock;
+
+	/* Invert frame clock  */
+	unsigned char invert_frm_clock;
+
+	/* Horizontal and Vertical Sync Edge: 0=rising 1=falling */
+	unsigned char sync_edge;
+
+	/* Horizontal and Vertical Sync: Control: 0=ignore */
+	unsigned char sync_ctrl;
+
+	/* Raster Data Order Select: 1=Most-to-least 0=Least-to-most */
+	unsigned char raster_order;
+};
+
+struct lcd_sync_arg {
+	int back_porch;
+	int front_porch;
+	int pulse_width;
+};
+
+/* ioctls */
+#define FBIOGET_CONTRAST	_IOR('F', 1, int)
+#define FBIOPUT_CONTRAST	_IOW('F', 2, int)
+#define FBIGET_BRIGHTNESS	_IOR('F', 3, int)
+#define FBIPUT_BRIGHTNESS	_IOW('F', 3, int)
+#define FBIGET_COLOR		_IOR('F', 5, int)
+#define FBIPUT_COLOR		_IOW('F', 6, int)
+#define FBIPUT_HSYNC		_IOW('F', 9, int)
+#define FBIPUT_VSYNC		_IOW('F', 10, int)
+
+#endif  /* ifndef DA8XX_FB_H */
+
-- 
cgit v0.10.2


From 2f93e8f4822fdd48fa9c4c901eea87ab1c902f87 Mon Sep 17 00:00:00 2001
From: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Date: Tue, 22 Sep 2009 16:47:06 -0700
Subject: davinci-fb-frame-buffer-driver-for-ti-da8xx-omap-l1xx-v4

Since the previous version, return values in ioctl() function have been
modified.

[akpm@linux-foundation.org: simplify lcd_disable_raster()]
Signed-off-by: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Signed-off-by: Pavel Kiryukhin <pkiryukhin@ru.mvista.com>
Signed-off-by: Steve Chen <schen@mvista.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/da8xx-fb.c b/drivers/video/da8xx-fb.c
index e0bbc66..42e1005 100644
--- a/drivers/video/da8xx-fb.c
+++ b/drivers/video/da8xx-fb.c
@@ -107,7 +107,6 @@ static inline void lcdc_write(unsigned int val, unsigned int addr)
 }
 
 struct da8xx_fb_par {
-	wait_queue_head_t da8xx_wq;
 	resource_size_t p_palette_base;
 	unsigned char *v_palette_base;
 	struct clk *lcdc_clk;
@@ -158,6 +157,7 @@ struct da8xx_panel {
 	int		vbp;		/* Vertical back porch */
 	int		vsw;		/* Vertical Sync Pulse Width */
 	int		pxl_clk;	/* Pixel clock */
+	unsigned char	invert_pxl_clk;	/* Invert Pixel clock */
 };
 
 static struct da8xx_panel known_lcd_panels[] = {
@@ -173,6 +173,7 @@ static struct da8xx_panel known_lcd_panels[] = {
 		.vbp = 2,
 		.vsw = 0,
 		.pxl_clk = 0x10,
+		.invert_pxl_clk = 1,
 	},
 	/* Sharp LK043T1DG01 */
 	[1] = {
@@ -186,29 +187,18 @@ static struct da8xx_panel known_lcd_panels[] = {
 		.vbp = 2,
 		.vsw = 10,
 		.pxl_clk = 0x12,
+		.invert_pxl_clk = 0,
 	},
 };
 
 /* Disable the Raster Engine of the LCD Controller */
-static int lcd_disable_raster(struct da8xx_fb_par *par)
+static void lcd_disable_raster(struct da8xx_fb_par *par)
 {
-	int ret = 0;
 	u32 reg;
 
 	reg = lcdc_read(LCD_RASTER_CTRL_REG);
-	if (reg & LCD_RASTER_ENABLE) {
+	if (reg & LCD_RASTER_ENABLE)
 		lcdc_write(reg & ~LCD_RASTER_ENABLE, LCD_RASTER_CTRL_REG);
-		ret = wait_event_interruptible_timeout(par->da8xx_wq,
-						!lcdc_read(LCD_STAT_REG) &
-						LCD_END_OF_FRAME0, WSI_TIMEOUT);
-	}
-
-	if (ret < 0)
-		return ret;
-	if (ret == 0)
-		return -ETIMEDOUT;
-
-	return 0;
 }
 
 static void lcd_blit(int load_mode, struct da8xx_fb_par *par)
@@ -256,7 +246,7 @@ static int lcd_cfg_dma(int burst_size)
 	default:
 		return -EINVAL;
 	}
-	lcdc_write(reg | LCD_END_OF_FRAME_INT_ENA, LCD_DMA_CTRL_REG);
+	lcdc_write(reg, LCD_DMA_CTRL_REG);
 
 	return 0;
 }
@@ -342,11 +332,6 @@ static int lcd_cfg_display(const struct lcd_ctrl_config *cfg)
 	else
 		reg &= ~LCD_SYNC_EDGE;
 
-	if (cfg->invert_pxl_clock)
-		reg |= LCD_INVERT_PIXEL_CLOCK;
-	else
-		reg &= ~LCD_INVERT_PIXEL_CLOCK;
-
 	if (cfg->invert_line_clock)
 		reg |= LCD_INVERT_LINE_CLOCK;
 	else
@@ -456,19 +441,15 @@ static int fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	return 0;
 }
 
-static int lcd_reset(struct da8xx_fb_par *par)
+static void lcd_reset(struct da8xx_fb_par *par)
 {
-	int ret = 0;
-
 	/* Disable the Raster if previously Enabled */
 	if (lcdc_read(LCD_RASTER_CTRL_REG) & LCD_RASTER_ENABLE)
-		ret = lcd_disable_raster(par);
+		lcd_disable_raster(par);
 
 	/* DMA has to be disabled */
 	lcdc_write(0, LCD_DMA_CTRL_REG);
 	lcdc_write(0, LCD_RASTER_CTRL_REG);
-
-	return ret;
 }
 
 static int lcd_init(struct da8xx_fb_par *par, const struct lcd_ctrl_config *cfg,
@@ -477,14 +458,19 @@ static int lcd_init(struct da8xx_fb_par *par, const struct lcd_ctrl_config *cfg,
 	u32 bpp;
 	int ret = 0;
 
-	ret = lcd_reset(par);
-	if (ret != 0)
-		return ret;
+	lcd_reset(par);
 
 	/* Configure the LCD clock divisor. */
 	lcdc_write(LCD_CLK_DIVISOR(panel->pxl_clk) |
 			(LCD_RASTER_MODE & 0x1), LCD_CTRL_REG);
 
+	if (panel->invert_pxl_clk)
+		lcdc_write((lcdc_read(LCD_RASTER_TIMING_2_REG) |
+			LCD_INVERT_PIXEL_CLOCK), LCD_RASTER_TIMING_2_REG);
+	else
+		lcdc_write((lcdc_read(LCD_RASTER_TIMING_2_REG) &
+			~LCD_INVERT_PIXEL_CLOCK), LCD_RASTER_TIMING_2_REG);
+
 	/* Configure the DMA burst size. */
 	ret = lcd_cfg_dma(cfg->dma_burst_sz);
 	if (ret < 0)
@@ -528,7 +514,6 @@ static int lcd_init(struct da8xx_fb_par *par, const struct lcd_ctrl_config *cfg,
 static irqreturn_t lcdc_irq_handler(int irq, void *arg)
 {
 	u32 stat = lcdc_read(LCD_STAT_REG);
-	struct da8xx_fb_par *par = arg;
 	u32 reg;
 
 	if ((stat & LCD_SYNC_LOST) && (stat & LCD_FIFO_UNDERFLOW)) {
@@ -539,7 +524,6 @@ static irqreturn_t lcdc_irq_handler(int irq, void *arg)
 	} else
 		lcdc_write(stat, LCD_STAT_REG);
 
-	wake_up_interruptible(&par->da8xx_wq);
 	return IRQ_HANDLED;
 }
 
@@ -594,13 +578,12 @@ static int fb_check_var(struct fb_var_screeninfo *var,
 static int __devexit fb_remove(struct platform_device *dev)
 {
 	struct fb_info *info = dev_get_drvdata(&dev->dev);
-	int ret = 0;
 
 	if (info) {
 		struct da8xx_fb_par *par = info->par;
 
 		if (lcdc_read(LCD_RASTER_CTRL_REG) & LCD_RASTER_ENABLE)
-			ret = lcd_disable_raster(par);
+			lcd_disable_raster(par);
 		lcdc_write(0, LCD_RASTER_CTRL_REG);
 
 		/* disable DMA  */
@@ -619,7 +602,7 @@ static int __devexit fb_remove(struct platform_device *dev)
 		release_mem_region(lcdc_regs->start, resource_size(lcdc_regs));
 
 	}
-	return ret;
+	return 0;
 }
 
 static int fb_ioctl(struct fb_info *info, unsigned int cmd,
@@ -634,11 +617,11 @@ static int fb_ioctl(struct fb_info *info, unsigned int cmd,
 	case FBIPUT_BRIGHTNESS:
 	case FBIGET_COLOR:
 	case FBIPUT_COLOR:
-		return -EINVAL;
+		return -ENOTTY;
 	case FBIPUT_HSYNC:
 		if (copy_from_user(&sync_arg, (char *)arg,
 				sizeof(struct lcd_sync_arg)))
-			return -EINVAL;
+			return -EFAULT;
 		lcd_cfg_horizontal_sync(sync_arg.back_porch,
 					sync_arg.pulse_width,
 					sync_arg.front_porch);
@@ -646,7 +629,7 @@ static int fb_ioctl(struct fb_info *info, unsigned int cmd,
 	case FBIPUT_VSYNC:
 		if (copy_from_user(&sync_arg, (char *)arg,
 				sizeof(struct lcd_sync_arg)))
-			return -EINVAL;
+			return -EFAULT;
 		lcd_cfg_vertical_sync(sync_arg.back_porch,
 					sync_arg.pulse_width,
 					sync_arg.front_porch);
@@ -773,8 +756,6 @@ static int __init fb_probe(struct platform_device *device)
 
 	par->lcdc_clk = fb_clk;
 
-	init_waitqueue_head(&par->da8xx_wq);
-
 	par->irq = platform_get_irq(device, 0);
 	if (par->irq < 0) {
 		ret = -ENOENT;
diff --git a/include/video/da8xx-fb.h b/include/video/da8xx-fb.h
index 5f77675..c051a50 100644
--- a/include/video/da8xx-fb.h
+++ b/include/video/da8xx-fb.h
@@ -67,9 +67,6 @@ struct lcd_ctrl_config {
 	/* Mono 8-bit Mode: 1=D0-D7 or 0=D0-D3 */
 	unsigned char mono_8bit_mode;
 
-	/* Invert pixel clock */
-	unsigned char invert_pxl_clock;
-
 	/* Invert line clock */
 	unsigned char invert_line_clock;
 
-- 
cgit v0.10.2


From d63870db3c41086d7f13ec8b41def4331db32327 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 22 Sep 2009 16:47:07 -0700
Subject: sisfb: read buffer overflow

If called with mode_idx = 1, rate = 68, a read occurs from
sisfb_vrate[-1].refresh.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c
index 4a067f0..a4e05e4 100644
--- a/drivers/video/sis/sis_main.c
+++ b/drivers/video/sis/sis_main.c
@@ -698,8 +698,8 @@ sisfb_search_refresh_rate(struct sis_video_info *ivideo, unsigned int rate, int
 						rate, sisfb_vrate[i].refresh);
 					ivideo->rate_idx = sisfb_vrate[i].idx;
 					ivideo->refresh_rate = sisfb_vrate[i].refresh;
-				} else if(((rate - sisfb_vrate[i-1].refresh) <= 2)
-						&& (sisfb_vrate[i].idx != 1)) {
+				} else if((sisfb_vrate[i].idx != 1) &&
+						((rate - sisfb_vrate[i-1].refresh) <= 2)) {
 					DPRINTK("sisfb: Adjusting rate from %d down to %d\n",
 						rate, sisfb_vrate[i-1].refresh);
 					ivideo->rate_idx = sisfb_vrate[i-1].idx;
-- 
cgit v0.10.2


From c6012189a40d33213ead5d15769fab615443206f Mon Sep 17 00:00:00 2001
From: Ryan Mallon <ryan@bluewatersys.com>
Date: Tue, 22 Sep 2009 16:47:09 -0700
Subject: ep93xx video driver platform support

Signed-off-by: Ryan Mallon <ryan@bluewatersys.com>
Acked-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Daniele Venzano <linux@brownhat.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c
index 3dd0e2a..dda19cd7 100644
--- a/arch/arm/mach-ep93xx/clock.c
+++ b/arch/arm/mach-ep93xx/clock.c
@@ -37,7 +37,7 @@ struct clk {
 static unsigned long get_uart_rate(struct clk *clk);
 
 static int set_keytchclk_rate(struct clk *clk, unsigned long rate);
-
+static int set_div_rate(struct clk *clk, unsigned long rate);
 
 static struct clk clk_uart1 = {
 	.sw_locked	= 1,
@@ -76,6 +76,13 @@ static struct clk clk_pwm = {
 	.rate		= EP93XX_EXT_CLK_RATE,
 };
 
+static struct clk clk_video = {
+	.sw_locked	= 1,
+	.enable_reg     = EP93XX_SYSCON_VIDCLKDIV,
+	.enable_mask    = EP93XX_SYSCON_CLKDIV_ENABLE,
+	.set_rate	= set_div_rate,
+};
+
 /* DMA Clocks */
 static struct clk clk_m2p0 = {
 	.enable_reg	= EP93XX_SYSCON_PWRCNT,
@@ -140,6 +147,7 @@ static struct clk_lookup clocks[] = {
 	INIT_CK(NULL,			"pll2",		&clk_pll2),
 	INIT_CK("ep93xx-ohci",		NULL,		&clk_usb_host),
 	INIT_CK("ep93xx-keypad",	NULL,		&clk_keypad),
+	INIT_CK("ep93xx-fb",		NULL,		&clk_video),
 	INIT_CK(NULL,			"pwm_clk",	&clk_pwm),
 	INIT_CK(NULL,			"m2p0",		&clk_m2p0),
 	INIT_CK(NULL,			"m2p1",		&clk_m2p1),
@@ -236,6 +244,84 @@ static int set_keytchclk_rate(struct clk *clk, unsigned long rate)
 	return 0;
 }
 
+static unsigned long calc_clk_div(unsigned long rate, int *psel, int *esel,
+				  int *pdiv, int *div)
+{
+	unsigned long max_rate, best_rate = 0,
+		actual_rate = 0, mclk_rate = 0, rate_err = -1;
+	int i, found = 0, __div = 0, __pdiv = 0;
+
+	/* Don't exceed the maximum rate */
+	max_rate = max(max(clk_pll1.rate / 4, clk_pll2.rate / 4),
+		       (unsigned long)EP93XX_EXT_CLK_RATE / 4);
+	rate = min(rate, max_rate);
+
+	/*
+	 * Try the two pll's and the external clock
+	 * Because the valid predividers are 2, 2.5 and 3, we multiply
+	 * all the clocks by 2 to avoid floating point math.
+	 *
+	 * This is based on the algorithm in the ep93xx raster guide:
+	 * http://be-a-maverick.com/en/pubs/appNote/AN269REV1.pdf
+	 *
+	 */
+	for (i = 0; i < 3; i++) {
+		if (i == 0)
+			mclk_rate = EP93XX_EXT_CLK_RATE * 2;
+		else if (i == 1)
+			mclk_rate = clk_pll1.rate * 2;
+		else if (i == 2)
+			mclk_rate = clk_pll2.rate * 2;
+
+		/* Try each predivider value */
+		for (__pdiv = 4; __pdiv <= 6; __pdiv++) {
+			__div = mclk_rate / (rate * __pdiv);
+			if (__div < 2 || __div > 127)
+				continue;
+
+			actual_rate = mclk_rate / (__pdiv * __div);
+
+			if (!found || abs(actual_rate - rate) < rate_err) {
+				*pdiv = __pdiv - 3;
+				*div = __div;
+				*psel = (i == 2);
+				*esel = (i != 0);
+				best_rate = actual_rate;
+				rate_err = abs(actual_rate - rate);
+				found = 1;
+			}
+		}
+	}
+
+	if (!found)
+		return 0;
+
+	return best_rate;
+}
+
+static int set_div_rate(struct clk *clk, unsigned long rate)
+{
+	unsigned long actual_rate;
+	int psel = 0, esel = 0, pdiv = 0, div = 0;
+	u32 val;
+
+	actual_rate = calc_clk_div(rate, &psel, &esel, &pdiv, &div);
+	if (actual_rate == 0)
+		return -EINVAL;
+	clk->rate = actual_rate;
+
+	/* Clear the esel, psel, pdiv and div bits */
+	val = __raw_readl(clk->enable_reg);
+	val &= ~0x7fff;
+
+	/* Set the new esel, psel, pdiv and div bits for the new clock rate */
+	val |= (esel ? EP93XX_SYSCON_CLKDIV_ESEL : 0) |
+		(psel ? EP93XX_SYSCON_CLKDIV_PSEL : 0) |
+		(pdiv << EP93XX_SYSCON_CLKDIV_PDIV_SHIFT) | div;
+	ep93xx_syscon_swlocked_write(val, clk->enable_reg);
+	return 0;
+}
+
 int clk_set_rate(struct clk *clk, unsigned long rate)
 {
 	if (clk->set_rate)
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index 16b92c3..f7ebed9 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -30,6 +30,7 @@
 #include <linux/i2c-gpio.h>
 
 #include <mach/hardware.h>
+#include <mach/fb.h>
 
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
@@ -682,6 +683,37 @@ void ep93xx_pwm_release_gpio(struct platform_device *pdev)
 EXPORT_SYMBOL(ep93xx_pwm_release_gpio);
 
 
+/*************************************************************************
+ * EP93xx video peripheral handling
+ *************************************************************************/
+static struct ep93xxfb_mach_info ep93xxfb_data;
+
+static struct resource ep93xx_fb_resource[] = {
+	{
+		.start		= EP93XX_RASTER_PHYS_BASE,
+		.end		= EP93XX_RASTER_PHYS_BASE + 0x800 - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device ep93xx_fb_device = {
+	.name			= "ep93xx-fb",
+	.id			= -1,
+	.dev			= {
+		.platform_data	= &ep93xxfb_data,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+		.dma_mask		= &ep93xx_fb_device.dev.coherent_dma_mask,
+	},
+	.num_resources		= ARRAY_SIZE(ep93xx_fb_resource),
+	.resource		= ep93xx_fb_resource,
+};
+
+void __init ep93xx_register_fb(struct ep93xxfb_mach_info *data)
+{
+	ep93xxfb_data = *data;
+	platform_device_register(&ep93xx_fb_device);
+}
+
 extern void ep93xx_gpio_init(void);
 
 void __init ep93xx_init_devices(void)
diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
index ea78e90..0fbf87b 100644
--- a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
+++ b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
@@ -70,6 +70,7 @@
 #define EP93XX_USB_PHYS_BASE		(EP93XX_AHB_PHYS_BASE + 0x00020000)
 #define EP93XX_USB_BASE			EP93XX_AHB_IOMEM(0x00020000)
 
+#define EP93XX_RASTER_PHYS_BASE		(EP93XX_AHB_PHYS_BASE + 0x00030000)
 #define EP93XX_RASTER_BASE		EP93XX_AHB_IOMEM(0x00030000)
 
 #define EP93XX_GRAPHICS_ACCEL_BASE	EP93XX_AHB_IOMEM(0x00040000)
@@ -207,6 +208,11 @@
 #define EP93XX_SYSCON_DEVCFG_ADCPD	(1<<2)
 #define EP93XX_SYSCON_DEVCFG_KEYS	(1<<1)
 #define EP93XX_SYSCON_DEVCFG_SHENA	(1<<0)
+#define EP93XX_SYSCON_VIDCLKDIV		EP93XX_SYSCON_REG(0x84)
+#define EP93XX_SYSCON_CLKDIV_ENABLE	(1<<15)
+#define EP93XX_SYSCON_CLKDIV_ESEL	(1<<14)
+#define EP93XX_SYSCON_CLKDIV_PSEL	(1<<13)
+#define EP93XX_SYSCON_CLKDIV_PDIV_SHIFT	8
 #define EP93XX_SYSCON_KEYTCHCLKDIV	EP93XX_SYSCON_REG(0x90)
 #define EP93XX_SYSCON_KEYTCHCLKDIV_TSEN	(1<<31)
 #define EP93XX_SYSCON_KEYTCHCLKDIV_ADIV	(1<<16)
diff --git a/arch/arm/mach-ep93xx/include/mach/fb.h b/arch/arm/mach-ep93xx/include/mach/fb.h
new file mode 100644
index 0000000..d5ae11d
--- /dev/null
+++ b/arch/arm/mach-ep93xx/include/mach/fb.h
@@ -0,0 +1,56 @@
+/*
+ * arch/arm/mach-ep93xx/include/mach/fb.h
+ */
+
+#ifndef __ASM_ARCH_EP93XXFB_H
+#define __ASM_ARCH_EP93XXFB_H
+
+struct platform_device;
+struct fb_videomode;
+struct fb_info;
+
+#define EP93XXFB_USE_MODEDB		0
+
+/* VideoAttributes flags */
+#define EP93XXFB_STATE_MACHINE_ENABLE	(1 << 0)
+#define EP93XXFB_PIXEL_CLOCK_ENABLE	(1 << 1)
+#define EP93XXFB_VSYNC_ENABLE		(1 << 2)
+#define EP93XXFB_PIXEL_DATA_ENABLE	(1 << 3)
+#define EP93XXFB_COMPOSITE_SYNC		(1 << 4)
+#define EP93XXFB_SYNC_VERT_HIGH		(1 << 5)
+#define EP93XXFB_SYNC_HORIZ_HIGH	(1 << 6)
+#define EP93XXFB_SYNC_BLANK_HIGH	(1 << 7)
+#define EP93XXFB_PCLK_FALLING		(1 << 8)
+#define EP93XXFB_ENABLE_AC		(1 << 9)
+#define EP93XXFB_ENABLE_LCD		(1 << 10)
+#define EP93XXFB_ENABLE_CCIR		(1 << 12)
+#define EP93XXFB_USE_PARALLEL_INTERFACE	(1 << 13)
+#define EP93XXFB_ENABLE_INTERRUPT	(1 << 14)
+#define EP93XXFB_USB_INTERLACE		(1 << 16)
+#define EP93XXFB_USE_EQUALIZATION	(1 << 17)
+#define EP93XXFB_USE_DOUBLE_HORZ	(1 << 18)
+#define EP93XXFB_USE_DOUBLE_VERT	(1 << 19)
+#define EP93XXFB_USE_BLANK_PIXEL	(1 << 20)
+#define EP93XXFB_USE_SDCSN0		(0 << 21)
+#define EP93XXFB_USE_SDCSN1		(1 << 21)
+#define EP93XXFB_USE_SDCSN2		(2 << 21)
+#define EP93XXFB_USE_SDCSN3		(3 << 21)
+
+#define EP93XXFB_ENABLE			(EP93XXFB_STATE_MACHINE_ENABLE	| \
+					 EP93XXFB_PIXEL_CLOCK_ENABLE	| \
+					 EP93XXFB_VSYNC_ENABLE		| \
+					 EP93XXFB_PIXEL_DATA_ENABLE)
+
+struct ep93xxfb_mach_info {
+	unsigned int			num_modes;
+	const struct fb_videomode	*modes;
+	const struct fb_videomode	*default_mode;
+	int				bpp;
+	unsigned int			flags;
+
+	int	(*setup)(struct platform_device *pdev);
+	void	(*teardown)(struct platform_device *pdev);
+	void	(*blank)(int blank_mode, struct fb_info *info);
+};
+
+#endif /* __ASM_ARCH_EP93XXFB_H */
diff --git a/arch/arm/mach-ep93xx/include/mach/platform.h b/arch/arm/mach-ep93xx/include/mach/platform.h
index 5f5fa65..01a0f08 100644
--- a/arch/arm/mach-ep93xx/include/mach/platform.h
+++ b/arch/arm/mach-ep93xx/include/mach/platform.h
@@ -6,6 +6,7 @@
 
 struct i2c_board_info;
 struct platform_device;
+struct ep93xxfb_mach_info;
 
 struct ep93xx_eth_data
 {
@@ -33,6 +34,7 @@ static inline void ep93xx_devcfg_clear_bits(unsigned int bits)
 
 void ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr);
 void ep93xx_register_i2c(struct i2c_board_info *devices, int num);
+void ep93xx_register_fb(struct ep93xxfb_mach_info *data);
 void ep93xx_register_pwm(int pwm0, int pwm1);
 int ep93xx_pwm_acquire_gpio(struct platform_device *pdev);
 void ep93xx_pwm_release_gpio(struct platform_device *pdev);
-- 
cgit v0.10.2


From 88017bda96a5fd568a982b01546c8fb1782dda62 Mon Sep 17 00:00:00 2001
From: Ryan Mallon <ryan@bluewatersys.com>
Date: Tue, 22 Sep 2009 16:47:09 -0700
Subject: ep93xx video driver

EP93xx video driver plus documentation.

Signed-off-by: Ryan Mallon <ryan@bluewatersys.com>
Acked-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Daniele Venzano <linux@brownhat.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/fb/ep93xx-fb.txt b/Documentation/fb/ep93xx-fb.txt
new file mode 100644
index 0000000..5af1bd9
--- /dev/null
+++ b/Documentation/fb/ep93xx-fb.txt
@@ -0,0 +1,135 @@
+================================
+Driver for EP93xx LCD controller
+================================
+
+The EP93xx LCD controller can drive both standard desktop monitors and
+embedded LCD displays. If you have a standard desktop monitor then you
+can use the standard Linux video mode database. In your board file:
+
+	static struct ep93xxfb_mach_info some_board_fb_info = {
+		.num_modes	= EP93XXFB_USE_MODEDB,
+		.bpp		= 16,
+	};
+
+If you have an embedded LCD display then you need to define a video
+mode for it as follows:
+
+	static struct fb_videomode some_board_video_modes[] = {
+		{
+			.name		= "some_lcd_name",
+			/* Pixel clock, porches, etc */
+		},
+	};
+
+Note that the pixel clock value is in pico-seconds. You can use the
+KHZ2PICOS macro to convert the pixel clock value. Most other values
+are in pixel clocks. See Documentation/fb/framebuffer.txt for further
+details.
+
+The ep93xxfb_mach_info structure for your board should look like the
+following:
+
+	static struct ep93xxfb_mach_info some_board_fb_info = {
+		.num_modes	= ARRAY_SIZE(some_board_video_modes),
+		.modes		= some_board_video_modes,
+		.default_mode	= &some_board_video_modes[0],
+		.bpp		= 16,
+	};
+
+The framebuffer device can be registered by adding the following to
+your board initialisation function:
+
+	ep93xx_register_fb(&some_board_fb_info);
+
+=====================
+Video Attribute Flags
+=====================
+
+The ep93xxfb_mach_info structure has a flags field which can be used
+to configure the controller. The video attributes flags are fully
+documented in section 7 of the EP93xx users' guide. The following
+flags are available:
+
+EP93XXFB_PCLK_FALLING		Clock data on the falling edge of the
+				pixel clock. The default is to clock
+				data on the rising edge.
+
+EP93XXFB_SYNC_BLANK_HIGH	Blank signal is active high. By
+				default the blank signal is active low.
+
+EP93XXFB_SYNC_HORIZ_HIGH	Horizontal sync is active high. By
+				default the horizontal sync is active low.
+
+EP93XXFB_SYNC_VERT_HIGH		Vertical sync is active high. By
+				default the vertical sync is active high.
+
+The physical address of the framebuffer can be controlled using the
+following flags:
+
+EP93XXFB_USE_SDCSN0		Use SDCSn[0] for the framebuffer. This
+				is the default setting.
+
+EP93XXFB_USE_SDCSN1		Use SDCSn[1] for the framebuffer.
+
+EP93XXFB_USE_SDCSN2		Use SDCSn[2] for the framebuffer.
+
+EP93XXFB_USE_SDCSN3		Use SDCSn[3] for the framebuffer.
+
+==================
+Platform callbacks
+==================
+
+The EP93xx framebuffer driver supports three optional platform
+callbacks: setup, teardown and blank. The setup and teardown functions
+are called when the framebuffer driver is installed and removed
+respectively. The blank function is called whenever the display is
+blanked or unblanked.
+
+The setup and teardown devices pass the platform_device structure as
+an argument. The fb_info and ep93xxfb_mach_info structures can be
+obtained as follows:
+
+	static int some_board_fb_setup(struct platform_device *pdev)
+	{
+		struct ep93xxfb_mach_info *mach_info = pdev->dev.platform_data;
+		struct fb_info *fb_info = platform_get_drvdata(pdev);
+
+		/* Board specific framebuffer setup */
+	}
+
+======================
+Setting the video mode
+======================
+
+The video mode is set using the following syntax:
+
+	video=XRESxYRES[-BPP][@REFRESH]
+
+If the EP93xx video driver is built-in then the video mode is set on
+the Linux kernel command line, for example:
+
+	video=ep93xx-fb:800x600-16@60
+
+If the EP93xx video driver is built as a module then the video mode is
+set when the module is installed:
+
+	modprobe ep93xx-fb video=320x240
+
+==============
+Screenpage bug
+==============
+
+At least on the EP9315 there is a silicon bug which causes bit 27 of
+the VIDSCRNPAGE (framebuffer physical offset) to be tied low. There is
+an unofficial errata for this bug at:
+	http://marc.info/?l=linux-arm-kernel&m=110061245502000&w=2
+
+By default the EP93xx framebuffer driver checks if the allocated physical
+address has bit 27 set. If it does, then the memory is freed and an
+error is returned. The check can be disabled by adding the following
+option when loading the driver:
+
+      ep93xx-fb.check_screenpage_bug=0
+
+In some cases it may be possible to reconfigure your SDRAM layout to
+avoid this bug. See section 13 of the EP93xx users' guide for details.
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index a7944ef..7be5062 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2128,6 +2128,17 @@ config FB_MB862XX_LIME
 	---help---
 	  Framebuffer support for Fujitsu Lime GDC on host CPU bus.
 
+config FB_EP93XX
+	tristate "EP93XX frame buffer support"
+	depends on FB && ARCH_EP93XX
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	---help---
+	  Framebuffer driver for the Cirrus Logic EP93XX series of processors.
+	  This driver is also available as a module. The module will be called
+	  ep93xx-fb.
+
 config FB_PRE_INIT_FB
 	bool "Don't reinitialize, use bootloader's GDC/Display configuration"
 	depends on FB_MB862XX_LIME
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 85b2d23..80232e1 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_FB_Q40)              += q40fb.o
 obj-$(CONFIG_FB_TGA)              += tgafb.o
 obj-$(CONFIG_FB_HP300)            += hpfb.o
 obj-$(CONFIG_FB_G364)             += g364fb.o
+obj-$(CONFIG_FB_EP93XX)		  += ep93xx-fb.o
 obj-$(CONFIG_FB_SA1100)           += sa1100fb.o
 obj-$(CONFIG_FB_HIT)              += hitfb.o
 obj-$(CONFIG_FB_EPSON1355)	  += epson1355fb.o
diff --git a/drivers/video/ep93xx-fb.c b/drivers/video/ep93xx-fb.c
new file mode 100644
index 0000000..bd9d46f
--- /dev/null
+++ b/drivers/video/ep93xx-fb.c
@@ -0,0 +1,646 @@
+/*
+ * linux/drivers/video/ep93xx-fb.c
+ *
+ * Framebuffer support for the EP93xx series.
+ *
+ * Copyright (C) 2007 Bluewater Systems Ltd
+ * Author: Ryan Mallon <ryan@bluewatersys.com>
+ *
+ * Copyright (c) 2009 H Hartley Sweeten <hsweeten@visionengravers.com>
+ *
+ * Based on the Cirrus Logic ep93xxfb driver, and various other ep93xxfb
+ * drivers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/clk.h>
+#include <linux/fb.h>
+
+#include <mach/fb.h>
+
+/* Vertical Frame Timing Registers */
+#define EP93XXFB_VLINES_TOTAL			0x0000	/* SW locked */
+#define EP93XXFB_VSYNC				0x0004	/* SW locked */
+#define EP93XXFB_VACTIVE			0x0008	/* SW locked */
+#define EP93XXFB_VBLANK				0x0228	/* SW locked */
+#define EP93XXFB_VCLK				0x000c	/* SW locked */
+
+/* Horizontal Frame Timing Registers */
+#define EP93XXFB_HCLKS_TOTAL			0x0010	/* SW locked */
+#define EP93XXFB_HSYNC				0x0014	/* SW locked */
+#define EP93XXFB_HACTIVE			0x0018	/* SW locked */
+#define EP93XXFB_HBLANK				0x022c	/* SW locked */
+#define EP93XXFB_HCLK				0x001c	/* SW locked */
+
+/* Frame Buffer Memory Configuration Registers */
+#define EP93XXFB_SCREEN_PAGE			0x0028
+#define EP93XXFB_SCREEN_HPAGE			0x002c
+#define EP93XXFB_SCREEN_LINES			0x0030
+#define EP93XXFB_LINE_LENGTH			0x0034
+#define EP93XXFB_VLINE_STEP			0x0038
+#define EP93XXFB_LINE_CARRY			0x003c	/* SW locked */
+#define EP93XXFB_EOL_OFFSET			0x0230
+
+/* Other Video Registers */
+#define EP93XXFB_BRIGHTNESS			0x0020
+#define EP93XXFB_ATTRIBS			0x0024	/* SW locked */
+#define EP93XXFB_SWLOCK				0x007c	/* SW locked */
+#define EP93XXFB_AC_RATE			0x0214
+#define EP93XXFB_FIFO_LEVEL			0x0234
+#define EP93XXFB_PIXELMODE			0x0054
+#define EP93XXFB_PIXELMODE_32BPP		(0x7 << 0)
+#define EP93XXFB_PIXELMODE_24BPP		(0x6 << 0)
+#define EP93XXFB_PIXELMODE_16BPP		(0x4 << 0)
+#define EP93XXFB_PIXELMODE_8BPP			(0x2 << 0)
+#define EP93XXFB_PIXELMODE_SHIFT_1P_24B		(0x0 << 3)
+#define EP93XXFB_PIXELMODE_SHIFT_1P_18B		(0x1 << 3)
+#define EP93XXFB_PIXELMODE_COLOR_LUT		(0x0 << 10)
+#define EP93XXFB_PIXELMODE_COLOR_888		(0x4 << 10)
+#define EP93XXFB_PIXELMODE_COLOR_555		(0x5 << 10)
+#define EP93XXFB_PARL_IF_OUT			0x0058
+#define EP93XXFB_PARL_IF_IN			0x005c
+
+/* Blink Control Registers */
+#define EP93XXFB_BLINK_RATE			0x0040
+#define EP93XXFB_BLINK_MASK			0x0044
+#define EP93XXFB_BLINK_PATTRN			0x0048
+#define EP93XXFB_PATTRN_MASK			0x004c
+#define EP93XXFB_BKGRND_OFFSET			0x0050
+
+/* Hardware Cursor Registers */
+#define EP93XXFB_CURSOR_ADR_START		0x0060
+#define EP93XXFB_CURSOR_ADR_RESET		0x0064
+#define EP93XXFB_CURSOR_SIZE			0x0068
+#define EP93XXFB_CURSOR_COLOR1			0x006c
+#define EP93XXFB_CURSOR_COLOR2			0x0070
+#define EP93XXFB_CURSOR_BLINK_COLOR1		0x021c
+#define EP93XXFB_CURSOR_BLINK_COLOR2		0x0220
+#define EP93XXFB_CURSOR_XY_LOC			0x0074
+#define EP93XXFB_CURSOR_DSCAN_HY_LOC		0x0078
+#define EP93XXFB_CURSOR_BLINK_RATE_CTRL		0x0224
+
+/* LUT Registers */
+#define EP93XXFB_GRY_SCL_LUTR			0x0080
+#define EP93XXFB_GRY_SCL_LUTG			0x0280
+#define EP93XXFB_GRY_SCL_LUTB			0x0300
+#define EP93XXFB_LUT_SW_CONTROL			0x0218
+#define EP93XXFB_LUT_SW_CONTROL_SWTCH		(1 << 0)
+#define EP93XXFB_LUT_SW_CONTROL_SSTAT		(1 << 1)
+#define EP93XXFB_COLOR_LUT			0x0400
+
+/* Video Signature Registers */
+#define EP93XXFB_VID_SIG_RSLT_VAL		0x0200
+#define EP93XXFB_VID_SIG_CTRL			0x0204
+#define EP93XXFB_VSIG				0x0208
+#define EP93XXFB_HSIG				0x020c
+#define EP93XXFB_SIG_CLR_STR			0x0210
+
+/* Minimum / Maximum resolutions supported */
+#define EP93XXFB_MIN_XRES			64
+#define EP93XXFB_MIN_YRES			64
+#define EP93XXFB_MAX_XRES			1024
+#define EP93XXFB_MAX_YRES			768
+
+struct ep93xx_fbi {
+	struct ep93xxfb_mach_info	*mach_info;
+	struct clk			*clk;
+	struct resource			*res;
+	void __iomem			*mmio_base;
+	unsigned int			pseudo_palette[256];
+};
+
+static int check_screenpage_bug = 1;
+module_param(check_screenpage_bug, int, 0644);
+MODULE_PARM_DESC(check_screenpage_bug,
+		 "Check for bit 27 screen page bug. Default = 1");
+
+static inline unsigned int ep93xxfb_readl(struct ep93xx_fbi *fbi,
+					  unsigned int off)
+{
+	return __raw_readl(fbi->mmio_base + off);
+}
+
+static inline void ep93xxfb_writel(struct ep93xx_fbi *fbi,
+				   unsigned int val, unsigned int off)
+{
+	__raw_writel(val, fbi->mmio_base + off);
+}
+
+/*
+ * Write to one of the locked raster registers.
+ */
+static inline void ep93xxfb_out_locked(struct ep93xx_fbi *fbi,
+				       unsigned int val, unsigned int reg)
+{
+	/*
+	 * We don't need a lock or delay here since the raster register
+	 * block will remain unlocked until the next access.
+	 */
+	ep93xxfb_writel(fbi, 0xaa, EP93XXFB_SWLOCK);
+	ep93xxfb_writel(fbi, val, reg);
+}
+
+static void ep93xxfb_set_video_attribs(struct fb_info *info)
+{
+	struct ep93xx_fbi *fbi = info->par;
+	unsigned int attribs;
+
+	attribs = EP93XXFB_ENABLE;
+	attribs |= fbi->mach_info->flags;
+	ep93xxfb_out_locked(fbi, attribs, EP93XXFB_ATTRIBS);
+}
+
+static int ep93xxfb_set_pixelmode(struct fb_info *info)
+{
+	struct ep93xx_fbi *fbi = info->par;
+	unsigned int val;
+
+	info->var.transp.offset = 0;
+	info->var.transp.length = 0;
+
+	switch (info->var.bits_per_pixel) {
+	case 8:
+		val = EP93XXFB_PIXELMODE_8BPP | EP93XXFB_PIXELMODE_COLOR_LUT |
+			EP93XXFB_PIXELMODE_SHIFT_1P_18B;
+
+		info->var.red.offset	= 0;
+		info->var.red.length	= 8;
+		info->var.green.offset	= 0;
+		info->var.green.length	= 8;
+		info->var.blue.offset	= 0;
+		info->var.blue.length	= 8;
+		info->fix.visual 	= FB_VISUAL_PSEUDOCOLOR;
+		break;
+
+	case 16:
+		val = EP93XXFB_PIXELMODE_16BPP | EP93XXFB_PIXELMODE_COLOR_555 |
+			EP93XXFB_PIXELMODE_SHIFT_1P_18B;
+
+		info->var.red.offset	= 11;
+		info->var.red.length	= 5;
+		info->var.green.offset	= 5;
+		info->var.green.length	= 6;
+		info->var.blue.offset	= 0;
+		info->var.blue.length	= 5;
+		info->fix.visual 	= FB_VISUAL_TRUECOLOR;
+		break;
+
+	case 24:
+		val = EP93XXFB_PIXELMODE_24BPP | EP93XXFB_PIXELMODE_COLOR_888 |
+			EP93XXFB_PIXELMODE_SHIFT_1P_24B;
+
+		info->var.red.offset	= 16;
+		info->var.red.length	= 8;
+		info->var.green.offset	= 8;
+		info->var.green.length	= 8;
+		info->var.blue.offset	= 0;
+		info->var.blue.length	= 8;
+		info->fix.visual 	= FB_VISUAL_TRUECOLOR;
+		break;
+
+	case 32:
+		val = EP93XXFB_PIXELMODE_32BPP | EP93XXFB_PIXELMODE_COLOR_888 |
+			EP93XXFB_PIXELMODE_SHIFT_1P_24B;
+
+		info->var.red.offset	= 16;
+		info->var.red.length	= 8;
+		info->var.green.offset	= 8;
+		info->var.green.length	= 8;
+		info->var.blue.offset	= 0;
+		info->var.blue.length	= 8;
+		info->fix.visual 	= FB_VISUAL_TRUECOLOR;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	ep93xxfb_writel(fbi, val, EP93XXFB_PIXELMODE);
+	return 0;
+}
+
+static void ep93xxfb_set_timing(struct fb_info *info)
+{
+	struct ep93xx_fbi *fbi = info->par;
+	unsigned int vlines_total, hclks_total, start, stop;
+
+	vlines_total = info->var.yres + info->var.upper_margin +
+		info->var.lower_margin + info->var.vsync_len - 1;
+
+	hclks_total = info->var.xres + info->var.left_margin +
+		info->var.right_margin + info->var.hsync_len - 1;
+
+	ep93xxfb_out_locked(fbi, vlines_total, EP93XXFB_VLINES_TOTAL);
+	ep93xxfb_out_locked(fbi, hclks_total, EP93XXFB_HCLKS_TOTAL);
+
+	start = vlines_total;
+	stop = vlines_total - info->var.vsync_len;
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_VSYNC);
+
+	start = vlines_total - info->var.vsync_len - info->var.upper_margin;
+	stop = info->var.lower_margin - 1;
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_VBLANK);
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_VACTIVE);
+
+	start = vlines_total;
+	stop = vlines_total + 1;
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_VCLK);
+
+	start = hclks_total;
+	stop = hclks_total - info->var.hsync_len;
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_HSYNC);
+
+	start = hclks_total - info->var.hsync_len - info->var.left_margin;
+	stop = info->var.right_margin - 1;
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_HBLANK);
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_HACTIVE);
+
+	start = hclks_total;
+	stop = hclks_total;
+	ep93xxfb_out_locked(fbi, start | (stop << 16), EP93XXFB_HCLK);
+
+	ep93xxfb_out_locked(fbi, 0x0, EP93XXFB_LINE_CARRY);
+}
+
+static int ep93xxfb_set_par(struct fb_info *info)
+{
+	struct ep93xx_fbi *fbi = info->par;
+
+	clk_set_rate(fbi->clk, 1000 * PICOS2KHZ(info->var.pixclock));
+
+	ep93xxfb_set_timing(info);
+
+	info->fix.line_length = info->var.xres_virtual *
+		info->var.bits_per_pixel / 8;
+
+	ep93xxfb_writel(fbi, info->fix.smem_start, EP93XXFB_SCREEN_PAGE);
+	ep93xxfb_writel(fbi, info->var.yres - 1, EP93XXFB_SCREEN_LINES);
+	ep93xxfb_writel(fbi, ((info->var.xres * info->var.bits_per_pixel)
+			      / 32) - 1, EP93XXFB_LINE_LENGTH);
+	ep93xxfb_writel(fbi, info->fix.line_length / 4, EP93XXFB_VLINE_STEP);
+	ep93xxfb_set_video_attribs(info);
+	return 0;
+}
+
+static int ep93xxfb_check_var(struct fb_var_screeninfo *var,
+			      struct fb_info *info)
+{
+	int err;
+
+	err = ep93xxfb_set_pixelmode(info);
+	if (err)
+		return err;
+
+	var->xres = max_t(unsigned int, var->xres, EP93XXFB_MIN_XRES);
+	var->xres = min_t(unsigned int, var->xres, EP93XXFB_MAX_XRES);
+	var->xres_virtual = max(var->xres_virtual, var->xres);
+
+	var->yres = max_t(unsigned int, var->yres, EP93XXFB_MIN_YRES);
+	var->yres = min_t(unsigned int, var->yres, EP93XXFB_MAX_YRES);
+	var->yres_virtual = max(var->yres_virtual, var->yres);
+
+	return 0;
+}
+
+static int ep93xxfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+	unsigned int offset = vma->vm_pgoff << PAGE_SHIFT;
+
+	if (offset < info->fix.smem_len) {
+		return dma_mmap_writecombine(info->dev, vma, info->screen_base,
+					     info->fix.smem_start,
+					     info->fix.smem_len);
+	}
+
+	return -EINVAL;
+}
+
+static int ep93xxfb_blank(int blank_mode, struct fb_info *info)
+{
+	struct ep93xx_fbi *fbi = info->par;
+	unsigned int attribs = ep93xxfb_readl(fbi, EP93XXFB_ATTRIBS);
+
+	if (blank_mode) {
+		if (fbi->mach_info->blank)
+			fbi->mach_info->blank(blank_mode, info);
+		ep93xxfb_out_locked(fbi, attribs & ~EP93XXFB_ENABLE,
+				    EP93XXFB_ATTRIBS);
+		clk_disable(fbi->clk);
+	} else {
+		clk_enable(fbi->clk);
+		ep93xxfb_out_locked(fbi, attribs | EP93XXFB_ENABLE,
+				    EP93XXFB_ATTRIBS);
+		if (fbi->mach_info->blank)
+			fbi->mach_info->blank(blank_mode, info);
+	}
+
+	return 0;
+}
+
+static inline int ep93xxfb_convert_color(int val, int width)
+{
+	return ((val << width) + 0x7fff - val) >> 16;
+}
+
+static int ep93xxfb_setcolreg(unsigned int regno, unsigned int red,
+			      unsigned int green, unsigned int blue,
+			      unsigned int transp, struct fb_info *info)
+{
+	struct ep93xx_fbi *fbi = info->par;
+	unsigned int *pal = info->pseudo_palette;
+	unsigned int ctrl, i, rgb, lut_current, lut_stat;
+
+	switch (info->fix.visual) {
+	case FB_VISUAL_PSEUDOCOLOR:
+		rgb = ((red & 0xff00) << 8) | (green & 0xff00) |
+			((blue & 0xff00) >> 8);
+
+		pal[regno] = rgb;
+		ep93xxfb_writel(fbi, rgb, (EP93XXFB_COLOR_LUT + (regno << 2)));
+		ctrl = ep93xxfb_readl(fbi, EP93XXFB_LUT_SW_CONTROL);
+		lut_stat = !!(ctrl & EP93XXFB_LUT_SW_CONTROL_SSTAT);
+		lut_current = !!(ctrl & EP93XXFB_LUT_SW_CONTROL_SWTCH);
+
+		if (lut_stat == lut_current) {
+			for (i = 0; i < 256; i++) {
+				ep93xxfb_writel(fbi, pal[i],
+					EP93XXFB_COLOR_LUT + (i << 2));
+			}
+
+			ep93xxfb_writel(fbi,
+					ctrl ^ EP93XXFB_LUT_SW_CONTROL_SWTCH,
+					EP93XXFB_LUT_SW_CONTROL);
+		}
+		break;
+
+	case FB_VISUAL_TRUECOLOR:
+		if (regno > 16)
+			return 1;
+
+		red = ep93xxfb_convert_color(red, info->var.red.length);
+		green = ep93xxfb_convert_color(green, info->var.green.length);
+		blue = ep93xxfb_convert_color(blue, info->var.blue.length);
+		transp = ep93xxfb_convert_color(transp,
+						info->var.transp.length);
+
+		pal[regno] = (red << info->var.red.offset) |
+			(green << info->var.green.offset) |
+			(blue << info->var.blue.offset) |
+			(transp << info->var.transp.offset);
+		break;
+
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct fb_ops ep93xxfb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_check_var	= ep93xxfb_check_var,
+	.fb_set_par	= ep93xxfb_set_par,
+	.fb_blank	= ep93xxfb_blank,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_setcolreg	= ep93xxfb_setcolreg,
+	.fb_mmap	= ep93xxfb_mmap,
+};
+
+static int __init ep93xxfb_calc_fbsize(struct ep93xxfb_mach_info *mach_info)
+{
+	int i, fb_size = 0;
+
+	if (mach_info->num_modes == EP93XXFB_USE_MODEDB) {
+		fb_size = EP93XXFB_MAX_XRES * EP93XXFB_MAX_YRES *
+			mach_info->bpp / 8;
+	} else {
+		for (i = 0; i < mach_info->num_modes; i++) {
+			const struct fb_videomode *mode;
+			int size;
+
+			mode = &mach_info->modes[i];
+			size = mode->xres * mode->yres * mach_info->bpp / 8;
+			if (size > fb_size)
+				fb_size = size;
+		}
+	}
+
+	return fb_size;
+}
+
+static int __init ep93xxfb_alloc_videomem(struct fb_info *info)
+{
+	struct ep93xx_fbi *fbi = info->par;
+	char __iomem *virt_addr;
+	dma_addr_t phys_addr;
+	unsigned int fb_size;
+
+	fb_size = ep93xxfb_calc_fbsize(fbi->mach_info);
+	virt_addr = dma_alloc_writecombine(info->dev, fb_size,
+					   &phys_addr, GFP_KERNEL);
+	if (!virt_addr)
+		return -ENOMEM;
+
+	/*
+	 * There is a bug in the ep93xx framebuffer which causes problems
+	 * if bit 27 of the physical address is set.
+	 * See: http://marc.info/?l=linux-arm-kernel&m=110061245502000&w=2
+	 * There does not seem to be any offical errata for this, but I
+	 * have confirmed the problem exists on my hardware (ep9315) at
+	 * least.
+	 */
+	if (check_screenpage_bug && phys_addr & (1 << 27)) {
+		dev_err(info->dev, "ep93xx framebuffer bug. phys addr (0x%x) "
+			"has bit 27 set: cannot init framebuffer\n",
+			phys_addr);
+
+		dma_free_coherent(info->dev, fb_size, virt_addr, phys_addr);
+		return -ENOMEM;
+	}
+
+	info->fix.smem_start = phys_addr;
+	info->fix.smem_len = fb_size;
+	info->screen_base = virt_addr;
+
+	return 0;
+}
+
+static void ep93xxfb_dealloc_videomem(struct fb_info *info)
+{
+	if (info->screen_base)
+		dma_free_coherent(info->dev, info->fix.smem_len,
+				  info->screen_base, info->fix.smem_start);
+}
+
+static int __init ep93xxfb_probe(struct platform_device *pdev)
+{
+	struct ep93xxfb_mach_info *mach_info = pdev->dev.platform_data;
+	struct fb_info *info;
+	struct ep93xx_fbi *fbi;
+	struct resource *res;
+	char *video_mode;
+	int err;
+
+	if (!mach_info)
+		return -EINVAL;
+
+	info = framebuffer_alloc(sizeof(struct ep93xx_fbi), &pdev->dev);
+	if (!info)
+		return -ENOMEM;
+
+	info->dev = &pdev->dev;
+	platform_set_drvdata(pdev, info);
+	fbi = info->par;
+	fbi->mach_info = mach_info;
+
+	err = fb_alloc_cmap(&info->cmap, 256, 0);
+	if (err)
+		goto failed;
+
+	err = ep93xxfb_alloc_videomem(info);
+	if (err)
+		goto failed;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -ENXIO;
+		goto failed;
+	}
+
+	res = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (!res) {
+		err = -EBUSY;
+		goto failed;
+	}
+
+	fbi->res = res;
+	fbi->mmio_base = ioremap(res->start, resource_size(res));
+	if (!fbi->mmio_base) {
+		err = -ENXIO;
+		goto failed;
+	}
+
+	strcpy(info->fix.id, pdev->name);
+	info->fbops		= &ep93xxfb_ops;
+	info->fix.type		= FB_TYPE_PACKED_PIXELS;
+	info->fix.accel		= FB_ACCEL_NONE;
+	info->var.activate	= FB_ACTIVATE_NOW;
+	info->var.vmode		= FB_VMODE_NONINTERLACED;
+	info->flags		= FBINFO_DEFAULT;
+	info->node		= -1;
+	info->state		= FBINFO_STATE_RUNNING;
+	info->pseudo_palette	= &fbi->pseudo_palette;
+
+	fb_get_options("ep93xx-fb", &video_mode);
+	err = fb_find_mode(&info->var, info, video_mode,
+			   fbi->mach_info->modes, fbi->mach_info->num_modes,
+			   fbi->mach_info->default_mode, fbi->mach_info->bpp);
+	if (err == 0) {
+		dev_err(info->dev, "No suitable video mode found\n");
+		err = -EINVAL;
+		goto failed;
+	}
+
+	if (mach_info->setup) {
+		err = mach_info->setup(pdev);
+		if (err)
+			return err;
+	}
+
+	err = ep93xxfb_check_var(&info->var, info);
+	if (err)
+		goto failed;
+
+	fbi->clk = clk_get(info->dev, NULL);
+	if (IS_ERR(fbi->clk)) {
+		err = PTR_ERR(fbi->clk);
+		fbi->clk = NULL;
+		goto failed;
+	}
+
+	ep93xxfb_set_par(info);
+	clk_enable(fbi->clk);
+
+	err = register_framebuffer(info);
+	if (err)
+		goto failed;
+
+	dev_info(info->dev, "registered. Mode = %dx%d-%d\n",
+		 info->var.xres, info->var.yres, info->var.bits_per_pixel);
+	return 0;
+
+failed:
+	if (fbi->clk)
+		clk_put(fbi->clk);
+	if (fbi->mmio_base)
+		iounmap(fbi->mmio_base);
+	if (fbi->res)
+		release_mem_region(fbi->res->start, resource_size(fbi->res));
+	ep93xxfb_dealloc_videomem(info);
+	if (&info->cmap)
+		fb_dealloc_cmap(&info->cmap);
+	if (fbi->mach_info->teardown)
+		fbi->mach_info->teardown(pdev);
+	kfree(info);
+	platform_set_drvdata(pdev, NULL);
+
+	return err;
+}
+
+static int ep93xxfb_remove(struct platform_device *pdev)
+{
+	struct fb_info *info = platform_get_drvdata(pdev);
+	struct ep93xx_fbi *fbi = info->par;
+
+	unregister_framebuffer(info);
+	clk_disable(fbi->clk);
+	clk_put(fbi->clk);
+	iounmap(fbi->mmio_base);
+	release_mem_region(fbi->res->start, resource_size(fbi->res));
+	ep93xxfb_dealloc_videomem(info);
+	fb_dealloc_cmap(&info->cmap);
+
+	if (fbi->mach_info->teardown)
+		fbi->mach_info->teardown(pdev);
+
+	kfree(info);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver ep93xxfb_driver = {
+	.probe		= ep93xxfb_probe,
+	.remove		= ep93xxfb_remove,
+	.driver = {
+		.name	= "ep93xx-fb",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __devinit ep93xxfb_init(void)
+{
+	return platform_driver_register(&ep93xxfb_driver);
+}
+
+static void __exit ep93xxfb_exit(void)
+{
+	platform_driver_unregister(&ep93xxfb_driver);
+}
+
+module_init(ep93xxfb_init);
+module_exit(ep93xxfb_exit);
+
+MODULE_DESCRIPTION("EP93XX Framebuffer Driver");
+MODULE_ALIAS("platform:ep93xx-fb");
+MODULE_AUTHOR("Ryan Mallon <ryan&bluewatersys.com>, "
+	      "H Hartley Sweeten <hsweeten@visionengravers.com");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 0e3ca33ac7aa88ac2f28d4ec99f0bfeaf2a2318d Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:10 -0700
Subject: viafb: remove duplicated CX700 register init

The current code initializes the register for CX700 chips 2 times due to a
missing break as discovered by Harald Welte.

As CX700 and VX800 have exactly the same register initialization we can
use one for both to avoid duplicated code.

As this is a pure code cleanup no measurable runtime effects are expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c
index c896000..64a820c 100644
--- a/drivers/video/via/hw.c
+++ b/drivers/video/via/hw.c
@@ -2271,11 +2271,8 @@ int viafb_setmode(int vmode_index, int hor_res, int ver_res, int video_bpp,
 		break;
 
 	case UNICHROME_CX700:
-		viafb_write_regx(CX700_ModeXregs, NUM_TOTAL_CX700_ModeXregs);
-
 	case UNICHROME_VX800:
-		viafb_write_regx(VX800_ModeXregs, NUM_TOTAL_VX800_ModeXregs);
-
+		viafb_write_regx(CX700_ModeXregs, NUM_TOTAL_CX700_ModeXregs);
 		break;
 	}
 
diff --git a/drivers/video/via/viamode.c b/drivers/video/via/viamode.c
index 6dcf583..e799b2d 100644
--- a/drivers/video/via/viamode.c
+++ b/drivers/video/via/viamode.c
@@ -329,67 +329,6 @@ struct io_reg CX700_ModeXregs[] = { {VIASR, SR10, 0xFF, 0x01},
 {VIACR, CRD2, 0xFF, 0xFF}	/* TMDS/LVDS control register.         */
 };
 
-/* For VT3353: Common Setting for Video Mode */
-struct io_reg VX800_ModeXregs[] = { {VIASR, SR10, 0xFF, 0x01},
-{VIASR, SR15, 0x02, 0x02},
-{VIASR, SR16, 0xBF, 0x08},
-{VIASR, SR17, 0xFF, 0x1F},
-{VIASR, SR18, 0xFF, 0x4E},
-{VIASR, SR1A, 0xFB, 0x08},
-{VIASR, SR1B, 0xFF, 0xF0},
-{VIASR, SR1E, 0xFF, 0x01},
-{VIASR, SR2A, 0xFF, 0x00},
-{VIASR, SR2D, 0xFF, 0xFF},	/* VCK and LCK PLL power on.           */
-{VIACR, CR0A, 0xFF, 0x1E},	/* Cursor Start                        */
-{VIACR, CR0B, 0xFF, 0x00},	/* Cursor End                          */
-{VIACR, CR0E, 0xFF, 0x00},	/* Cursor Location High                */
-{VIACR, CR0F, 0xFF, 0x00},	/* Cursor Localtion Low                */
-{VIACR, CR32, 0xFF, 0x00},
-{VIACR, CR33, 0xFF, 0x00},
-{VIACR, CR34, 0xFF, 0x00},
-{VIACR, CR35, 0xFF, 0x00},
-{VIACR, CR36, 0x08, 0x00},
-{VIACR, CR47, 0xC8, 0x00},	/* Clear VCK Plus. */
-{VIACR, CR62, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CR63, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CR64, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CRA3, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CR69, 0xFF, 0x00},
-{VIACR, CR6A, 0xFF, 0x40},
-{VIACR, CR6B, 0xFF, 0x00},
-{VIACR, CR6C, 0xFF, 0x00},
-{VIACR, CR7A, 0xFF, 0x01},	/* LCD Scaling Parameter 1             */
-{VIACR, CR7B, 0xFF, 0x02},	/* LCD Scaling Parameter 2             */
-{VIACR, CR7C, 0xFF, 0x03},	/* LCD Scaling Parameter 3             */
-{VIACR, CR7D, 0xFF, 0x04},	/* LCD Scaling Parameter 4             */
-{VIACR, CR7E, 0xFF, 0x07},	/* LCD Scaling Parameter 5             */
-{VIACR, CR7F, 0xFF, 0x0A},	/* LCD Scaling Parameter 6             */
-{VIACR, CR80, 0xFF, 0x0D},	/* LCD Scaling Parameter 7             */
-{VIACR, CR81, 0xFF, 0x13},	/* LCD Scaling Parameter 8             */
-{VIACR, CR82, 0xFF, 0x16},	/* LCD Scaling Parameter 9             */
-{VIACR, CR83, 0xFF, 0x19},	/* LCD Scaling Parameter 10            */
-{VIACR, CR84, 0xFF, 0x1C},	/* LCD Scaling Parameter 11            */
-{VIACR, CR85, 0xFF, 0x1D},	/* LCD Scaling Parameter 12            */
-{VIACR, CR86, 0xFF, 0x1E},	/* LCD Scaling Parameter 13            */
-{VIACR, CR87, 0xFF, 0x1F},	/* LCD Scaling Parameter 14            */
-{VIACR, CR88, 0xFF, 0x40},	/* LCD Panel Type                      */
-{VIACR, CR89, 0xFF, 0x00},	/* LCD Timing Control 0                */
-{VIACR, CR8A, 0xFF, 0x88},	/* LCD Timing Control 1                */
-{VIACR, CRD4, 0xFF, 0x81},	/* Second power sequence control       */
-{VIACR, CR8B, 0xFF, 0x5D},	/* LCD Power Sequence Control 0        */
-{VIACR, CR8C, 0xFF, 0x2B},	/* LCD Power Sequence Control 1        */
-{VIACR, CR8D, 0xFF, 0x6F},	/* LCD Power Sequence Control 2        */
-{VIACR, CR8E, 0xFF, 0x2B},	/* LCD Power Sequence Control 3        */
-{VIACR, CR8F, 0xFF, 0x01},	/* LCD Power Sequence Control 4        */
-{VIACR, CR90, 0xFF, 0x01},	/* LCD Power Sequence Control 5        */
-{VIACR, CR91, 0xFF, 0x80},	/* 24/12 bit LVDS Data off             */
-{VIACR, CR96, 0xFF, 0x00},
-{VIACR, CR97, 0xFF, 0x00},
-{VIACR, CR99, 0xFF, 0x00},
-{VIACR, CR9B, 0xFF, 0x00},
-{VIACR, CRD2, 0xFF, 0xFF}	/* TMDS/LVDS control register.         */
-};
-
 /* Video Mode Table */
 /* Common Setting for Video Mode */
 struct io_reg CLE266_ModeXregs[] = { {VIASR, SR1E, 0xF0, 0x00},
diff --git a/drivers/video/via/viamode.h b/drivers/video/via/viamode.h
index 1a5de50..2ec8bfe 100644
--- a/drivers/video/via/viamode.h
+++ b/drivers/video/via/viamode.h
@@ -56,7 +56,6 @@ struct res_map_refresh {
 #define NUM_TOTAL_CN700_ModeXregs ARRAY_SIZE(CN700_ModeXregs)
 #define NUM_TOTAL_KM400_ModeXregs ARRAY_SIZE(KM400_ModeXregs)
 #define NUM_TOTAL_CX700_ModeXregs ARRAY_SIZE(CX700_ModeXregs)
-#define NUM_TOTAL_VX800_ModeXregs ARRAY_SIZE(VX800_ModeXregs)
 #define NUM_TOTAL_CLE266_ModeXregs ARRAY_SIZE(CLE266_ModeXregs)
 #define NUM_TOTAL_PATCH_MODE ARRAY_SIZE(res_patch_table)
 #define NUM_TOTAL_MODETABLE ARRAY_SIZE(CLE266Modes)
-- 
cgit v0.10.2


From 3915a927aaed8d158cba5ad6466e237ae0d84aab Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:12 -0700
Subject: viafb: remove temporary start address setting

Currently the start address is set to an initial value every time
viafb_setmode is called.

This is not done consistently along graphic cores and not even the whole
address but often only parts of it.  On top of that it seems useless as
the real/final address will be set by viafb_set_start_addr a few lines
later.

Remove this superfluous initalization to shrink register initalization and
as a start to decouple primary and secondary display.  Code cleanup, no
notable runtime change expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viamode.c b/drivers/video/via/viamode.c
index e799b2d..7abd65d 100644
--- a/drivers/video/via/viamode.c
+++ b/drivers/video/via/viamode.c
@@ -100,12 +100,8 @@ struct io_reg CN400_ModeXregs[] = { {VIASR, SR10, 0xFF, 0x01},
 {VIACR, CR0F, 0xFF, 0x00},	/* Cursor Localtion Low                */
 {VIACR, CR32, 0xFF, 0x00},
 {VIACR, CR33, 0xFF, 0x00},
-{VIACR, CR34, 0xFF, 0x00},
 {VIACR, CR35, 0xFF, 0x00},
 {VIACR, CR36, 0x08, 0x00},
-{VIACR, CR62, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CR63, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CR64, 0xFF, 0x00},	/* Secondary Display Starting Address  */
 {VIACR, CR69, 0xFF, 0x00},
 {VIACR, CR6A, 0xFF, 0x40},
 {VIACR, CR6B, 0xFF, 0x00},
@@ -159,16 +155,12 @@ struct io_reg CN700_ModeXregs[] = { {VIASR, SR10, 0xFF, 0x01},
 {VIASR, CR30, 0xFF, 0x04},
 {VIACR, CR32, 0xFF, 0x00},
 {VIACR, CR33, 0x7F, 0x00},
-{VIACR, CR34, 0xFF, 0x00},
 {VIACR, CR35, 0xFF, 0x00},
 {VIACR, CR36, 0xFF, 0x31},
 {VIACR, CR41, 0xFF, 0x80},
 {VIACR, CR42, 0xFF, 0x00},
 {VIACR, CR55, 0x80, 0x00},
 {VIACR, CR5D, 0x80, 0x00},	/*Horizontal Retrace Start bit[11] should be 0*/
-{VIACR, CR62, 0xFF, 0x00},	/* Secondary Display Starting Address */
-{VIACR, CR63, 0xFF, 0x00},	/* Secondary Display Starting Address */
-{VIACR, CR64, 0xFF, 0x00},	/* Secondary Display Starting Address */
 {VIACR, CR68, 0xFF, 0x67},	/* Default FIFO For IGA2 */
 {VIACR, CR69, 0xFF, 0x00},
 {VIACR, CR6A, 0xFD, 0x40},
@@ -233,9 +225,6 @@ struct io_reg KM400_ModeXregs[] = {
 	{VIACR, CR55, 0x80, 0x00},
 	{VIACR, CR5D, 0x80, 0x00},
 	{VIACR, CR36, 0xFF, 0x01},	/* Power Mangement 3                  */
-	{VIACR, CR62, 0xFF, 0x00},	/* Secondary Display Starting Address */
-	{VIACR, CR63, 0xFF, 0x00},	/* Secondary Display Starting Address */
-	{VIACR, CR64, 0xFF, 0x00},	/* Secondary Display Starting Address */
 	{VIACR, CR68, 0xFF, 0x67},	/* Default FIFO For IGA2              */
 	{VIACR, CR6A, 0x20, 0x20},	/* Extended FIFO On                   */
 	{VIACR, CR7A, 0xFF, 0x01},	/* LCD Scaling Parameter 1            */
@@ -285,14 +274,9 @@ struct io_reg CX700_ModeXregs[] = { {VIASR, SR10, 0xFF, 0x01},
 {VIACR, CR0F, 0xFF, 0x00},	/* Cursor Localtion Low                */
 {VIACR, CR32, 0xFF, 0x00},
 {VIACR, CR33, 0xFF, 0x00},
-{VIACR, CR34, 0xFF, 0x00},
 {VIACR, CR35, 0xFF, 0x00},
 {VIACR, CR36, 0x08, 0x00},
 {VIACR, CR47, 0xC8, 0x00},	/* Clear VCK Plus. */
-{VIACR, CR62, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CR63, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CR64, 0xFF, 0x00},	/* Secondary Display Starting Address  */
-{VIACR, CRA3, 0xFF, 0x00},	/* Secondary Display Starting Address  */
 {VIACR, CR69, 0xFF, 0x00},
 {VIACR, CR6A, 0xFF, 0x40},
 {VIACR, CR6B, 0xFF, 0x00},
@@ -340,7 +324,6 @@ struct io_reg CLE266_ModeXregs[] = { {VIASR, SR1E, 0xF0, 0x00},
 {VIASR, SR1A, 0xFB, 0x08},
 
 {VIACR, CR32, 0xFF, 0x00},
-{VIACR, CR34, 0xFF, 0x00},
 {VIACR, CR35, 0xFF, 0x00},
 {VIACR, CR36, 0x08, 0x00},
 {VIACR, CR6A, 0xFF, 0x80},
-- 
cgit v0.10.2


From 81228a36a5d05181fff990c852a9abdf03c75593 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:13 -0700
Subject: viafb: merge viafb_update_viafb_par in viafb_update_fix

Shrink and merge viafb_update_viafb_par.  This removes a lot of duplicated
data in viafb_par.  Use the relevant data of fb_info instead.  On the way
it removes an inconsistency in handling a second framebuffer which only
worked because viafbinfo1->par is modified to point to the same viafb_par
as viafbinfo->par.

Code cleanup only, no runtime change expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c
index 45c54bf..b3e7e82 100644
--- a/drivers/video/via/accel.c
+++ b/drivers/video/via/accel.c
@@ -34,7 +34,7 @@ void viafb_init_accel(void)
 
 void viafb_init_2d_engine(void)
 {
-	u32 dwVQStartAddr, dwVQEndAddr;
+	u32 dwVQStartAddr, dwVQEndAddr, linesize;
 	u32 dwVQLen, dwVQStartL, dwVQEndL, dwVQStartEndH;
 
 	/* init 2D engine regs to reset 2D engine */
@@ -191,17 +191,14 @@ void viafb_init_2d_engine(void)
 		}
 	}
 
-	viafb_set_2d_color_depth(viaparinfo->bpp);
+	viafb_set_2d_color_depth(viafbinfo->var.bits_per_pixel);
 
 	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCBASE);
 	writel(0x0, viaparinfo->io_virt + VIA_REG_DSTBASE);
 
-	writel(VIA_PITCH_ENABLE |
-		   (((viaparinfo->hres *
-		      viaparinfo->bpp >> 3) >> 3) | (((viaparinfo->hres *
-						   viaparinfo->
-						   bpp >> 3) >> 3) << 16)),
-					viaparinfo->io_virt + VIA_REG_PITCH);
+	linesize = viafbinfo->var.xres * viafbinfo->var.bits_per_pixel >> 3;
+	writel(VIA_PITCH_ENABLE | (linesize >> 3) | ((linesize >> 3) << 16),
+		viaparinfo->io_virt + VIA_REG_PITCH);
 }
 
 void viafb_set_2d_color_depth(int bpp)
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 72833f3..6612ddc 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -100,21 +100,17 @@ static const struct viafb_modeinfo viafb_modentry[] = {
 
 static struct fb_ops viafb_ops;
 
-static int viafb_update_fix(struct fb_fix_screeninfo *fix, struct fb_info *info)
-{
-	struct viafb_par *ppar;
-	ppar = info->par;
-
-	DEBUG_MSG(KERN_INFO "viafb_update_fix!\n");
 
-	fix->visual =
-	    ppar->bpp == 8 ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
-	fix->line_length = ppar->linelength;
+static void viafb_update_fix(struct fb_info *info)
+{
+	u32 bpp = info->var.bits_per_pixel;
 
-	return 0;
+	info->fix.visual =
+		bpp == 8 ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
+	info->fix.line_length =
+		((info->var.xres_virtual + 7) & ~7) * bpp / 8;
 }
 
-
 static void viafb_setup_fixinfo(struct fb_fix_screeninfo *fix,
 	struct viafb_par *viaparinfo)
 {
@@ -147,19 +143,6 @@ static int viafb_release(struct fb_info *info, int user)
 	return 0;
 }
 
-static void viafb_update_viafb_par(struct fb_info *info)
-{
-	struct viafb_par *ppar;
-
-	ppar = info->par;
-	ppar->bpp = info->var.bits_per_pixel;
-	ppar->linelength = ((info->var.xres_virtual + 7) & ~7) * ppar->bpp / 8;
-	ppar->hres = info->var.xres;
-	ppar->vres = info->var.yres;
-	ppar->xoffset = info->var.xoffset;
-	ppar->yoffset = info->var.yoffset;
-}
-
 static int viafb_check_var(struct fb_var_screeninfo *var,
 	struct fb_info *info)
 {
@@ -255,12 +238,7 @@ static int viafb_set_par(struct fb_info *info)
 		/*We should set memory offset according virtual_x */
 		/*Fix me:put this function into viafb_setmode */
 		viafb_memory_pitch_patch(info);
-
-		/* Update ***fb_par information */
-		viafb_update_viafb_par(info);
-
-		/* Update other fixed information */
-		viafb_update_fix(&info->fix, info);
+		viafb_update_fix(info);
 		viafb_bpp = info->var.bits_per_pixel;
 		/* Update viafb_accel, it is necessary to our 2D accelerate */
 		viafb_accel = info->var.accel_flags;
@@ -2323,15 +2301,13 @@ static int __devinit via_pci_probe(void)
 		viafb_setup_fixinfo(&viafbinfo1->fix, viaparinfo1);
 		viafb_check_var(&default_var, viafbinfo1);
 		viafbinfo1->var = default_var;
-		viafb_update_viafb_par(viafbinfo);
-		viafb_update_fix(&viafbinfo1->fix, viafbinfo1);
+		viafb_update_fix(viafbinfo1);
 	}
 
 	viafb_setup_fixinfo(&viafbinfo->fix, viaparinfo);
 	viafb_check_var(&default_var, viafbinfo);
 	viafbinfo->var = default_var;
-	viafb_update_viafb_par(viafbinfo);
-	viafb_update_fix(&viafbinfo->fix, viafbinfo);
+	viafb_update_fix(viafbinfo);
 	default_var.activate = FB_ACTIVATE_NOW;
 	fb_alloc_cmap(&viafbinfo->cmap, 256, 0);
 
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index 227b000..ea0df4f 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -38,13 +38,6 @@
 #define VERSION_MINOR       4
 
 struct viafb_par {
-	int bpp;
-	int hres;
-	int vres;
-	int linelength;
-	u32 xoffset;
-	u32 yoffset;
-
 	void __iomem *fbmem_virt;	/*framebuffer virtual memory address */
 	void __iomem *io_virt;	/*iospace virtual memory address */
 	unsigned int fbmem;	/*framebuffer physical memory address */
-- 
cgit v0.10.2


From 09cf11806e4b4be92af76253ff9834c48b416129 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:14 -0700
Subject: viafb: split viafb_set_start_addr up

Move individual start address setting to viafb_set_primary_address and
viafb_set_secondary_address and make it more flexible to reuse it for
panning.  Using central functions makes it easier to follow HW
manipulations.

Remove crt locking as it should be only needed for timing manipulation.
Move iga_path manipulation to via_pci_probe.

Remove memset for screen cleaning as it is currently done only for the
second screen.  This is not needed for normal operation but has a little
chance of causing unwanted display artifacts.  This can be fixed later
more consistent and more efficient (using viafb_fillrect) if needed.

This is a code clenup, no notable runtime changes expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c
index 64a820c..5be4a67 100644
--- a/drivers/video/via/hw.c
+++ b/drivers/video/via/hw.c
@@ -629,70 +629,23 @@ void viafb_set_iga_path(void)
 	}
 }
 
-void viafb_set_start_addr(void)
+void viafb_set_primary_address(u32 addr)
 {
-	unsigned long offset = 0, tmp = 0, size = 0;
-	unsigned long length;
-
-	DEBUG_MSG(KERN_INFO "viafb_set_start_addr!\n");
-	viafb_unlock_crt();
-	/* update starting address of IGA1 */
-	viafb_write_reg(CR0C, VIACR, 0x00);	/*initial starting address */
-	viafb_write_reg(CR0D, VIACR, 0x00);
-	viafb_write_reg(CR34, VIACR, 0x00);
-	viafb_write_reg_mask(CR48, VIACR, 0x00, 0x1F);
-
-	if (viafb_dual_fb) {
-		viaparinfo->iga_path = IGA1;
-		viaparinfo1->iga_path = IGA2;
-	}
-
-	if (viafb_SAMM_ON == 1) {
-		if (!viafb_dual_fb) {
-			if (viafb_second_size)
-				size = viafb_second_size * 1024 * 1024;
-			else
-				size = 8 * 1024 * 1024;
-		} else {
-
-			size = viaparinfo1->memsize;
-		}
-		offset = viafb_second_offset;
-		DEBUG_MSG(KERN_INFO
-			  "viafb_second_size=%lx, second start_adddress=%lx\n",
-			  size, offset);
-	}
-	if (viafb_SAMM_ON == 1) {
-		offset = offset >> 3;
-
-		tmp = viafb_read_reg(VIACR, 0x62) & 0x01;
-		tmp |= (offset & 0x7F) << 1;
-		viafb_write_reg(CR62, VIACR, tmp);
-		viafb_write_reg(CR63, VIACR, ((offset & 0x7F80) >> 7));
-		viafb_write_reg(CR64, VIACR, ((offset & 0x7F8000) >> 15));
-		viafb_write_reg(CRA3, VIACR, ((offset & 0x3800000) >> 23));
-	} else {
-		/* update starting address */
-		viafb_write_reg(CR62, VIACR, 0x00);
-		viafb_write_reg(CR63, VIACR, 0x00);
-		viafb_write_reg(CR64, VIACR, 0x00);
-		viafb_write_reg(CRA3, VIACR, 0x00);
-	}
-
-	if (viafb_SAMM_ON == 1) {
-		if (viafb_accel) {
-			if (!viafb_dual_fb)
-				length = size - viaparinfo->fbmem_used;
-			else
-				length = size - viaparinfo1->fbmem_used;
-		} else
-			length = size;
-		offset = (unsigned long)(void *)viafb_FB_MM +
-			viafb_second_offset;
-		memset((void *)offset, 0, length);
-	}
+	DEBUG_MSG(KERN_DEBUG "viafb_set_primary_address(0x%08X)\n", addr);
+	viafb_write_reg(CR0D, VIACR, addr & 0xFF);
+	viafb_write_reg(CR0C, VIACR, (addr >> 8) & 0xFF);
+	viafb_write_reg(CR34, VIACR, (addr >> 16) & 0xFF);
+	viafb_write_reg_mask(CR48, VIACR, (addr >> 24) & 0x1F, 0x1F);
+}
 
-	viafb_lock_crt();
+void viafb_set_secondary_address(u32 addr)
+{
+	DEBUG_MSG(KERN_DEBUG "viafb_set_secondary_address(0x%08X)\n", addr);
+	/* secondary display supports only quadword aligned memory */
+	viafb_write_reg_mask(CR62, VIACR, (addr >> 2) & 0xFE, 0xFE);
+	viafb_write_reg(CR63, VIACR, (addr >> 10) & 0xFF);
+	viafb_write_reg(CR64, VIACR, (addr >> 18) & 0xFF);
+	viafb_write_reg_mask(CRA3, VIACR, (addr >> 26) & 0x07, 0x07);
 }
 
 void viafb_set_output_path(int device, int set_iga, int output_interface)
@@ -2288,7 +2241,8 @@ int viafb_setmode(int vmode_index, int hor_res, int ver_res, int video_bpp,
 		outb(VPIT.SR[i - 1], VIASR + 1);
 	}
 
-	viafb_set_start_addr();
+	viafb_set_primary_address(0);
+	viafb_set_secondary_address(viafb_SAMM_ON ? viafb_second_offset : 0);
 	viafb_set_iga_path();
 
 	/* Write CRTC */
diff --git a/drivers/video/via/hw.h b/drivers/video/via/hw.h
index 6ff38fa..874ea3f 100644
--- a/drivers/video/via/hw.h
+++ b/drivers/video/via/hw.h
@@ -927,7 +927,8 @@ void viafb_get_mmio_info(unsigned long *mmio_base,
 	unsigned long *mmio_len);
 
 void viafb_set_iga_path(void);
-void viafb_set_start_addr(void);
+void viafb_set_primary_address(u32 addr);
+void viafb_set_secondary_address(u32 addr);
 void viafb_get_fb_info(unsigned int *fb_base, unsigned int *fb_len);
 
 #endif /* __HW_H__ */
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 6612ddc..743eed4 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -481,12 +481,7 @@ static int viafb_pan_display(struct fb_var_screeninfo *var,
 	    var->bits_per_pixel / 16;
 
 	DEBUG_MSG(KERN_INFO "\nviafb_pan_display,offset =%d ", offset);
-
-	viafb_write_reg_mask(0x48, 0x3d4, ((offset >> 24) & 0x3), 0x3);
-	viafb_write_reg_mask(0x34, 0x3d4, ((offset >> 16) & 0xff), 0xff);
-	viafb_write_reg_mask(0x0c, 0x3d4, ((offset >> 8) & 0xff), 0xff);
-	viafb_write_reg_mask(0x0d, 0x3d4, (offset & 0xff), 0xff);
-
+	viafb_set_primary_address(offset);
 	return 0;
 }
 
@@ -1353,7 +1348,8 @@ static void viafb_set_device(struct device_t active_dev)
 		viafb_SAMM_ON = active_dev.samm;
 	viafb_primary_dev = active_dev.primary_dev;
 
-	viafb_set_start_addr();
+	viafb_set_primary_address(0);
+	viafb_set_secondary_address(viafb_SAMM_ON ? viafb_second_offset : 0);
 	viafb_set_iga_path();
 }
 
@@ -1537,7 +1533,8 @@ static int apply_device_setting(struct viafb_ioctl_setting setting_info,
 			if (viafb_SAMM_ON)
 				viafb_primary_dev = setting_info.primary_device;
 
-			viafb_set_start_addr();
+			viafb_set_primary_address(0);
+			viafb_set_secondary_address(viafb_SAMM_ON ? viafb_second_offset : 0);
 			viafb_set_iga_path();
 		}
 		need_set_mode = 1;
@@ -2275,6 +2272,8 @@ static int __devinit via_pci_probe(void)
 				viafb_second_offset;
 		}
 
+		viaparinfo->iga_path = IGA1;
+		viaparinfo1->iga_path = IGA2;
 		memcpy(viafbinfo1, viafbinfo, sizeof(struct fb_info));
 		viafbinfo1->screen_base = viafbinfo->screen_base +
 			viafb_second_offset;
-- 
cgit v0.10.2


From bc6932bb4a25ced97c4f201874573e6097237b78 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:15 -0700
Subject: viafb: fix ioremap_nocache error handling

Correct the returned error code for remapping the video framebuffer.
Introduce error handling for remapping MMIO register address space to
avoid a NULL pointer dereference.  Disable hardware acceleration if
remapping MMIO register address space failed as those registers are only
used for hardware acceleration.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 743eed4..72759b6 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -2134,12 +2134,16 @@ static int __devinit via_pci_probe(void)
 
 	if (!viaparinfo->fbmem_virt) {
 		printk(KERN_INFO "ioremap failed\n");
-		return -1;
+		return -ENOMEM;
 	}
 
 	viafb_get_mmio_info(&viaparinfo->mmio_base, &viaparinfo->mmio_len);
 	viaparinfo->io_virt = ioremap_nocache(viaparinfo->mmio_base,
 		viaparinfo->mmio_len);
+	if (!viaparinfo->io_virt) {
+		printk(KERN_WARNING "ioremap failed: hardware acceleration disabled\n");
+		viafb_accel = 0;
+	}
 
 	viafbinfo->node = 0;
 	viafbinfo->fbops = &viafb_ops;
-- 
cgit v0.10.2


From deb7aab6379502fd173cd34efa005108eb0b111b Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:16 -0700
Subject: viafb: clean up viamode.h

Remove unneeeded declarations from the header and makes it more
maintainable by evaluating the array size in the file the array exist in
and exporting it via variables.  This removes the need to keep the array
size in the header in sync with the real array size.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viamode.c b/drivers/video/via/viamode.c
index 7abd65d..9ff9424 100644
--- a/drivers/video/via/viamode.c
+++ b/drivers/video/via/viamode.c
@@ -1006,3 +1006,13 @@ struct VideoModeTable CEA_HDMI_Modes[] = {
 	{VIA_RES_1280X720, CEAM1280x720, ARRAY_SIZE(CEAM1280x720)},
 	{VIA_RES_1920X1080, CEAM1920x1080, ARRAY_SIZE(CEAM1920x1080)}
 };
+
+int NUM_TOTAL_RES_MAP_REFRESH = ARRAY_SIZE(res_map_refresh_tbl);
+int NUM_TOTAL_CEA_MODES = ARRAY_SIZE(CEA_HDMI_Modes);
+int NUM_TOTAL_CN400_ModeXregs = ARRAY_SIZE(CN400_ModeXregs);
+int NUM_TOTAL_CN700_ModeXregs = ARRAY_SIZE(CN700_ModeXregs);
+int NUM_TOTAL_KM400_ModeXregs = ARRAY_SIZE(KM400_ModeXregs);
+int NUM_TOTAL_CX700_ModeXregs = ARRAY_SIZE(CX700_ModeXregs);
+int NUM_TOTAL_CLE266_ModeXregs = ARRAY_SIZE(CLE266_ModeXregs);
+int NUM_TOTAL_PATCH_MODE = ARRAY_SIZE(res_patch_table);
+int NUM_TOTAL_MODETABLE = ARRAY_SIZE(CLE266Modes);
diff --git a/drivers/video/via/viamode.h b/drivers/video/via/viamode.h
index 2ec8bfe..504e16a 100644
--- a/drivers/video/via/viamode.h
+++ b/drivers/video/via/viamode.h
@@ -50,127 +50,33 @@ struct res_map_refresh {
 	int vmode_refresh;
 };
 
-#define NUM_TOTAL_RES_MAP_REFRESH ARRAY_SIZE(res_map_refresh_tbl)
-#define NUM_TOTAL_CEA_MODES  ARRAY_SIZE(CEA_HDMI_Modes)
-#define NUM_TOTAL_CN400_ModeXregs ARRAY_SIZE(CN400_ModeXregs)
-#define NUM_TOTAL_CN700_ModeXregs ARRAY_SIZE(CN700_ModeXregs)
-#define NUM_TOTAL_KM400_ModeXregs ARRAY_SIZE(KM400_ModeXregs)
-#define NUM_TOTAL_CX700_ModeXregs ARRAY_SIZE(CX700_ModeXregs)
-#define NUM_TOTAL_CLE266_ModeXregs ARRAY_SIZE(CLE266_ModeXregs)
-#define NUM_TOTAL_PATCH_MODE ARRAY_SIZE(res_patch_table)
-#define NUM_TOTAL_MODETABLE ARRAY_SIZE(CLE266Modes)
+extern int NUM_TOTAL_RES_MAP_REFRESH;
+extern int NUM_TOTAL_CEA_MODES;
+extern int NUM_TOTAL_CN400_ModeXregs;
+extern int NUM_TOTAL_CN700_ModeXregs;
+extern int NUM_TOTAL_KM400_ModeXregs;
+extern int NUM_TOTAL_CX700_ModeXregs;
+extern int NUM_TOTAL_CLE266_ModeXregs;
+extern int NUM_TOTAL_PATCH_MODE;
+extern int NUM_TOTAL_MODETABLE;
 
 /********************/
 /* Mode Table       */
 /********************/
 
-/* 480x640 */
-extern struct crt_mode_table CRTM480x640[1];
-/* 640x480*/
-extern struct crt_mode_table CRTM640x480[5];
-/*720x480 (GTF)*/
-extern struct crt_mode_table CRTM720x480[1];
-/*720x576 (GTF)*/
-extern struct crt_mode_table CRTM720x576[1];
-/* 800x480 (CVT) */
-extern struct crt_mode_table CRTM800x480[1];
-/* 800x600*/
-extern struct crt_mode_table CRTM800x600[5];
-/* 848x480 (CVT) */
-extern struct crt_mode_table CRTM848x480[1];
-/*856x480 (GTF) convert to 852x480*/
-extern struct crt_mode_table CRTM852x480[1];
-/*1024x512 (GTF)*/
-extern struct crt_mode_table CRTM1024x512[1];
-/* 1024x600*/
-extern struct crt_mode_table CRTM1024x600[1];
-/* 1024x768*/
-extern struct crt_mode_table CRTM1024x768[4];
-/* 1152x864*/
-extern struct crt_mode_table CRTM1152x864[1];
-/* 1280x720 (HDMI 720P)*/
-extern struct crt_mode_table CRTM1280x720[2];
-/*1280x768 (GTF)*/
-extern struct crt_mode_table CRTM1280x768[2];
-/* 1280x800 (CVT) */
-extern struct crt_mode_table CRTM1280x800[1];
-/*1280x960*/
-extern struct crt_mode_table CRTM1280x960[1];
-/* 1280x1024*/
-extern struct crt_mode_table CRTM1280x1024[3];
-/* 1368x768 (GTF) */
-extern struct crt_mode_table CRTM1368x768[1];
-/*1440x1050 (GTF)*/
-extern struct crt_mode_table CRTM1440x1050[1];
-/* 1600x1200*/
-extern struct crt_mode_table CRTM1600x1200[2];
-/* 1680x1050 (CVT) */
-extern struct crt_mode_table CRTM1680x1050[2];
-/* 1680x1050 (CVT Reduce Blanking) */
-extern struct crt_mode_table CRTM1680x1050_RB[1];
-/* 1920x1080 (CVT)*/
-extern struct crt_mode_table CRTM1920x1080[1];
-/* 1920x1080 (CVT with Reduce Blanking) */
-extern struct crt_mode_table CRTM1920x1080_RB[1];
-/* 1920x1440*/
-extern struct crt_mode_table CRTM1920x1440[2];
-/* 1400x1050 (CVT) */
-extern struct crt_mode_table CRTM1400x1050[2];
-/* 1400x1050 (CVT Reduce Blanking) */
-extern struct crt_mode_table CRTM1400x1050_RB[1];
-/* 960x600 (CVT) */
-extern struct crt_mode_table CRTM960x600[1];
-/* 1000x600 (GTF) */
-extern struct crt_mode_table CRTM1000x600[1];
-/* 1024x576 (GTF) */
-extern struct crt_mode_table CRTM1024x576[1];
-/* 1088x612 (CVT) */
-extern struct crt_mode_table CRTM1088x612[1];
-/* 1152x720 (CVT) */
-extern struct crt_mode_table CRTM1152x720[1];
-/* 1200x720 (GTF) */
-extern struct crt_mode_table CRTM1200x720[1];
-/* 1280x600 (GTF) */
-extern struct crt_mode_table CRTM1280x600[1];
-/* 1360x768 (CVT) */
-extern struct crt_mode_table CRTM1360x768[1];
-/* 1360x768 (CVT Reduce Blanking) */
-extern struct crt_mode_table CRTM1360x768_RB[1];
-/* 1366x768 (GTF) */
-extern struct crt_mode_table CRTM1366x768[2];
-/* 1440x900 (CVT) */
-extern struct crt_mode_table CRTM1440x900[2];
-/* 1440x900 (CVT Reduce Blanking) */
-extern struct crt_mode_table CRTM1440x900_RB[1];
-/* 1600x900 (CVT) */
-extern struct crt_mode_table CRTM1600x900[1];
-/* 1600x900 (CVT Reduce Blanking) */
-extern struct crt_mode_table CRTM1600x900_RB[1];
-/* 1600x1024 (GTF) */
-extern struct crt_mode_table CRTM1600x1024[1];
-/* 1792x1344 (DMT) */
-extern struct crt_mode_table CRTM1792x1344[1];
-/* 1856x1392 (DMT) */
-extern struct crt_mode_table CRTM1856x1392[1];
-/* 1920x1200 (CVT) */
-extern struct crt_mode_table CRTM1920x1200[1];
-/* 1920x1200 (CVT with Reduce Blanking) */
-extern struct crt_mode_table CRTM1920x1200_RB[1];
-/* 2048x1536 (CVT) */
-extern struct crt_mode_table CRTM2048x1536[1];
-extern struct VideoModeTable CLE266Modes[47];
-extern struct crt_mode_table CEAM1280x720[1];
-extern struct crt_mode_table CEAM1920x1080[1];
-extern struct VideoModeTable CEA_HDMI_Modes[2];
+extern struct VideoModeTable CLE266Modes[];
+extern struct crt_mode_table CEAM1280x720[];
+extern struct crt_mode_table CEAM1920x1080[];
+extern struct VideoModeTable CEA_HDMI_Modes[];
 
-extern struct res_map_refresh res_map_refresh_tbl[61];
-extern struct io_reg CN400_ModeXregs[52];
-extern struct io_reg CN700_ModeXregs[66];
-extern struct io_reg KM400_ModeXregs[55];
-extern struct io_reg CX700_ModeXregs[58];
-extern struct io_reg VX800_ModeXregs[58];
-extern struct io_reg CLE266_ModeXregs[32];
-extern struct io_reg PM1024x768[2];
-extern struct patch_table res_patch_table[1];
+extern struct res_map_refresh res_map_refresh_tbl[];
+extern struct io_reg CN400_ModeXregs[];
+extern struct io_reg CN700_ModeXregs[];
+extern struct io_reg KM400_ModeXregs[];
+extern struct io_reg CX700_ModeXregs[];
+extern struct io_reg VX800_ModeXregs[];
+extern struct io_reg CLE266_ModeXregs[];
+extern struct io_reg PM1024x768[];
+extern struct patch_table res_patch_table[];
 extern struct VPITTable VPIT;
 #endif /* __VIAMODE_H__ */
-- 
cgit v0.10.2


From 13147f291cb18489cbaa550b100dee6f9defd007 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:17 -0700
Subject: viafb: remove duplicated mode information

Remove the mode information from viafbdev.c and uses the one of viamode.c
instead.  This is possible because horizontal and vertical address are the
same as horizontal and vertical resolution.  The reduced blanking modes in
the table are no problem because they have a higher index than the normal
modes and therefore always the normal modes are selected just as the old
behaviour.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 72759b6..50e9300 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -53,51 +53,6 @@ static void retrieve_device_setting(struct viafb_ioctl_setting
 static void viafb_set_video_device(u32 video_dev_info);
 static void viafb_get_video_device(u32 *video_dev_info);
 
-/* Mode information */
-static const struct viafb_modeinfo viafb_modentry[] = {
-	{480, 640, VIA_RES_480X640},
-	{640, 480, VIA_RES_640X480},
-	{800, 480, VIA_RES_800X480},
-	{800, 600, VIA_RES_800X600},
-	{1024, 768, VIA_RES_1024X768},
-	{1152, 864, VIA_RES_1152X864},
-	{1280, 1024, VIA_RES_1280X1024},
-	{1600, 1200, VIA_RES_1600X1200},
-	{1440, 1050, VIA_RES_1440X1050},
-	{1280, 768, VIA_RES_1280X768,},
-	{1280, 800, VIA_RES_1280X800},
-	{1280, 960, VIA_RES_1280X960},
-	{1920, 1440, VIA_RES_1920X1440},
-	{848, 480, VIA_RES_848X480},
-	{1400, 1050, VIA_RES_1400X1050},
-	{720, 480, VIA_RES_720X480},
-	{720, 576, VIA_RES_720X576},
-	{1024, 512, VIA_RES_1024X512},
-	{1024, 576, VIA_RES_1024X576},
-	{1024, 600, VIA_RES_1024X600},
-	{1280, 720, VIA_RES_1280X720},
-	{1920, 1080, VIA_RES_1920X1080},
-	{1366, 768, VIA_RES_1368X768},
-	{1680, 1050, VIA_RES_1680X1050},
-	{960, 600, VIA_RES_960X600},
-	{1000, 600, VIA_RES_1000X600},
-	{1024, 576, VIA_RES_1024X576},
-	{1024, 600, VIA_RES_1024X600},
-	{1088, 612, VIA_RES_1088X612},
-	{1152, 720, VIA_RES_1152X720},
-	{1200, 720, VIA_RES_1200X720},
-	{1280, 600, VIA_RES_1280X600},
-	{1360, 768, VIA_RES_1360X768},
-	{1440, 900, VIA_RES_1440X900},
-	{1600, 900, VIA_RES_1600X900},
-	{1600, 1024, VIA_RES_1600X1024},
-	{1792, 1344, VIA_RES_1792X1344},
-	{1856, 1392, VIA_RES_1856X1392},
-	{1920, 1200, VIA_RES_1920X1200},
-	{2048, 1536, VIA_RES_2048X1536},
-	{0, 0, VIA_RES_INVALID}
-};
-
 static struct fb_ops viafb_ops;
 
 
@@ -1239,12 +1194,16 @@ int viafb_get_mode_index(int hres, int vres)
 	u32 i;
 	DEBUG_MSG(KERN_INFO "viafb_get_mode_index!\n");
 
-	for (i = 0; viafb_modentry[i].mode_index != VIA_RES_INVALID; i++)
-		if (viafb_modentry[i].xres == hres &&
-			viafb_modentry[i].yres == vres)
+	for (i = 0; i < NUM_TOTAL_MODETABLE; i++)
+		if (CLE266Modes[i].mode_array &&
+			CLE266Modes[i].crtc[0].crtc.hor_addr == hres &&
+			CLE266Modes[i].crtc[0].crtc.ver_addr == vres)
 			break;
 
-	return viafb_modentry[i].mode_index;
+	if (i == NUM_TOTAL_MODETABLE)
+		return VIA_RES_INVALID;
+
+	return CLE266Modes[i].ModeIndex;
 }
 
 static void check_available_device_to_enable(int device_id)
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index ea0df4f..cd3dff7 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -70,11 +70,7 @@ struct viafb_par {
 	int video_on_lcd;
 
 };
-struct viafb_modeinfo {
-	u32 xres;
-	u32 yres;
-	int mode_index;
-};
+
 extern unsigned int viafb_second_virtual_yres;
 extern unsigned int viafb_second_virtual_xres;
 extern unsigned int viafb_second_offset;
-- 
cgit v0.10.2


From b008c64b5dfe37ac14928668da60132e9c8361ff Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:18 -0700
Subject: viafb: clean up duoview

Clean the duoview handling up by replacing the varible with the funtion in
the only place where it is used.  This is a code cleanup only, no runtime
change expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 50e9300..0cd112a 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -1009,7 +1009,8 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 		return -ENODEV;
 
 	/* When duoview and using lcd , use soft cursor */
-	if (viafb_LCD_ON || ((struct viafb_par *)(info->par))->duoview)
+	if (viafb_LCD_ON || (!viafb_SAMM_ON &&
+		viafb_LCD2_ON + viafb_DVI_ON + viafb_CRT_ON == 2))
 		return -ENODEV;
 
 	viafb_show_hw_cursor(info, HW_Cursor_OFF);
@@ -1379,18 +1380,6 @@ static int get_primary_device(void)
 	return primary_device;
 }
 
-static u8 is_duoview(void)
-{
-	if (0 == viafb_SAMM_ON) {
-		if (viafb_LCD_ON + viafb_LCD2_ON +
-			viafb_DVI_ON + viafb_CRT_ON == 2)
-			return true;
-		return false;
-	} else {
-		return false;
-	}
-}
-
 static void apply_second_mode_setting(struct fb_var_screeninfo
 	*sec_var)
 {
@@ -1499,8 +1488,6 @@ static int apply_device_setting(struct viafb_ioctl_setting setting_info,
 		need_set_mode = 1;
 	}
 
-	viaparinfo->duoview = is_duoview();
-
 	if (!need_set_mode) {
 		;
 	} else {
@@ -1621,7 +1608,6 @@ static void parse_active_dev(void)
 		viafb_CRT_ON = STATE_ON;
 		viafb_SAMM_ON = STATE_OFF;
 	}
-	viaparinfo->duoview = is_duoview();
 }
 
 static void parse_video_dev(void)
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index cd3dff7..9231877 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -52,7 +52,6 @@ struct viafb_par {
 	u32 VQ_end;		/* Virtual Queue End Address */
 	u32 iga_path;
 	struct proc_dir_entry *proc_entry;	/*viafb proc entry */
-	u8 duoview;		/*Is working in duoview mode? */
 
 	/* I2C stuff */
 	struct via_i2c_stuff i2c_stuff;
-- 
cgit v0.10.2


From db88e382a043bf288a6257dc5069f19c5ae67df6 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:20 -0700
Subject: viafb: clean up virtual memory handling

Clean the handling of ioremapped video memory up.  The following changes
were made:

info->screen_base - viafb_FB_MM
(VRAM offset calculation) was replaced by
info->fix.smem_start - viafbinfo->fix.smem_start
which is essentially the same calculation but done with physical instead
virtual addresses.

*->fbmem_virt
was replaced by
viafbinfo->screen_base
This is true for viafbinfo and viafbinfo1 as the par pointers are equal.

An early initialization of viafbinfo1->fix.smem* was removed as done later
in viafb_setup_fixinfo.

This patch highlights that the only usage of the ioremapped video memory
in the driver is for hardware cursor handling.  Even if it has to hold the
used virtual screen mapped for old-fashioned read/write calls (vs.
mmap'ed) a lot virtual memory could be saved by only ioremapping on
demand.

Code cleanup, no runtime changes expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/global.c b/drivers/video/via/global.c
index 468be24..1096fd5 100644
--- a/drivers/video/via/global.c
+++ b/drivers/video/via/global.c
@@ -46,7 +46,6 @@ int viafb_hotplug_refresh = 60;
 unsigned int viafb_second_offset;
 int viafb_second_size;
 int viafb_primary_dev = None_Device;
-void __iomem *viafb_FB_MM;
 unsigned int viafb_second_xres = 640;
 unsigned int viafb_second_yres = 480;
 unsigned int viafb_second_virtual_xres;
diff --git a/drivers/video/via/global.h b/drivers/video/via/global.h
index 7543d5f..11382e5 100644
--- a/drivers/video/via/global.h
+++ b/drivers/video/via/global.h
@@ -77,7 +77,6 @@ extern int viafb_hotplug_Yres;
 extern int viafb_hotplug_bpp;
 extern int viafb_hotplug_refresh;
 extern int viafb_primary_dev;
-extern void __iomem *viafb_FB_MM;
 extern struct fb_cursor viacursor;
 
 extern unsigned int viafb_second_xres;
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 0cd112a..86835ee 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -832,8 +832,7 @@ static void viafb_fillrect(struct fb_info *info,
 	/* Source Base Address */
 	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCBASE);
 	/* Destination Base Address */
-	writel(((unsigned long) (info->screen_base) -
-		   (unsigned long) viafb_FB_MM) >> 3,
+	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
 		   viaparinfo->io_virt + VIA_REG_DSTBASE);
 	/* Pitch */
 	pitch = (info->var.xres_virtual + 7) & ~7;
@@ -887,12 +886,10 @@ static void viafb_copyarea(struct fb_info *info,
 	}
 
 	/* Source Base Address */
-	writel(((unsigned long) (info->screen_base) -
-		   (unsigned long) viafb_FB_MM) >> 3,
+	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
 		   viaparinfo->io_virt + VIA_REG_SRCBASE);
 	/* Destination Base Address */
-	writel(((unsigned long) (info->screen_base) -
-		   (unsigned long) viafb_FB_MM) >> 3,
+	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
 		   viaparinfo->io_virt + VIA_REG_DSTBASE);
 	/* Pitch */
 	pitch = (info->var.xres_virtual + 7) & ~7;
@@ -951,8 +948,7 @@ static void viafb_imageblit(struct fb_info *info,
 	/* Source Base Address */
 	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCBASE);
 	/* Destination Base Address */
-	writel(((unsigned long) (info->screen_base) -
-		   (unsigned long) viafb_FB_MM) >> 3,
+	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
 		   viaparinfo->io_virt + VIA_REG_DSTBASE);
 	/* Pitch */
 	pitch = (info->var.xres_virtual + 7) & ~7;
@@ -1170,7 +1166,7 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 			}
 		}
 
-		memcpy(((struct viafb_par *)(info->par))->fbmem_virt +
+		memcpy(viafbinfo->screen_base +
 		       ((struct viafb_par *)(info->par))->cursor_start,
 		       cr_data->bak, CURSOR_SIZE);
 out:
@@ -2073,11 +2069,9 @@ static int __devinit via_pci_probe(void)
 	viafb_get_fb_info(&viaparinfo->fbmem, &viaparinfo->memsize);
 	viaparinfo->fbmem_free = viaparinfo->memsize;
 	viaparinfo->fbmem_used = 0;
-	viaparinfo->fbmem_virt = ioremap_nocache(viaparinfo->fbmem,
+	viafbinfo->screen_base = ioremap_nocache(viaparinfo->fbmem,
 		viaparinfo->memsize);
-	viafbinfo->screen_base = (char *)viaparinfo->fbmem_virt;
-
-	if (!viaparinfo->fbmem_virt) {
+	if (!viafbinfo->screen_base) {
 		printk(KERN_INFO "ioremap failed\n");
 		return -ENOMEM;
 	}
@@ -2110,7 +2104,6 @@ static int __devinit via_pci_probe(void)
 			viafb_second_size * 1024 * 1024;
 	}
 
-	viafb_FB_MM = viaparinfo->fbmem_virt;
 	tmpm = viafb_mode;
 	tmpc = strsep(&tmpm, "x");
 	strict_strtoul(tmpc, 0, &default_xres);
@@ -2203,8 +2196,6 @@ static int __devinit via_pci_probe(void)
 		viaparinfo1->memsize = viaparinfo->memsize -
 			viafb_second_offset;
 		viaparinfo->memsize = viafb_second_offset;
-		viaparinfo1->fbmem_virt = viaparinfo->fbmem_virt +
-			viafb_second_offset;
 		viaparinfo1->fbmem = viaparinfo->fbmem + viafb_second_offset;
 
 		viaparinfo1->fbmem_used = viaparinfo->fbmem_used;
@@ -2226,8 +2217,6 @@ static int __devinit via_pci_probe(void)
 		memcpy(viafbinfo1, viafbinfo, sizeof(struct fb_info));
 		viafbinfo1->screen_base = viafbinfo->screen_base +
 			viafb_second_offset;
-		viafbinfo1->fix.smem_start = viaparinfo1->fbmem;
-		viafbinfo1->fix.smem_len = viaparinfo1->fbmem_free;
 
 		default_var.xres = viafb_second_xres;
 		default_var.yres = viafb_second_yres;
@@ -2289,7 +2278,7 @@ static void __devexit via_pci_remove(void)
 	unregister_framebuffer(viafbinfo);
 	if (viafb_dual_fb)
 		unregister_framebuffer(viafbinfo1);
-	iounmap((void *)viaparinfo->fbmem_virt);
+	iounmap((void *)viafbinfo->screen_base);
 	iounmap(viaparinfo->io_virt);
 
 	viafb_delete_i2c_buss(viaparinfo);
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index 9231877..e5a247c 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -38,7 +38,6 @@
 #define VERSION_MINOR       4
 
 struct viafb_par {
-	void __iomem *fbmem_virt;	/*framebuffer virtual memory address */
 	void __iomem *io_virt;	/*iospace virtual memory address */
 	unsigned int fbmem;	/*framebuffer physical memory address */
 	unsigned int memsize;	/*size of fbmem */
-- 
cgit v0.10.2


From b852abc263519e89a364dc81e697cba0d4371cf3 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:21 -0700
Subject: viafb: remove unused video device stuff

Remove everything related to video devices from the driver as it did not
influence the driver operation.  This patch does change the userspace
behaviour as it removes two IOCTLs and one module parameter.  But this is
good as it removes useless stuff and helps the user to figure out the
options that do affect the driver behaviour (which are still too many).

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/ioctl.h b/drivers/video/via/ioctl.h
index 842fe30..de89980 100644
--- a/drivers/video/via/ioctl.h
+++ b/drivers/video/via/ioctl.h
@@ -50,8 +50,6 @@
 #define VIAFB_GET_GAMMA_LUT		0x56494124
 #define VIAFB_SET_GAMMA_LUT		0x56494125
 #define VIAFB_GET_GAMMA_SUPPORT_STATE	0x56494126
-#define VIAFB_SET_VIDEO_DEVICE		0x56494127
-#define VIAFB_GET_VIDEO_DEVICE		0x56494128
 #define VIAFB_SET_SECOND_MODE		0x56494129
 #define VIAFB_SYNC_SURFACE		0x56494130
 #define VIAFB_GET_DRIVER_CAPS		0x56494131
@@ -179,9 +177,7 @@ struct viafb_ioctl_setting {
 	unsigned short second_dev_bpp;
 	/* Indicate which device are primary display device. */
 	unsigned int primary_device;
-	/* Indicate which device will show video. only valid in duoview mode */
-	unsigned int video_device_status;
-	unsigned int struct_reserved[34];
+	unsigned int struct_reserved[35];
 	struct viafb_ioctl_lcd_attribute lcd_attributes;
 };
 
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 86835ee..924177c 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -36,9 +36,6 @@ static char *viafb_mode1 = "640x480";
 /* Added for specifying active devices.*/
 char *viafb_active_dev = "";
 
-/* Added for specifying video on devices.*/
-char *viafb_video_dev = "";
-
 /*Added for specify lcd output port*/
 char *viafb_lcd_port = "";
 char *viafb_dvi_port = "";
@@ -50,8 +47,6 @@ static void apply_second_mode_setting(struct fb_var_screeninfo
 	*sec_var);
 static void retrieve_device_setting(struct viafb_ioctl_setting
 	*setting_info);
-static void viafb_set_video_device(u32 video_dev_info);
-static void viafb_get_video_device(u32 *video_dev_info);
 
 static struct fb_ops viafb_ops;
 
@@ -488,7 +483,6 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 
 	u32 __user *argp = (u32 __user *) arg;
 	u32 gpu32;
-	u32 video_dev_info = 0;
 
 	DEBUG_MSG(KERN_INFO "viafb_ioctl: 0x%X !!\n", cmd);
 	memset(&u, 0, sizeof(u));
@@ -720,15 +714,6 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 		if (put_user(state_info, argp))
 			return -EFAULT;
 		break;
-	case VIAFB_SET_VIDEO_DEVICE:
-		get_user(video_dev_info, argp);
-		viafb_set_video_device(video_dev_info);
-		break;
-	case VIAFB_GET_VIDEO_DEVICE:
-		viafb_get_video_device(&video_dev_info);
-		if (put_user(video_dev_info, argp))
-			return -EFAULT;
-		break;
 	case VIAFB_SYNC_SURFACE:
 		DEBUG_MSG(KERN_INFO "lobo VIAFB_SYNC_SURFACE\n");
 		break;
@@ -1309,32 +1294,6 @@ static void viafb_set_device(struct device_t active_dev)
 	viafb_set_iga_path();
 }
 
-static void viafb_set_video_device(u32 video_dev_info)
-{
-	viaparinfo->video_on_crt = STATE_OFF;
-	viaparinfo->video_on_dvi = STATE_OFF;
-	viaparinfo->video_on_lcd = STATE_OFF;
-
-	/* Check available device to enable: */
-	if ((video_dev_info & CRT_Device) == CRT_Device)
-		viaparinfo->video_on_crt = STATE_ON;
-	else if ((video_dev_info & DVI_Device) == DVI_Device)
-		viaparinfo->video_on_dvi = STATE_ON;
-	else if ((video_dev_info & LCD_Device) == LCD_Device)
-		viaparinfo->video_on_lcd = STATE_ON;
-}
-
-static void viafb_get_video_device(u32 *video_dev_info)
-{
-	*video_dev_info = None_Device;
-	if (viaparinfo->video_on_crt == STATE_ON)
-		*video_dev_info |= CRT_Device;
-	else if (viaparinfo->video_on_dvi == STATE_ON)
-		*video_dev_info |= DVI_Device;
-	else if (viaparinfo->video_on_lcd == STATE_ON)
-		*video_dev_info |= LCD_Device;
-}
-
 static int get_primary_device(void)
 {
 	int primary_device = 0;
@@ -1506,18 +1465,6 @@ static void retrieve_device_setting(struct viafb_ioctl_setting
 		setting_info->device_status |= LCD_Device;
 	if (viafb_LCD2_ON == 1)
 		setting_info->device_status |= LCD2_Device;
-	if ((viaparinfo->video_on_crt == 1) && (viafb_CRT_ON == 1)) {
-		setting_info->video_device_status =
-			viaparinfo->crt_setting_info->iga_path;
-	} else if ((viaparinfo->video_on_dvi == 1) && (viafb_DVI_ON == 1)) {
-		setting_info->video_device_status =
-			viaparinfo->tmds_setting_info->iga_path;
-	} else if ((viaparinfo->video_on_lcd == 1) && (viafb_LCD_ON == 1)) {
-		setting_info->video_device_status =
-			viaparinfo->lvds_setting_info->iga_path;
-	} else {
-		setting_info->video_device_status = 0;
-	}
 
 	setting_info->samm_status = viafb_SAMM_ON;
 	setting_info->primary_device = get_primary_device();
@@ -1606,24 +1553,6 @@ static void parse_active_dev(void)
 	}
 }
 
-static void parse_video_dev(void)
-{
-	viaparinfo->video_on_crt = STATE_OFF;
-	viaparinfo->video_on_dvi = STATE_OFF;
-	viaparinfo->video_on_lcd = STATE_OFF;
-
-	if (!strncmp(viafb_video_dev, "CRT", 3)) {
-		/* Video on CRT */
-		viaparinfo->video_on_crt = STATE_ON;
-	} else if (!strncmp(viafb_video_dev, "DVI", 3)) {
-		/* Video on DVI */
-		viaparinfo->video_on_dvi = STATE_ON;
-	} else if (!strncmp(viafb_video_dev, "LCD", 3)) {
-		/* Video on LCD */
-		viaparinfo->video_on_lcd = STATE_ON;
-	}
-}
-
 static int parse_port(char *opt_str, int *output_interface)
 {
 	if (!strncmp(opt_str, "DVP0", 4))
@@ -2054,7 +1983,6 @@ static int __devinit via_pci_probe(void)
 	if (viafb_dual_fb)
 		viafb_SAMM_ON = 1;
 	parse_active_dev();
-	parse_video_dev();
 	parse_lcd_port();
 	parse_dvi_port();
 
@@ -2354,8 +2282,6 @@ static int __init viafb_setup(char *options)
 		else if (!strncmp(this_opt, "viafb_lcd_mode=", 15))
 			strict_strtoul(this_opt + 15, 0,
 				(unsigned long *)&viafb_lcd_mode);
-		else if (!strncmp(this_opt, "viafb_video_dev=", 16))
-			viafb_video_dev = kstrdup(this_opt + 16, GFP_KERNEL);
 		else if (!strncmp(this_opt, "viafb_lcd_port=", 15))
 			viafb_lcd_port = kstrdup(this_opt + 15, GFP_KERNEL);
 		else if (!strncmp(this_opt, "viafb_dvi_port=", 15))
@@ -2476,9 +2402,6 @@ module_param(viafb_lcd_mode, int, 0);
 MODULE_PARM_DESC(viafb_lcd_mode,
 	"Set Flat Panel mode(Default=OPENLDI)");
 
-module_param(viafb_video_dev, charp, 0);
-MODULE_PARM_DESC(viafb_video_dev, "Specify video devices.");
-
 module_param(viafb_lcd_port, charp, 0);
 MODULE_PARM_DESC(viafb_lcd_port, "Specify LCD output port.");
 
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index e5a247c..cd871f9 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -61,12 +61,6 @@ struct viafb_par {
 	struct lvds_setting_information *lvds_setting_info;
 	struct lvds_setting_information *lvds_setting_info2;
 	struct chip_information *chip_info;
-
-	/* some information related to video playing */
-	int video_on_crt;
-	int video_on_dvi;
-	int video_on_lcd;
-
 };
 
 extern unsigned int viafb_second_virtual_yres;
-- 
cgit v0.10.2


From 8594ac33450e6d66460230e5d07f5515b51476c9 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:22 -0700
Subject: viafb: remove LVDS initialization

At least for VX800 this initialization is not very good as some parts of
the register are written with reserved values.  This makes the display go
white in some configurations and not usable until the framebuffer is
removed.  It's better to not initialize it as it allows to use a
previously (by BIOS) correctly configured display.

This patch makes some displays work but might cause problems on others.
This is bad but can not be easily avoided.  If this causes some
regressions it's probably the best to fix it in the 'active' display setup
code.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viamode.c b/drivers/video/via/viamode.c
index 9ff9424..16a8a97 100644
--- a/drivers/video/via/viamode.c
+++ b/drivers/video/via/viamode.c
@@ -309,8 +309,7 @@ struct io_reg CX700_ModeXregs[] = { {VIASR, SR10, 0xFF, 0x01},
 {VIACR, CR96, 0xFF, 0x00},
 {VIACR, CR97, 0xFF, 0x00},
 {VIACR, CR99, 0xFF, 0x00},
-{VIACR, CR9B, 0xFF, 0x00},
-{VIACR, CRD2, 0xFF, 0xFF}	/* TMDS/LVDS control register.         */
+{VIACR, CR9B, 0xFF, 0x00}
 };
 
 /* Video Mode Table */
-- 
cgit v0.10.2


From 68fa92082ffda84adcbae06fdd307fca53469c25 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:23 -0700
Subject: viafb: another small cleanup of viafb_par

This removes the completly useless io variable as well as the temporary
used variables mmio_base and mmio_len in favor to use directly the fb_info
variables.

This is a code cleanup only, no runtime change expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c
index 5be4a67..86050e7 100644
--- a/drivers/video/via/hw.c
+++ b/drivers/video/via/hw.c
@@ -2496,8 +2496,7 @@ void viafb_crt_enable(void)
 	viafb_write_reg_mask(CR36, VIACR, 0x0, BIT5 + BIT4);
 }
 
-void viafb_get_mmio_info(unsigned long *mmio_base,
-	unsigned long *mmio_len)
+void viafb_get_mmio_info(unsigned long *mmio_base, u32 *mmio_len)
 {
 	struct pci_dev *pdev = NULL;
 	u32 vendor, device;
diff --git a/drivers/video/via/hw.h b/drivers/video/via/hw.h
index 874ea3f..cf1dfd5 100644
--- a/drivers/video/via/hw.h
+++ b/drivers/video/via/hw.h
@@ -923,8 +923,7 @@ int viafb_get_pixclock(int hres, int vres, int vmode_refresh);
 int viafb_get_refresh(int hres, int vres, u32 float_refresh);
 void viafb_update_device_setting(int hres, int vres, int bpp,
 			   int vmode_refresh, int flag);
-void viafb_get_mmio_info(unsigned long *mmio_base,
-	unsigned long *mmio_len);
+void viafb_get_mmio_info(unsigned long *mmio_base, u32 *mmio_len);
 
 void viafb_set_iga_path(void);
 void viafb_set_primary_address(u32 addr);
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 924177c..a17e138 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -69,8 +69,6 @@ static void viafb_setup_fixinfo(struct fb_fix_screeninfo *fix,
 
 	fix->smem_start = viaparinfo->fbmem;
 	fix->smem_len = viaparinfo->fbmem_free;
-	fix->mmio_start = viaparinfo->mmio_base;
-	fix->mmio_len = viaparinfo->mmio_len;
 
 	fix->type = FB_TYPE_PACKED_PIXELS;
 	fix->type_aux = 0;
@@ -2004,9 +2002,10 @@ static int __devinit via_pci_probe(void)
 		return -ENOMEM;
 	}
 
-	viafb_get_mmio_info(&viaparinfo->mmio_base, &viaparinfo->mmio_len);
-	viaparinfo->io_virt = ioremap_nocache(viaparinfo->mmio_base,
-		viaparinfo->mmio_len);
+	viafb_get_mmio_info(&viafbinfo->fix.mmio_start,
+		&viafbinfo->fix.mmio_len);
+	viaparinfo->io_virt = ioremap_nocache(viafbinfo->fix.mmio_start,
+		viafbinfo->fix.mmio_len);
 	if (!viaparinfo->io_virt) {
 		printk(KERN_WARNING "ioremap failed: hardware acceleration disabled\n");
 		viafb_accel = 0;
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index cd871f9..2763922 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -41,9 +41,6 @@ struct viafb_par {
 	void __iomem *io_virt;	/*iospace virtual memory address */
 	unsigned int fbmem;	/*framebuffer physical memory address */
 	unsigned int memsize;	/*size of fbmem */
-	unsigned int io;	/*io space address */
-	unsigned long mmio_base;	/*mmio base address */
-	unsigned long mmio_len;	/*mmio base length */
 	u32 fbmem_free;		/* Free FB memory */
 	u32 fbmem_used;		/* Use FB memory size */
 	u32 cursor_start;	/* Cursor Start Address */
-- 
cgit v0.10.2


From c4df5489e40e55f2962b9e8100ebc0d4d1374415 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:24 -0700
Subject: viafb: improve viafb_par

This patch introduces viafb_shared and is the beginning of a smooth
transition to use it.

viafb_shared should contain all general, non-surface specific data that
should be shared along all viafb framebuffers while viafb_par should only
contain things that are specific to each surface or in other words extend
fb_info.  This change is intended to clean the dual/multi framebuffer
handling up.

This removes the annoyance that viafbinfo1->par points to a different
structure than viaparinfo1.

As the last change is fundamental it is difficult to ensure that all parts
of the driver do not depend on the previous brokenness but the chance of
regressions is very low.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/dvi.c b/drivers/video/via/dvi.c
index d696544..c5c32b6 100644
--- a/drivers/video/via/dvi.c
+++ b/drivers/video/via/dvi.c
@@ -160,7 +160,7 @@ int viafb_tmds_trasmitter_identify(void)
 
 static void tmds_register_write(int index, u8 data)
 {
-	viaparinfo->i2c_stuff.i2c_port =
+	viaparinfo->shared->i2c_stuff.i2c_port =
 		viaparinfo->chip_info->tmds_chip_info.i2c_port;
 
 	viafb_i2c_writebyte(viaparinfo->chip_info->tmds_chip_info.
@@ -172,7 +172,7 @@ static int tmds_register_read(int index)
 {
 	u8 data;
 
-	viaparinfo->i2c_stuff.i2c_port =
+	viaparinfo->shared->i2c_stuff.i2c_port =
 		viaparinfo->chip_info->tmds_chip_info.i2c_port;
 	viafb_i2c_readbyte((u8) viaparinfo->chip_info->
 	    tmds_chip_info.tmds_chip_slave_addr,
@@ -182,7 +182,7 @@ static int tmds_register_read(int index)
 
 static int tmds_register_read_bytes(int index, u8 *buff, int buff_len)
 {
-	viaparinfo->i2c_stuff.i2c_port =
+	viaparinfo->shared->i2c_stuff.i2c_port =
 		viaparinfo->chip_info->tmds_chip_info.i2c_port;
 	viafb_i2c_readbytes((u8) viaparinfo->chip_info->tmds_chip_info.
 			 tmds_chip_slave_addr, (u8) index, buff, buff_len);
diff --git a/drivers/video/via/lcd.c b/drivers/video/via/lcd.c
index 78c6b33..144d34b 100644
--- a/drivers/video/via/lcd.c
+++ b/drivers/video/via/lcd.c
@@ -207,13 +207,13 @@ static bool lvds_identify_integratedlvds(void)
 
 int viafb_lvds_trasmitter_identify(void)
 {
-	viaparinfo->i2c_stuff.i2c_port = I2CPORTINDEX;
+	viaparinfo->shared->i2c_stuff.i2c_port = I2CPORTINDEX;
 	if (viafb_lvds_identify_vt1636()) {
 		viaparinfo->chip_info->lvds_chip_info.i2c_port = I2CPORTINDEX;
 		DEBUG_MSG(KERN_INFO
 			  "Found VIA VT1636 LVDS on port i2c 0x31 \n");
 	} else {
-		viaparinfo->i2c_stuff.i2c_port = GPIOPORTINDEX;
+		viaparinfo->shared->i2c_stuff.i2c_port = GPIOPORTINDEX;
 		if (viafb_lvds_identify_vt1636()) {
 			viaparinfo->chip_info->lvds_chip_info.i2c_port =
 				GPIOPORTINDEX;
@@ -470,7 +470,7 @@ static int lvds_register_read(int index)
 {
 	u8 data;
 
-	viaparinfo->i2c_stuff.i2c_port = GPIOPORTINDEX;
+	viaparinfo->shared->i2c_stuff.i2c_port = GPIOPORTINDEX;
 	viafb_i2c_readbyte((u8) viaparinfo->chip_info->
 	    lvds_chip_info.lvds_chip_slave_addr,
 			(u8) index, &data);
diff --git a/drivers/video/via/via_i2c.c b/drivers/video/via/via_i2c.c
index 0f3ed4e..15543e9 100644
--- a/drivers/video/via/via_i2c.c
+++ b/drivers/video/via/via_i2c.c
@@ -97,7 +97,7 @@ int viafb_i2c_readbyte(u8 slave_addr, u8 index, u8 *pdata)
 	mm1[0] = index;
 	msgs[0].len = 1; msgs[1].len = 1;
 	msgs[0].buf = mm1; msgs[1].buf = pdata;
-	i2c_transfer(&viaparinfo->i2c_stuff.adapter, msgs, 2);
+	i2c_transfer(&viaparinfo->shared->i2c_stuff.adapter, msgs, 2);
 
 	return 0;
 }
@@ -111,7 +111,7 @@ int viafb_i2c_writebyte(u8 slave_addr, u8 index, u8 data)
 	msgs.addr = slave_addr / 2;
 	msgs.len = 2;
 	msgs.buf = msg;
-	return i2c_transfer(&viaparinfo->i2c_stuff.adapter, &msgs, 1);
+	return i2c_transfer(&viaparinfo->shared->i2c_stuff.adapter, &msgs, 1);
 }
 
 int viafb_i2c_readbytes(u8 slave_addr, u8 index, u8 *buff, int buff_len)
@@ -125,53 +125,53 @@ int viafb_i2c_readbytes(u8 slave_addr, u8 index, u8 *buff, int buff_len)
 	mm1[0] = index;
 	msgs[0].len = 1; msgs[1].len = buff_len;
 	msgs[0].buf = mm1; msgs[1].buf = buff;
-	i2c_transfer(&viaparinfo->i2c_stuff.adapter, msgs, 2);
+	i2c_transfer(&viaparinfo->shared->i2c_stuff.adapter, msgs, 2);
 	return 0;
 }
 
 int viafb_create_i2c_bus(void *viapar)
 {
 	int ret;
-	struct viafb_par *par = (struct viafb_par *)viapar;
-
-	strcpy(par->i2c_stuff.adapter.name, "via_i2c");
-	par->i2c_stuff.i2c_port = 0x0;
-	par->i2c_stuff.adapter.owner = THIS_MODULE;
-	par->i2c_stuff.adapter.id = 0x01FFFF;
-	par->i2c_stuff.adapter.class = 0;
-	par->i2c_stuff.adapter.algo_data = &par->i2c_stuff.algo;
-	par->i2c_stuff.adapter.dev.parent = NULL;
-	par->i2c_stuff.algo.setsda = via_i2c_setsda;
-	par->i2c_stuff.algo.setscl = via_i2c_setscl;
-	par->i2c_stuff.algo.getsda = via_i2c_getsda;
-	par->i2c_stuff.algo.getscl = via_i2c_getscl;
-	par->i2c_stuff.algo.udelay = 40;
-	par->i2c_stuff.algo.timeout = 20;
-	par->i2c_stuff.algo.data = &par->i2c_stuff;
-
-	i2c_set_adapdata(&par->i2c_stuff.adapter, &par->i2c_stuff);
+	struct via_i2c_stuff *i2c_stuff =
+		&((struct viafb_par *)viapar)->shared->i2c_stuff;
+
+	strcpy(i2c_stuff->adapter.name, "via_i2c");
+	i2c_stuff->i2c_port = 0x0;
+	i2c_stuff->adapter.owner = THIS_MODULE;
+	i2c_stuff->adapter.id = 0x01FFFF;
+	i2c_stuff->adapter.class = 0;
+	i2c_stuff->adapter.algo_data = &i2c_stuff->algo;
+	i2c_stuff->adapter.dev.parent = NULL;
+	i2c_stuff->algo.setsda = via_i2c_setsda;
+	i2c_stuff->algo.setscl = via_i2c_setscl;
+	i2c_stuff->algo.getsda = via_i2c_getsda;
+	i2c_stuff->algo.getscl = via_i2c_getscl;
+	i2c_stuff->algo.udelay = 40;
+	i2c_stuff->algo.timeout = 20;
+	i2c_stuff->algo.data = i2c_stuff;
+
+	i2c_set_adapdata(&i2c_stuff->adapter, i2c_stuff);
 
 	/* Raise SCL and SDA */
-	par->i2c_stuff.i2c_port = I2CPORTINDEX;
-	via_i2c_setsda(&par->i2c_stuff, 1);
-	via_i2c_setscl(&par->i2c_stuff, 1);
+	i2c_stuff->i2c_port = I2CPORTINDEX;
+	via_i2c_setsda(i2c_stuff, 1);
+	via_i2c_setscl(i2c_stuff, 1);
 
-	par->i2c_stuff.i2c_port = GPIOPORTINDEX;
-	via_i2c_setsda(&par->i2c_stuff, 1);
-	via_i2c_setscl(&par->i2c_stuff, 1);
+	i2c_stuff->i2c_port = GPIOPORTINDEX;
+	via_i2c_setsda(i2c_stuff, 1);
+	via_i2c_setscl(i2c_stuff, 1);
 	udelay(20);
 
-	ret = i2c_bit_add_bus(&par->i2c_stuff.adapter);
+	ret = i2c_bit_add_bus(&i2c_stuff->adapter);
 	if (ret == 0)
-		DEBUG_MSG("I2C bus %s registered.\n",
-		par->i2c_stuff.adapter.name);
+		DEBUG_MSG("I2C bus %s registered.\n", i2c_stuff->adapter.name);
 	else
 		DEBUG_MSG("Failed to register I2C bus %s.\n",
-			par->i2c_stuff.adapter.name);
+			i2c_stuff->adapter.name);
 	return ret;
 }
 
 void viafb_delete_i2c_buss(void *par)
 {
-	i2c_del_adapter(&((struct viafb_par *)par)->i2c_stuff.adapter);
+	i2c_del_adapter(&((struct viafb_par *)par)->shared->i2c_stuff.adapter);
 }
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index a17e138..4a8853a 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -1943,40 +1943,30 @@ static int __devinit via_pci_probe(void)
 	char *tmpc, *tmpm;
 	char *tmpc_sec, *tmpm_sec;
 	int vmode_index;
-	u32 tmds_length, lvds_length, crt_length, chip_length, viafb_par_length;
+	u32 viafb_par_length;
 
 	DEBUG_MSG(KERN_INFO "VIAFB PCI Probe!!\n");
 
 	viafb_par_length = ALIGN(sizeof(struct viafb_par), BITS_PER_LONG/8);
-	tmds_length = ALIGN(sizeof(struct tmds_setting_information),
-		BITS_PER_LONG/8);
-	lvds_length = ALIGN(sizeof(struct lvds_setting_information),
-		BITS_PER_LONG/8);
-	crt_length = ALIGN(sizeof(struct lvds_setting_information),
-		BITS_PER_LONG/8);
-	chip_length = ALIGN(sizeof(struct chip_information), BITS_PER_LONG/8);
 
 	/* Allocate fb_info and ***_par here, also including some other needed
 	 * variables
 	*/
-	viafbinfo = framebuffer_alloc(viafb_par_length + 2 * lvds_length +
-	tmds_length + crt_length + chip_length, NULL);
+	viafbinfo = framebuffer_alloc(viafb_par_length +
+		ALIGN(sizeof(struct viafb_shared), BITS_PER_LONG/8), NULL);
 	if (!viafbinfo) {
 		printk(KERN_ERR"Could not allocate memory for viafb_info.\n");
 		return -ENODEV;
 	}
 
 	viaparinfo = (struct viafb_par *)viafbinfo->par;
-	viaparinfo->tmds_setting_info = (struct tmds_setting_information *)
-		((unsigned long)viaparinfo + viafb_par_length);
-	viaparinfo->lvds_setting_info = (struct lvds_setting_information *)
-		((unsigned long)viaparinfo->tmds_setting_info + tmds_length);
-	viaparinfo->lvds_setting_info2 = (struct lvds_setting_information *)
-		((unsigned long)viaparinfo->lvds_setting_info + lvds_length);
-	viaparinfo->crt_setting_info = (struct crt_setting_information *)
-		((unsigned long)viaparinfo->lvds_setting_info2 + lvds_length);
-	viaparinfo->chip_info = (struct chip_information *)
-		((unsigned long)viaparinfo->crt_setting_info + crt_length);
+	viaparinfo->shared = viafbinfo->par + viafb_par_length;
+	viaparinfo->tmds_setting_info = &viaparinfo->shared->tmds_setting_info;
+	viaparinfo->lvds_setting_info = &viaparinfo->shared->lvds_setting_info;
+	viaparinfo->lvds_setting_info2 =
+		&viaparinfo->shared->lvds_setting_info2;
+	viaparinfo->crt_setting_info = &viaparinfo->shared->crt_setting_info;
+	viaparinfo->chip_info = &viaparinfo->shared->chip_info;
 
 	if (viafb_dual_fb)
 		viafb_SAMM_ON = 1;
@@ -2142,6 +2132,7 @@ static int __devinit via_pci_probe(void)
 		viaparinfo->iga_path = IGA1;
 		viaparinfo1->iga_path = IGA2;
 		memcpy(viafbinfo1, viafbinfo, sizeof(struct fb_info));
+		viafbinfo1->par = viaparinfo1;
 		viafbinfo1->screen_base = viafbinfo->screen_base +
 			viafb_second_offset;
 
@@ -2193,7 +2184,7 @@ static int __devinit via_pci_probe(void)
 		  viafbinfo->node, viafbinfo->fix.id, default_var.xres,
 		  default_var.yres, default_var.bits_per_pixel);
 
-	viafb_init_proc(&viaparinfo->proc_entry);
+	viafb_init_proc(&viaparinfo->shared->proc_entry);
 	viafb_init_dac(IGA2);
 	return 0;
 }
@@ -2214,7 +2205,7 @@ static void __devexit via_pci_remove(void)
 	if (viafb_dual_fb)
 		framebuffer_release(viafbinfo1);
 
-	viafb_remove_proc(viaparinfo->proc_entry);
+	viafb_remove_proc(viaparinfo->shared->proc_entry);
 }
 
 #ifndef MODULE
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index 2763922..1d1fe35 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -37,6 +37,20 @@
 #define VERSION_OS          0	/* 0: for 32 bits OS, 1: for 64 bits OS */
 #define VERSION_MINOR       4
 
+struct viafb_shared {
+	struct proc_dir_entry *proc_entry;	/*viafb proc entry */
+
+	/* I2C stuff */
+	struct via_i2c_stuff i2c_stuff;
+
+	/* All the information will be needed to set engine */
+	struct tmds_setting_information tmds_setting_info;
+	struct crt_setting_information crt_setting_info;
+	struct lvds_setting_information lvds_setting_info;
+	struct lvds_setting_information lvds_setting_info2;
+	struct chip_information chip_info;
+};
+
 struct viafb_par {
 	void __iomem *io_virt;	/*iospace virtual memory address */
 	unsigned int fbmem;	/*framebuffer physical memory address */
@@ -47,12 +61,11 @@ struct viafb_par {
 	u32 VQ_start;		/* Virtual Queue Start Address */
 	u32 VQ_end;		/* Virtual Queue End Address */
 	u32 iga_path;
-	struct proc_dir_entry *proc_entry;	/*viafb proc entry */
 
-	/* I2C stuff */
-	struct via_i2c_stuff i2c_stuff;
+	struct viafb_shared *shared;
 
 	/* All the information will be needed to set engine */
+	/* depreciated, use the ones in shared directly */
 	struct tmds_setting_information *tmds_setting_info;
 	struct crt_setting_information *crt_setting_info;
 	struct lvds_setting_information *lvds_setting_info;
diff --git a/drivers/video/via/vt1636.c b/drivers/video/via/vt1636.c
index 322a9f9..a6b3749 100644
--- a/drivers/video/via/vt1636.c
+++ b/drivers/video/via/vt1636.c
@@ -27,7 +27,7 @@ u8 viafb_gpio_i2c_read_lvds(struct lvds_setting_information
 {
 	u8 data;
 
-	viaparinfo->i2c_stuff.i2c_port = plvds_chip_info->i2c_port;
+	viaparinfo->shared->i2c_stuff.i2c_port = plvds_chip_info->i2c_port;
 	viafb_i2c_readbyte(plvds_chip_info->lvds_chip_slave_addr, index, &data);
 
 	return data;
@@ -39,7 +39,7 @@ void viafb_gpio_i2c_write_mask_lvds(struct lvds_setting_information
 {
 	int index, data;
 
-	viaparinfo->i2c_stuff.i2c_port = plvds_chip_info->i2c_port;
+	viaparinfo->shared->i2c_stuff.i2c_port = plvds_chip_info->i2c_port;
 
 	index = io_data.Index;
 	data = viafb_gpio_i2c_read_lvds(plvds_setting_info, plvds_chip_info,
-- 
cgit v0.10.2


From c3e25673843153ea75fda79a47cf12f10a25ca37 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:26 -0700
Subject: viafb: 2D engine rewrite

This patch is a completly rewritten 2D engine.  The engine is no longer in
a default state but reinitialized every time to allow usage for both
framebuffers regardless of their settings.

The whole engine handling is concentrated in a big function which takes 16
parameters.  Although the number of parameters is worryingly it is good to
have a single funtion to deal with this stuff as it allows to easily
support different engines and avoids some code duplication.

On the way support for the new 2D engine in VX800 was added.  As the with
less code duplication but it is probably better to duplicate the code as
this way is easier to walk if VIA ever decides to release a new engine
which changes anything the driver touches.

The engine support for VX800 gives a notable boost in speed.  There are no
known regressions but as this patch changes paths I do neither have the
hardware nor documentation to check and has the possibility to put the
system in a critical state heavy testing is appreciated.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c
index b3e7e82..8ac0251 100644
--- a/drivers/video/via/accel.c
+++ b/drivers/video/via/accel.c
@@ -20,8 +20,316 @@
  */
 #include "global.h"
 
-void viafb_init_accel(void)
+static int hw_bitblt_1(void __iomem *engine, u8 op, u32 width, u32 height,
+	u8 dst_bpp, u32 dst_addr, u32 dst_pitch, u32 dst_x, u32 dst_y,
+	u32 *src_mem, u32 src_addr, u32 src_pitch, u32 src_x, u32 src_y,
+	u32 fg_color, u32 bg_color, u8 fill_rop)
 {
+	u32 ge_cmd = 0, tmp, i;
+
+	if (!op || op > 3) {
+		printk(KERN_WARNING "hw_bitblt_1: Invalid operation: %d\n", op);
+		return -EINVAL;
+	}
+
+	if (op != VIA_BITBLT_FILL && !src_mem && src_addr == dst_addr) {
+		if (src_x < dst_x) {
+			ge_cmd |= 0x00008000;
+			src_x += width - 1;
+			dst_x += width - 1;
+		}
+		if (src_y < dst_y) {
+			ge_cmd |= 0x00004000;
+			src_y += height - 1;
+			dst_y += height - 1;
+		}
+	}
+
+	if (op == VIA_BITBLT_FILL) {
+		switch (fill_rop) {
+		case 0x00: /* blackness */
+		case 0x5A: /* pattern inversion */
+		case 0xF0: /* pattern copy */
+		case 0xFF: /* whiteness */
+			break;
+		default:
+			printk(KERN_WARNING "hw_bitblt_1: Invalid fill rop: "
+				"%u\n", fill_rop);
+			return -EINVAL;
+		}
+	}
+
+	switch (dst_bpp) {
+	case 8:
+		tmp = 0x00000000;
+		break;
+	case 16:
+		tmp = 0x00000100;
+		break;
+	case 32:
+		tmp = 0x00000300;
+		break;
+	default:
+		printk(KERN_WARNING "hw_bitblt_1: Unsupported bpp %d\n",
+			dst_bpp);
+		return -EINVAL;
+	}
+	writel(tmp, engine + 0x04);
+
+	if (op != VIA_BITBLT_FILL) {
+		if (src_x & (op == VIA_BITBLT_MONO ? 0xFFFF8000 : 0xFFFFF000)
+			|| src_y & 0xFFFFF000) {
+			printk(KERN_WARNING "hw_bitblt_1: Unsupported source "
+				"x/y %d %d\n", src_x, src_y);
+			return -EINVAL;
+		}
+		tmp = src_x | (src_y << 16);
+		writel(tmp, engine + 0x08);
+	}
+
+	if (dst_x & 0xFFFFF000 || dst_y & 0xFFFFF000) {
+		printk(KERN_WARNING "hw_bitblt_1: Unsupported destination x/y "
+			"%d %d\n", dst_x, dst_y);
+		return -EINVAL;
+	}
+	tmp = dst_x | (dst_y << 16);
+	writel(tmp, engine + 0x0C);
+
+	if ((width - 1) & 0xFFFFF000 || (height - 1) & 0xFFFFF000) {
+		printk(KERN_WARNING "hw_bitblt_1: Unsupported width/height "
+			"%d %d\n", width, height);
+		return -EINVAL;
+	}
+	tmp = (width - 1) | ((height - 1) << 16);
+	writel(tmp, engine + 0x10);
+
+	if (op != VIA_BITBLT_COLOR)
+		writel(fg_color, engine + 0x18);
+
+	if (op == VIA_BITBLT_MONO)
+		writel(bg_color, engine + 0x1C);
+
+	if (op != VIA_BITBLT_FILL) {
+		tmp = src_mem ? 0 : src_addr;
+		if (dst_addr & 0xE0000007) {
+			printk(KERN_WARNING "hw_bitblt_1: Unsupported source "
+				"address %X\n", tmp);
+			return -EINVAL;
+		}
+		tmp >>= 3;
+		writel(tmp, engine + 0x30);
+	}
+
+	if (dst_addr & 0xE0000007) {
+		printk(KERN_WARNING "hw_bitblt_1: Unsupported destination "
+			"address %X\n", dst_addr);
+		return -EINVAL;
+	}
+	tmp = dst_addr >> 3;
+	writel(tmp, engine + 0x34);
+
+	if (op == VIA_BITBLT_FILL)
+		tmp = 0;
+	else
+		tmp = src_pitch;
+	if (tmp & 0xFFFFC007 || dst_pitch & 0xFFFFC007) {
+		printk(KERN_WARNING "hw_bitblt_1: Unsupported pitch %X %X\n",
+			tmp, dst_pitch);
+		return -EINVAL;
+	}
+	tmp = (tmp >> 3) | (dst_pitch << (16 - 3));
+	writel(tmp, engine + 0x38);
+
+	if (op == VIA_BITBLT_FILL)
+		ge_cmd |= fill_rop << 24 | 0x00002000 | 0x00000001;
+	else {
+		ge_cmd |= 0xCC000000; /* ROP=SRCCOPY */
+		if (src_mem)
+			ge_cmd |= 0x00000040;
+		if (op == VIA_BITBLT_MONO)
+			ge_cmd |= 0x00000002 | 0x00000100 | 0x00020000;
+		else
+			ge_cmd |= 0x00000001;
+	}
+	writel(ge_cmd, engine);
+
+	if (op == VIA_BITBLT_FILL || !src_mem)
+		return 0;
+
+	tmp = (width * height * (op == VIA_BITBLT_MONO ? 1 : (dst_bpp >> 3)) +
+		3) >> 2;
+
+	for (i = 0; i < tmp; i++)
+		writel(src_mem[i], engine + VIA_MMIO_BLTBASE);
+
+	return 0;
+}
+
+static int hw_bitblt_2(void __iomem *engine, u8 op, u32 width, u32 height,
+	u8 dst_bpp, u32 dst_addr, u32 dst_pitch, u32 dst_x, u32 dst_y,
+	u32 *src_mem, u32 src_addr, u32 src_pitch, u32 src_x, u32 src_y,
+	u32 fg_color, u32 bg_color, u8 fill_rop)
+{
+	u32 ge_cmd = 0, tmp, i;
+
+	if (!op || op > 3) {
+		printk(KERN_WARNING "hw_bitblt_2: Invalid operation: %d\n", op);
+		return -EINVAL;
+	}
+
+	if (op != VIA_BITBLT_FILL && !src_mem && src_addr == dst_addr) {
+		if (src_x < dst_x) {
+			ge_cmd |= 0x00008000;
+			src_x += width - 1;
+			dst_x += width - 1;
+		}
+		if (src_y < dst_y) {
+			ge_cmd |= 0x00004000;
+			src_y += height - 1;
+			dst_y += height - 1;
+		}
+	}
+
+	if (op == VIA_BITBLT_FILL) {
+		switch (fill_rop) {
+		case 0x00: /* blackness */
+		case 0x5A: /* pattern inversion */
+		case 0xF0: /* pattern copy */
+		case 0xFF: /* whiteness */
+			break;
+		default:
+			printk(KERN_WARNING "hw_bitblt_2: Invalid fill rop: "
+				"%u\n", fill_rop);
+			return -EINVAL;
+		}
+	}
+
+	switch (dst_bpp) {
+	case 8:
+		tmp = 0x00000000;
+		break;
+	case 16:
+		tmp = 0x00000100;
+		break;
+	case 32:
+		tmp = 0x00000300;
+		break;
+	default:
+		printk(KERN_WARNING "hw_bitblt_2: Unsupported bpp %d\n",
+			dst_bpp);
+		return -EINVAL;
+	}
+	writel(tmp, engine + 0x04);
+
+	if (op == VIA_BITBLT_FILL)
+		tmp = 0;
+	else
+		tmp = src_pitch;
+	if (tmp & 0xFFFFC007 || dst_pitch & 0xFFFFC007) {
+		printk(KERN_WARNING "hw_bitblt_2: Unsupported pitch %X %X\n",
+			tmp, dst_pitch);
+		return -EINVAL;
+	}
+	tmp = (tmp >> 3) | (dst_pitch << (16 - 3));
+	writel(tmp, engine + 0x08);
+
+	if ((width - 1) & 0xFFFFF000 || (height - 1) & 0xFFFFF000) {
+		printk(KERN_WARNING "hw_bitblt_2: Unsupported width/height "
+			"%d %d\n", width, height);
+		return -EINVAL;
+	}
+	tmp = (width - 1) | ((height - 1) << 16);
+	writel(tmp, engine + 0x0C);
+
+	if (dst_x & 0xFFFFF000 || dst_y & 0xFFFFF000) {
+		printk(KERN_WARNING "hw_bitblt_2: Unsupported destination x/y "
+			"%d %d\n", dst_x, dst_y);
+		return -EINVAL;
+	}
+	tmp = dst_x | (dst_y << 16);
+	writel(tmp, engine + 0x10);
+
+	if (dst_addr & 0xE0000007) {
+		printk(KERN_WARNING "hw_bitblt_2: Unsupported destination "
+			"address %X\n", dst_addr);
+		return -EINVAL;
+	}
+	tmp = dst_addr >> 3;
+	writel(tmp, engine + 0x14);
+
+	if (op != VIA_BITBLT_FILL) {
+		if (src_x & (op == VIA_BITBLT_MONO ? 0xFFFF8000 : 0xFFFFF000)
+			|| src_y & 0xFFFFF000) {
+			printk(KERN_WARNING "hw_bitblt_2: Unsupported source "
+				"x/y %d %d\n", src_x, src_y);
+			return -EINVAL;
+		}
+		tmp = src_x | (src_y << 16);
+		writel(tmp, engine + 0x18);
+
+		tmp = src_mem ? 0 : src_addr;
+		if (dst_addr & 0xE0000007) {
+			printk(KERN_WARNING "hw_bitblt_2: Unsupported source "
+				"address %X\n", tmp);
+			return -EINVAL;
+		}
+		tmp >>= 3;
+		writel(tmp, engine + 0x1C);
+	}
+
+	if (op != VIA_BITBLT_COLOR)
+		writel(fg_color, engine + 0x4C);
+
+	if (op == VIA_BITBLT_MONO)
+		writel(bg_color, engine + 0x50);
+
+	if (op == VIA_BITBLT_FILL)
+		ge_cmd |= fill_rop << 24 | 0x00002000 | 0x00000001;
+	else {
+		ge_cmd |= 0xCC000000; /* ROP=SRCCOPY */
+		if (src_mem)
+			ge_cmd |= 0x00000040;
+		if (op == VIA_BITBLT_MONO)
+			ge_cmd |= 0x00000002 | 0x00000100 | 0x00020000;
+		else
+			ge_cmd |= 0x00000001;
+	}
+	writel(ge_cmd, engine);
+
+	if (op == VIA_BITBLT_FILL || !src_mem)
+		return 0;
+
+	tmp = (width * height * (op == VIA_BITBLT_MONO ? 1 : (dst_bpp >> 3)) +
+		3) >> 2;
+
+	for (i = 0; i < tmp; i++)
+		writel(src_mem[i], engine + VIA_MMIO_BLTBASE);
+
+	return 0;
+}
+
+void viafb_init_accel(struct viafb_shared *shared)
+{
+	switch (shared->chip_info.gfx_chip_name) {
+	case UNICHROME_CLE266:
+	case UNICHROME_K400:
+	case UNICHROME_K800:
+	case UNICHROME_PM800:
+	case UNICHROME_CN700:
+	case UNICHROME_CX700:
+	case UNICHROME_CN750:
+	case UNICHROME_K8M890:
+	case UNICHROME_P4M890:
+	case UNICHROME_P4M900:
+		shared->hw_bitblt = hw_bitblt_1;
+		break;
+	case UNICHROME_VX800:
+		shared->hw_bitblt = hw_bitblt_2;
+		break;
+	default:
+		shared->hw_bitblt = NULL;
+	}
+
 	viaparinfo->fbmem_free -= CURSOR_SIZE;
 	viaparinfo->cursor_start = viaparinfo->fbmem_free;
 	viaparinfo->fbmem_used += CURSOR_SIZE;
@@ -30,30 +338,14 @@ void viafb_init_accel(void)
 	viaparinfo->fbmem_free -= (CURSOR_SIZE + VQ_SIZE);
 	viaparinfo->VQ_start = viaparinfo->fbmem_free;
 	viaparinfo->VQ_end = viaparinfo->VQ_start + VQ_SIZE - 1;
-	viaparinfo->fbmem_used += (CURSOR_SIZE + VQ_SIZE); }
+	viaparinfo->fbmem_used += (CURSOR_SIZE + VQ_SIZE);
+}
 
 void viafb_init_2d_engine(void)
 {
-	u32 dwVQStartAddr, dwVQEndAddr, linesize;
+	u32 dwVQStartAddr, dwVQEndAddr;
 	u32 dwVQLen, dwVQStartL, dwVQEndL, dwVQStartEndH;
 
-	/* init 2D engine regs to reset 2D engine */
-	writel(0x0, viaparinfo->io_virt + VIA_REG_GEMODE);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCPOS);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_DSTPOS);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_DIMENSION);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_PATADDR);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_FGCOLOR);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_BGCOLOR);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_CLIPTL);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_CLIPBR);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_OFFSET);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_KEYCONTROL);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCBASE);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_DSTBASE);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_PITCH);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_MONOPAT1);
-
 	/* Init AGP and VQ regs */
 	switch (viaparinfo->chip_info->gfx_chip_name) {
 	case UNICHROME_K8M890:
@@ -190,37 +482,6 @@ void viafb_init_2d_engine(void)
 			break;
 		}
 	}
-
-	viafb_set_2d_color_depth(viafbinfo->var.bits_per_pixel);
-
-	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCBASE);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_DSTBASE);
-
-	linesize = viafbinfo->var.xres * viafbinfo->var.bits_per_pixel >> 3;
-	writel(VIA_PITCH_ENABLE | (linesize >> 3) | ((linesize >> 3) << 16),
-		viaparinfo->io_virt + VIA_REG_PITCH);
-}
-
-void viafb_set_2d_color_depth(int bpp)
-{
-	u32 dwGEMode;
-
-	dwGEMode = readl(viaparinfo->io_virt + 0x04) & 0xFFFFFCFF;
-
-	switch (bpp) {
-	case 16:
-		dwGEMode |= VIA_GEM_16bpp;
-		break;
-	case 32:
-		dwGEMode |= VIA_GEM_32bpp;
-		break;
-	default:
-		dwGEMode |= VIA_GEM_8bpp;
-		break;
-	}
-
-	/* Set BPP and Pitch */
-	writel(dwGEMode, viaparinfo->io_virt + VIA_REG_GEMODE);
 }
 
 void viafb_hw_cursor_init(void)
diff --git a/drivers/video/via/accel.h b/drivers/video/via/accel.h
index 29bf854..4d93eba 100644
--- a/drivers/video/via/accel.h
+++ b/drivers/video/via/accel.h
@@ -159,9 +159,12 @@
 
 #define MAXLOOP                 0xFFFFFF
 
-void viafb_init_accel(void);
+#define VIA_BITBLT_COLOR	1
+#define VIA_BITBLT_MONO		2
+#define VIA_BITBLT_FILL		3
+
+void viafb_init_accel(struct viafb_shared *shared);
 void viafb_init_2d_engine(void);
-void set_2d_color_depth(int);
 void viafb_hw_cursor_init(void);
 void viafb_show_hw_cursor(struct fb_info *info, int Status); int
 viafb_wait_engine_idle(void); void viafb_set_2d_color_depth(int bpp);
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 4a8853a..66921de 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -95,11 +95,8 @@ static int viafb_check_var(struct fb_var_screeninfo *var,
 	struct fb_info *info)
 {
 	int vmode_index, htotal, vtotal;
-	struct viafb_par *ppar;
+	struct viafb_par *ppar = info->par;
 	u32 long_refresh;
-	struct viafb_par *p_viafb_par;
-	ppar = info->par;
-
 
 	DEBUG_MSG(KERN_INFO "viafb_check_var!\n");
 	/* Sanity check */
@@ -144,22 +141,17 @@ static int viafb_check_var(struct fb_var_screeninfo *var,
 	/* Adjust var according to our driver's own table */
 	viafb_fill_var_timing_info(var, viafb_refresh, vmode_index);
 
-	/* This is indeed a patch for VT3353 */
-	if (!info->par)
-		return -1;
-	p_viafb_par = (struct viafb_par *)info->par;
-	if (p_viafb_par->chip_info->gfx_chip_name == UNICHROME_VX800)
-		var->accel_flags = 0;
-
 	return 0;
 }
 
 static int viafb_set_par(struct fb_info *info)
 {
+	struct viafb_par *viapar = info->par;
 	int vmode_index;
 	int vmode_index1 = 0;
 	DEBUG_MSG(KERN_INFO "viafb_set_par!\n");
 
+	viapar->depth = fb_get_color_depth(&info->var, &info->fix);
 	viafb_update_device_setting(info->var.xres, info->var.yres,
 			      info->var.bits_per_pixel, viafb_refresh, 0);
 
@@ -190,9 +182,6 @@ static int viafb_set_par(struct fb_info *info)
 		viafb_bpp = info->var.bits_per_pixel;
 		/* Update viafb_accel, it is necessary to our 2D accelerate */
 		viafb_accel = info->var.accel_flags;
-
-		if (viafb_accel)
-			viafb_set_2d_color_depth(info->var.bits_per_pixel);
 	}
 
 	return 0;
@@ -777,10 +766,11 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
 static void viafb_fillrect(struct fb_info *info,
 	const struct fb_fillrect *rect)
 {
-	u32 col = 0, rop = 0;
-	int pitch;
+	struct viafb_par *viapar = info->par;
+	u32 fg_color;
+	u8 rop;
 
-	if (!viafb_accel) {
+	if (!viapar->shared->hw_bitblt) {
 		cfb_fillrect(info, rect);
 		return;
 	}
@@ -788,67 +778,30 @@ static void viafb_fillrect(struct fb_info *info,
 	if (!rect->width || !rect->height)
 		return;
 
-	switch (rect->rop) {
-	case ROP_XOR:
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR)
+		fg_color = ((u32 *)info->pseudo_palette)[rect->color];
+	else
+		fg_color = rect->color;
+
+	if (rect->rop == ROP_XOR)
 		rop = 0x5A;
-		break;
-	case ROP_COPY:
-	default:
+	else
 		rop = 0xF0;
-		break;
-	}
-
-	switch (info->var.bits_per_pixel) {
-	case 8:
-		col = rect->color;
-		break;
-	case 16:
-		col = ((u32 *) (info->pseudo_palette))[rect->color];
-		break;
-	case 32:
-		col = ((u32 *) (info->pseudo_palette))[rect->color];
-		break;
-	}
-
-	/* BitBlt Source Address */
-	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCPOS);
-	/* Source Base Address */
-	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCBASE);
-	/* Destination Base Address */
-	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
-		   viaparinfo->io_virt + VIA_REG_DSTBASE);
-	/* Pitch */
-	pitch = (info->var.xres_virtual + 7) & ~7;
-	writel(VIA_PITCH_ENABLE |
-		   (((pitch *
-		      info->var.bits_per_pixel >> 3) >> 3) |
-		      (((pitch * info->
-		      var.bits_per_pixel >> 3) >> 3) << 16)),
-		      viaparinfo->io_virt + VIA_REG_PITCH);
-	/* BitBlt Destination Address */
-	writel(((rect->dy << 16) | rect->dx),
-		viaparinfo->io_virt + VIA_REG_DSTPOS);
-	/* Dimension: width & height */
-	writel((((rect->height - 1) << 16) | (rect->width - 1)),
-		viaparinfo->io_virt + VIA_REG_DIMENSION);
-	/* Forground color or Destination color */
-	writel(col, viaparinfo->io_virt + VIA_REG_FGCOLOR);
-	/* GE Command */
-	writel((0x01 | 0x2000 | (rop << 24)),
-		viaparinfo->io_virt + VIA_REG_GECMD);
 
+	DEBUG_MSG(KERN_DEBUG "viafb 2D engine: fillrect\n");
+	if (viapar->shared->hw_bitblt(viapar->io_virt, VIA_BITBLT_FILL,
+		rect->width, rect->height, info->var.bits_per_pixel,
+		viapar->vram_addr, info->fix.line_length, rect->dx, rect->dy,
+		NULL, 0, 0, 0, 0, fg_color, 0, rop))
+		cfb_fillrect(info, rect);
 }
 
 static void viafb_copyarea(struct fb_info *info,
 	const struct fb_copyarea *area)
 {
-	u32 dy = area->dy, sy = area->sy, direction = 0x0;
-	u32 sx = area->sx, dx = area->dx, width = area->width;
-	int pitch;
+	struct viafb_par *viapar = info->par;
 
-	DEBUG_MSG(KERN_INFO "viafb_copyarea!!\n");
-
-	if (!viafb_accel) {
+	if (!viapar->shared->hw_bitblt) {
 		cfb_copyarea(info, area);
 		return;
 	}
@@ -856,113 +809,48 @@ static void viafb_copyarea(struct fb_info *info,
 	if (!area->width || !area->height)
 		return;
 
-	if (sy < dy) {
-		dy += area->height - 1;
-		sy += area->height - 1;
-		direction |= 0x4000;
-	}
-
-	if (sx < dx) {
-		dx += width - 1;
-		sx += width - 1;
-		direction |= 0x8000;
-	}
-
-	/* Source Base Address */
-	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
-		   viaparinfo->io_virt + VIA_REG_SRCBASE);
-	/* Destination Base Address */
-	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
-		   viaparinfo->io_virt + VIA_REG_DSTBASE);
-	/* Pitch */
-	pitch = (info->var.xres_virtual + 7) & ~7;
-	/* VIA_PITCH_ENABLE can be omitted now. */
-	writel(VIA_PITCH_ENABLE |
-		   (((pitch *
-		      info->var.bits_per_pixel >> 3) >> 3) | (((pitch *
-								info->var.
-								bits_per_pixel
-								>> 3) >> 3)
-							      << 16)),
-				viaparinfo->io_virt + VIA_REG_PITCH);
-	/* BitBlt Source Address */
-	writel(((sy << 16) | sx), viaparinfo->io_virt + VIA_REG_SRCPOS);
-	/* BitBlt Destination Address */
-	writel(((dy << 16) | dx), viaparinfo->io_virt + VIA_REG_DSTPOS);
-	/* Dimension: width & height */
-	writel((((area->height - 1) << 16) | (area->width - 1)),
-		   viaparinfo->io_virt + VIA_REG_DIMENSION);
-	/* GE Command */
-	writel((0x01 | direction | (0xCC << 24)),
-		viaparinfo->io_virt + VIA_REG_GECMD);
-
+	DEBUG_MSG(KERN_DEBUG "viafb 2D engine: copyarea\n");
+	if (viapar->shared->hw_bitblt(viapar->io_virt, VIA_BITBLT_COLOR,
+		area->width, area->height, info->var.bits_per_pixel,
+		viapar->vram_addr, info->fix.line_length, area->dx, area->dy,
+		NULL, viapar->vram_addr, info->fix.line_length,
+		area->sx, area->sy, 0, 0, 0))
+		cfb_copyarea(info, area);
 }
 
 static void viafb_imageblit(struct fb_info *info,
 	const struct fb_image *image)
 {
-	u32 size, bg_col = 0, fg_col = 0, *udata;
-	int i;
-	int pitch;
+	struct viafb_par *viapar = info->par;
+	u32 fg_color = 0, bg_color = 0;
+	u8 op;
 
-	if (!viafb_accel) {
+	if (!viapar->shared->hw_bitblt ||
+		(image->depth != 1 && image->depth != viapar->depth)) {
 		cfb_imageblit(info, image);
 		return;
 	}
 
-	udata = (u32 *) image->data;
-
-	switch (info->var.bits_per_pixel) {
-	case 8:
-		bg_col = image->bg_color;
-		fg_col = image->fg_color;
-		break;
-	case 16:
-		bg_col = ((u32 *) (info->pseudo_palette))[image->bg_color];
-		fg_col = ((u32 *) (info->pseudo_palette))[image->fg_color];
-		break;
-	case 32:
-		bg_col = ((u32 *) (info->pseudo_palette))[image->bg_color];
-		fg_col = ((u32 *) (info->pseudo_palette))[image->fg_color];
-		break;
-	}
-	size = image->width * image->height;
-
-	/* Source Base Address */
-	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCBASE);
-	/* Destination Base Address */
-	writel((info->fix.smem_start - viafbinfo->fix.smem_start) >> 3,
-		   viaparinfo->io_virt + VIA_REG_DSTBASE);
-	/* Pitch */
-	pitch = (info->var.xres_virtual + 7) & ~7;
-	writel(VIA_PITCH_ENABLE |
-		   (((pitch *
-		      info->var.bits_per_pixel >> 3) >> 3) | (((pitch *
-								info->var.
-								bits_per_pixel
-								>> 3) >> 3)
-							      << 16)),
-				viaparinfo->io_virt + VIA_REG_PITCH);
-	/* BitBlt Source Address */
-	writel(0x0, viaparinfo->io_virt + VIA_REG_SRCPOS);
-	/* BitBlt Destination Address */
-	writel(((image->dy << 16) | image->dx),
-		viaparinfo->io_virt + VIA_REG_DSTPOS);
-	/* Dimension: width & height */
-	writel((((image->height - 1) << 16) | (image->width - 1)),
-		   viaparinfo->io_virt + VIA_REG_DIMENSION);
-	/* fb color */
-	writel(fg_col, viaparinfo->io_virt + VIA_REG_FGCOLOR);
-	/* bg color */
-	writel(bg_col, viaparinfo->io_virt + VIA_REG_BGCOLOR);
-	/* GE Command */
-	writel(0xCC020142, viaparinfo->io_virt + VIA_REG_GECMD);
-
-	for (i = 0; i < size / 4; i++) {
-		writel(*udata, viaparinfo->io_virt + VIA_MMIO_BLTBASE);
-		udata++;
-	}
+	if (image->depth == 1) {
+		op = VIA_BITBLT_MONO;
+		if (info->fix.visual == FB_VISUAL_TRUECOLOR) {
+			fg_color =
+				((u32 *)info->pseudo_palette)[image->fg_color];
+			bg_color =
+				((u32 *)info->pseudo_palette)[image->bg_color];
+		} else {
+			fg_color = image->fg_color;
+			bg_color = image->bg_color;
+		}
+	} else
+		op = VIA_BITBLT_COLOR;
 
+	DEBUG_MSG(KERN_DEBUG "viafb 2D engine: imageblit\n");
+	if (viapar->shared->hw_bitblt(viapar->io_virt, op,
+		image->width, image->height, info->var.bits_per_pixel,
+		viapar->vram_addr, info->fix.line_length, image->dx, image->dy,
+		(u32 *)image->data, 0, 0, 0, 0, fg_color, bg_color, 0))
+		cfb_imageblit(info, image);
 }
 
 static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
@@ -1961,6 +1849,7 @@ static int __devinit via_pci_probe(void)
 
 	viaparinfo = (struct viafb_par *)viafbinfo->par;
 	viaparinfo->shared = viafbinfo->par + viafb_par_length;
+	viaparinfo->vram_addr = 0;
 	viaparinfo->tmds_setting_info = &viaparinfo->shared->tmds_setting_info;
 	viaparinfo->lvds_setting_info = &viaparinfo->shared->lvds_setting_info;
 	viaparinfo->lvds_setting_info2 =
@@ -2007,7 +1896,7 @@ static int __devinit via_pci_probe(void)
 
 	viafbinfo->pseudo_palette = pseudo_pal;
 	if (viafb_accel) {
-		viafb_init_accel();
+		viafb_init_accel(viaparinfo->shared);
 		viafb_init_2d_engine();
 		viafb_hw_cursor_init();
 	}
@@ -2110,6 +1999,7 @@ static int __devinit via_pci_probe(void)
 		}
 		viaparinfo1 = viafbinfo1->par;
 		memcpy(viaparinfo1, viaparinfo, viafb_par_length);
+		viaparinfo1->vram_addr = viafb_second_offset;
 		viaparinfo1->memsize = viaparinfo->memsize -
 			viafb_second_offset;
 		viaparinfo->memsize = viafb_second_offset;
@@ -2157,12 +2047,16 @@ static int __devinit via_pci_probe(void)
 		viafb_check_var(&default_var, viafbinfo1);
 		viafbinfo1->var = default_var;
 		viafb_update_fix(viafbinfo1);
+		viaparinfo1->depth = fb_get_color_depth(&viafbinfo1->var,
+			&viafbinfo1->fix);
 	}
 
 	viafb_setup_fixinfo(&viafbinfo->fix, viaparinfo);
 	viafb_check_var(&default_var, viafbinfo);
 	viafbinfo->var = default_var;
 	viafb_update_fix(viafbinfo);
+	viaparinfo->depth = fb_get_color_depth(&viafbinfo->var,
+		&viafbinfo->fix);
 	default_var.activate = FB_ACTIVATE_NOW;
 	fb_alloc_cmap(&viafbinfo->cmap, 256, 0);
 
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index 1d1fe35..beb4703 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -49,9 +49,17 @@ struct viafb_shared {
 	struct lvds_setting_information lvds_setting_info;
 	struct lvds_setting_information lvds_setting_info2;
 	struct chip_information chip_info;
+
+	/* hardware acceleration stuff */
+	int (*hw_bitblt)(void __iomem *engine, u8 op, u32 width, u32 height,
+		u8 dst_bpp, u32 dst_addr, u32 dst_pitch, u32 dst_x, u32 dst_y,
+		u32 *src_mem, u32 src_addr, u32 src_pitch, u32 src_x, u32 src_y,
+		u32 fg_color, u32 bg_color, u8 fill_rop);
 };
 
 struct viafb_par {
+	u8 depth;
+	u32 vram_addr;
 	void __iomem *io_virt;	/*iospace virtual memory address */
 	unsigned int fbmem;	/*framebuffer physical memory address */
 	unsigned int memsize;	/*size of fbmem */
-- 
cgit v0.10.2


From afbd3c12aca5a29f1627c0c68e6bc77f32459935 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 22 Sep 2009 16:47:27 -0700
Subject: viafb: switch to seq_file

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 66921de..7626325 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/seq_file.h>
 #define _MASTER_FILE
 
 #include "global.h"
@@ -1485,10 +1486,8 @@ static void parse_dvi_port(void)
  * DVP1Driving, DFPHigh, DFPLow CR96,   SR2A[5], SR1B[1], SR2A[4], SR1E[2],
  * CR9B,    SR65,    CR97,    CR99
  */
-static int viafb_dvp0_proc_read(char *buf, char **start, off_t offset,
-int count, int *eof, void *data)
+static int viafb_dvp0_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	u8 dvp0_data_dri = 0, dvp0_clk_dri = 0, dvp0 = 0;
 	dvp0_data_dri =
 	    (viafb_read_reg(VIASR, SR2A) & BIT5) >> 4 |
@@ -1497,13 +1496,17 @@ int count, int *eof, void *data)
 	    (viafb_read_reg(VIASR, SR2A) & BIT4) >> 3 |
 	    (viafb_read_reg(VIASR, SR1E) & BIT2) >> 2;
 	dvp0 = viafb_read_reg(VIACR, CR96) & 0x0f;
-	len +=
-	    sprintf(buf + len, "%x %x %x\n", dvp0, dvp0_data_dri, dvp0_clk_dri);
-	*eof = 1;		/*Inform kernel end of data */
-	return len;
+	seq_printf(m, "%x %x %x\n", dvp0, dvp0_data_dri, dvp0_clk_dri);
+	return 0;
+}
+
+static int viafb_dvp0_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, viafb_dvp0_proc_show, NULL);
 }
-static int viafb_dvp0_proc_write(struct file *file,
-	const char __user *buffer, unsigned long count, void *data)
+
+static ssize_t viafb_dvp0_proc_write(struct file *file,
+	const char __user *buffer, size_t count, loff_t *pos)
 {
 	char buf[20], *value, *pbuf;
 	u8 reg_val = 0;
@@ -1547,21 +1550,33 @@ static int viafb_dvp0_proc_write(struct file *file,
 	}
 	return count;
 }
-static int viafb_dvp1_proc_read(char *buf, char **start, off_t offset,
-	int count, int *eof, void *data)
+
+static const struct file_operations viafb_dvp0_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= viafb_dvp0_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= viafb_dvp0_proc_write,
+};
+
+static int viafb_dvp1_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	u8 dvp1 = 0, dvp1_data_dri = 0, dvp1_clk_dri = 0;
 	dvp1 = viafb_read_reg(VIACR, CR9B) & 0x0f;
 	dvp1_data_dri = (viafb_read_reg(VIASR, SR65) & 0x0c) >> 2;
 	dvp1_clk_dri = viafb_read_reg(VIASR, SR65) & 0x03;
-	len +=
-	    sprintf(buf + len, "%x %x %x\n", dvp1, dvp1_data_dri, dvp1_clk_dri);
-	*eof = 1;		/*Inform kernel end of data */
-	return len;
+	seq_printf(m, "%x %x %x\n", dvp1, dvp1_data_dri, dvp1_clk_dri);
+	return 0;
+}
+
+static int viafb_dvp1_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, viafb_dvp1_proc_show, NULL);
 }
-static int viafb_dvp1_proc_write(struct file *file,
-	const char __user *buffer, unsigned long count, void *data)
+
+static ssize_t viafb_dvp1_proc_write(struct file *file,
+	const char __user *buffer, size_t count, loff_t *pos)
 {
 	char buf[20], *value, *pbuf;
 	u8 reg_val = 0;
@@ -1600,18 +1615,30 @@ static int viafb_dvp1_proc_write(struct file *file,
 	return count;
 }
 
-static int viafb_dfph_proc_read(char *buf, char **start, off_t offset,
-	int count, int *eof, void *data)
+static const struct file_operations viafb_dvp1_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= viafb_dvp1_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= viafb_dvp1_proc_write,
+};
+
+static int viafb_dfph_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	u8 dfp_high = 0;
 	dfp_high = viafb_read_reg(VIACR, CR97) & 0x0f;
-	len += sprintf(buf + len, "%x\n", dfp_high);
-	*eof = 1;		/*Inform kernel end of data */
-	return len;
+	seq_printf(m, "%x\n", dfp_high);
+	return 0;
 }
-static int viafb_dfph_proc_write(struct file *file,
-	const char __user *buffer, unsigned long count, void *data)
+
+static int viafb_dfph_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, viafb_dfph_proc_show, NULL);
+}
+
+static ssize_t viafb_dfph_proc_write(struct file *file,
+	const char __user *buffer, size_t count, loff_t *pos)
 {
 	char buf[20];
 	u8 reg_val = 0;
@@ -1626,18 +1653,31 @@ static int viafb_dfph_proc_write(struct file *file,
 	viafb_write_reg_mask(CR97, VIACR, reg_val, 0x0f);
 	return count;
 }
-static int viafb_dfpl_proc_read(char *buf, char **start, off_t offset,
-	int count, int *eof, void *data)
+
+static const struct file_operations viafb_dfph_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= viafb_dfph_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= viafb_dfph_proc_write,
+};
+
+static int viafb_dfpl_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	u8 dfp_low = 0;
 	dfp_low = viafb_read_reg(VIACR, CR99) & 0x0f;
-	len += sprintf(buf + len, "%x\n", dfp_low);
-	*eof = 1;		/*Inform kernel end of data */
-	return len;
+	seq_printf(m, "%x\n", dfp_low);
+	return 0;
+}
+
+static int viafb_dfpl_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, viafb_dfpl_proc_show, NULL);
 }
-static int viafb_dfpl_proc_write(struct file *file,
-	const char __user *buffer, unsigned long count, void *data)
+
+static ssize_t viafb_dfpl_proc_write(struct file *file,
+	const char __user *buffer, size_t count, loff_t *pos)
 {
 	char buf[20];
 	u8 reg_val = 0;
@@ -1652,10 +1692,18 @@ static int viafb_dfpl_proc_write(struct file *file,
 	viafb_write_reg_mask(CR99, VIACR, reg_val, 0x0f);
 	return count;
 }
-static int viafb_vt1636_proc_read(char *buf, char **start,
-	off_t offset, int count, int *eof, void *data)
+
+static const struct file_operations viafb_dfpl_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= viafb_dfpl_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= viafb_dfpl_proc_write,
+};
+
+static int viafb_vt1636_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	u8 vt1636_08 = 0, vt1636_09 = 0;
 	switch (viaparinfo->chip_info->lvds_chip_info.lvds_chip_name) {
 	case VT1636_LVDS:
@@ -1665,7 +1713,7 @@ static int viafb_vt1636_proc_read(char *buf, char **start,
 		vt1636_09 =
 		    viafb_gpio_i2c_read_lvds(viaparinfo->lvds_setting_info,
 		    &viaparinfo->chip_info->lvds_chip_info, 0x09) & 0x1f;
-		len += sprintf(buf + len, "%x %x\n", vt1636_08, vt1636_09);
+		seq_printf(m, "%x %x\n", vt1636_08, vt1636_09);
 		break;
 	default:
 		break;
@@ -1678,16 +1726,21 @@ static int viafb_vt1636_proc_read(char *buf, char **start,
 		vt1636_09 =
 		    viafb_gpio_i2c_read_lvds(viaparinfo->lvds_setting_info2,
 			&viaparinfo->chip_info->lvds_chip_info2, 0x09) & 0x1f;
-		len += sprintf(buf + len, " %x %x\n", vt1636_08, vt1636_09);
+		seq_printf(m, " %x %x\n", vt1636_08, vt1636_09);
 		break;
 	default:
 		break;
 	}
-	*eof = 1;		/*Inform kernel end of data */
-	return len;
+	return 0;
 }
-static int viafb_vt1636_proc_write(struct file *file,
-	const char __user *buffer, unsigned long count, void *data)
+
+static int viafb_vt1636_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, viafb_vt1636_proc_show, NULL);
+}
+
+static ssize_t viafb_vt1636_proc_write(struct file *file,
+	const char __user *buffer, size_t count, loff_t *pos)
 {
 	char buf[30], *value, *pbuf;
 	struct IODATA reg_val;
@@ -1776,39 +1829,27 @@ static int viafb_vt1636_proc_write(struct file *file,
 	return count;
 }
 
+static const struct file_operations viafb_vt1636_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= viafb_vt1636_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= viafb_vt1636_proc_write,
+};
+
 static void viafb_init_proc(struct proc_dir_entry **viafb_entry)
 {
-	struct proc_dir_entry *entry;
 	*viafb_entry = proc_mkdir("viafb", NULL);
 	if (viafb_entry) {
-		entry = create_proc_entry("dvp0", 0, *viafb_entry);
-		if (entry) {
-			entry->read_proc = viafb_dvp0_proc_read;
-			entry->write_proc = viafb_dvp0_proc_write;
-		}
-		entry = create_proc_entry("dvp1", 0, *viafb_entry);
-		if (entry) {
-			entry->read_proc = viafb_dvp1_proc_read;
-			entry->write_proc = viafb_dvp1_proc_write;
-		}
-		entry = create_proc_entry("dfph", 0, *viafb_entry);
-		if (entry) {
-			entry->read_proc = viafb_dfph_proc_read;
-			entry->write_proc = viafb_dfph_proc_write;
-		}
-		entry = create_proc_entry("dfpl", 0, *viafb_entry);
-		if (entry) {
-			entry->read_proc = viafb_dfpl_proc_read;
-			entry->write_proc = viafb_dfpl_proc_write;
-		}
+		proc_create("dvp0", 0, *viafb_entry, &viafb_dvp0_proc_fops);
+		proc_create("dvp1", 0, *viafb_entry, &viafb_dvp1_proc_fops);
+		proc_create("dfph", 0, *viafb_entry, &viafb_dfph_proc_fops);
+		proc_create("dfpl", 0, *viafb_entry, &viafb_dfpl_proc_fops);
 		if (VT1636_LVDS == viaparinfo->chip_info->lvds_chip_info.
 			lvds_chip_name || VT1636_LVDS ==
 		    viaparinfo->chip_info->lvds_chip_info2.lvds_chip_name) {
-			entry = create_proc_entry("vt1636", 0, *viafb_entry);
-			if (entry) {
-				entry->read_proc = viafb_vt1636_proc_read;
-				entry->write_proc = viafb_vt1636_proc_write;
-			}
+			proc_create("vt1636", 0, *viafb_entry, &viafb_vt1636_proc_fops);
 		}
 
 	}
-- 
cgit v0.10.2


From 5016af53ebbd1450c2656c94dfbd1dad15c19f60 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:28 -0700
Subject: viafb: cleanup viafb_cursor

Clean the hardware cursor handling up.

The most notable change is that it no longer buffers the values in
viacursor but uses the ones in cursor instead as they are guaranteed to be
always valid.

Furthermore it uses local instead global variables where possible, moves
the cursor variable in shared as only one hardware cursor is supported and
returns an error if memory allocation fails.  Last but not least it fixes
a too small buffer (as u32 has only 4 and not 32 bytes) but this did not
produce any known problems.

This is mostly a code cleanup, no negative runtime changes are expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c
index 8ac0251..503f9d8 100644
--- a/drivers/video/via/accel.c
+++ b/drivers/video/via/accel.c
@@ -331,7 +331,7 @@ void viafb_init_accel(struct viafb_shared *shared)
 	}
 
 	viaparinfo->fbmem_free -= CURSOR_SIZE;
-	viaparinfo->cursor_start = viaparinfo->fbmem_free;
+	shared->cursor_vram_addr = viaparinfo->fbmem_free;
 	viaparinfo->fbmem_used += CURSOR_SIZE;
 
 	/* Reverse 8*1024 memory space for cursor image */
@@ -487,7 +487,7 @@ void viafb_init_2d_engine(void)
 void viafb_hw_cursor_init(void)
 {
 	/* Set Cursor Image Base Address */
-	writel(viaparinfo->cursor_start,
+	writel(viaparinfo->shared->cursor_vram_addr,
 		viaparinfo->io_virt + VIA_REG_CURSOR_MODE);
 	writel(0x0, viaparinfo->io_virt + VIA_REG_CURSOR_POS);
 	writel(0x0, viaparinfo->io_virt + VIA_REG_CURSOR_ORG);
diff --git a/drivers/video/via/global.c b/drivers/video/via/global.c
index 1096fd5..33e7c45b 100644
--- a/drivers/video/via/global.c
+++ b/drivers/video/via/global.c
@@ -51,7 +51,6 @@ unsigned int viafb_second_yres = 480;
 unsigned int viafb_second_virtual_xres;
 unsigned int viafb_second_virtual_yres;
 int viafb_lcd_panel_id = LCD_PANEL_ID_MAXIMUM + 1;
-struct fb_cursor viacursor;
 struct fb_info *viafbinfo;
 struct fb_info *viafbinfo1;
 struct viafb_par *viaparinfo;
diff --git a/drivers/video/via/global.h b/drivers/video/via/global.h
index 11382e5..d69d0ca 100644
--- a/drivers/video/via/global.h
+++ b/drivers/video/via/global.h
@@ -77,7 +77,6 @@ extern int viafb_hotplug_Yres;
 extern int viafb_hotplug_bpp;
 extern int viafb_hotplug_refresh;
 extern int viafb_primary_dev;
-extern struct fb_cursor viacursor;
 
 extern unsigned int viafb_second_xres;
 extern unsigned int viafb_second_yres;
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 7626325..03f98d6 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -25,7 +25,6 @@
 
 #include "global.h"
 
-static int MAX_CURS = 32;
 static struct fb_var_screeninfo default_var;
 static char *viafb_name = "Via";
 static u32 pseudo_pal[17];
@@ -856,150 +855,99 @@ static void viafb_imageblit(struct fb_info *info,
 
 static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 {
-	u32 temp, xx, yy, bg_col = 0, fg_col = 0;
-	int i, j = 0;
-	static int hw_cursor;
-	struct viafb_par *p_viafb_par;
-
-	if (viafb_accel)
-		hw_cursor = 1;
-
-	if (!viafb_accel) {
-		if (hw_cursor) {
-			viafb_show_hw_cursor(info, HW_Cursor_OFF);
-			hw_cursor = 0;
-		}
-		return -ENODEV;
-	}
+	struct viafb_par *viapar = info->par;
+	u32 temp, xx, yy, bg_color = 0, fg_color = 0,
+		chip_name = viapar->shared->chip_info.gfx_chip_name;
+	int i, j = 0, cur_size = 64;
 
-	if ((((struct viafb_par *)(info->par))->iga_path == IGA2)
-	    && (viaparinfo->chip_info->gfx_chip_name == UNICHROME_CLE266))
+	if (info->flags & FBINFO_HWACCEL_DISABLED || info != viafbinfo)
 		return -ENODEV;
 
-	/* When duoview and using lcd , use soft cursor */
-	if (viafb_LCD_ON || (!viafb_SAMM_ON &&
-		viafb_LCD2_ON + viafb_DVI_ON + viafb_CRT_ON == 2))
+	if (chip_name == UNICHROME_CLE266 && viapar->iga_path == IGA2)
 		return -ENODEV;
 
 	viafb_show_hw_cursor(info, HW_Cursor_OFF);
-	viacursor = *cursor;
 
 	if (cursor->set & FB_CUR_SETHOT) {
-		viacursor.hot = cursor->hot;
-		temp = ((viacursor.hot.x) << 16) + viacursor.hot.y;
-		writel(temp, viaparinfo->io_virt + VIA_REG_CURSOR_ORG);
+		temp = (cursor->hot.x << 16) + cursor->hot.y;
+		writel(temp, viapar->io_virt + VIA_REG_CURSOR_ORG);
 	}
 
 	if (cursor->set & FB_CUR_SETPOS) {
-		viacursor.image.dx = cursor->image.dx;
-		viacursor.image.dy = cursor->image.dy;
 		yy = cursor->image.dy - info->var.yoffset;
 		xx = cursor->image.dx - info->var.xoffset;
 		temp = yy & 0xFFFF;
 		temp |= (xx << 16);
-		writel(temp, viaparinfo->io_virt + VIA_REG_CURSOR_POS);
+		writel(temp, viapar->io_virt + VIA_REG_CURSOR_POS);
 	}
 
-	if (cursor->set & FB_CUR_SETSIZE) {
-		temp = readl(viaparinfo->io_virt + VIA_REG_CURSOR_MODE);
+	if (cursor->image.width <= 32 && cursor->image.height <= 32)
+		cur_size = 32;
+	else if (cursor->image.width <= 64 && cursor->image.height <= 64)
+		cur_size = 64;
+	else {
+		printk(KERN_WARNING "viafb_cursor: The cursor is too large "
+			"%dx%d", cursor->image.width, cursor->image.height);
+		return -ENXIO;
+	}
 
-		if ((cursor->image.width <= 32)
-		    && (cursor->image.height <= 32)) {
-			MAX_CURS = 32;
+	if (cursor->set & FB_CUR_SETSIZE) {
+		temp = readl(viapar->io_virt + VIA_REG_CURSOR_MODE);
+		if (cur_size == 32)
 			temp |= 0x2;
-		} else if ((cursor->image.width <= 64)
-			   && (cursor->image.height <= 64)) {
-			MAX_CURS = 64;
-			temp &= 0xFFFFFFFD;
-		} else {
-			DEBUG_MSG(KERN_INFO
-			"The cursor image is biger than 64x64 bits...\n");
-			return -ENXIO;
-		}
-		writel(temp, viaparinfo->io_virt + VIA_REG_CURSOR_MODE);
+		else
+			temp &= ~0x2;
 
-		viacursor.image.height = cursor->image.height;
-		viacursor.image.width = cursor->image.width;
+		writel(temp, viapar->io_virt + VIA_REG_CURSOR_MODE);
 	}
 
 	if (cursor->set & FB_CUR_SETCMAP) {
-		viacursor.image.fg_color = cursor->image.fg_color;
-		viacursor.image.bg_color = cursor->image.bg_color;
-
-		switch (info->var.bits_per_pixel) {
-		case 8:
-		case 16:
-		case 32:
-			bg_col =
-			    (0xFF << 24) |
-			    (((info->cmap.red)[viacursor.image.bg_color] &
-			    0xFF00) << 8) |
-			    ((info->cmap.green)[viacursor.image.bg_color] &
-			    0xFF00) |
-			    (((info->cmap.blue)[viacursor.image.bg_color] &
-			    0xFF00) >> 8);
-			fg_col =
-			    (0xFF << 24) |
-			    (((info->cmap.red)[viacursor.image.fg_color] &
-			    0xFF00) << 8) |
-			    ((info->cmap.green)[viacursor.image.fg_color] &
-			    0xFF00) |
-			    (((info->cmap.blue)[viacursor.image.fg_color] &
-			    0xFF00) >> 8);
-			break;
-		default:
-			return 0;
-		}
-
-		/* This is indeed a patch for VT3324/VT3353 */
-		if (!info->par)
-			return 0;
-		p_viafb_par = (struct viafb_par *)info->par;
-
-		if ((p_viafb_par->chip_info->gfx_chip_name ==
-			UNICHROME_CX700) ||
-			((p_viafb_par->chip_info->gfx_chip_name ==
-			UNICHROME_VX800))) {
-			bg_col =
-			    (((info->cmap.red)[viacursor.image.bg_color] &
-			    0xFFC0) << 14) |
-			    (((info->cmap.green)[viacursor.image.bg_color] &
-			    0xFFC0) << 4) |
-			    (((info->cmap.blue)[viacursor.image.bg_color] &
-			    0xFFC0) >> 6);
-			fg_col =
-			    (((info->cmap.red)[viacursor.image.fg_color] &
-			    0xFFC0) << 14) |
-			    (((info->cmap.green)[viacursor.image.fg_color] &
-			    0xFFC0) << 4) |
-			    (((info->cmap.blue)[viacursor.image.fg_color] &
-			    0xFFC0) >> 6);
+		fg_color = cursor->image.fg_color;
+		bg_color = cursor->image.bg_color;
+		if (chip_name == UNICHROME_CX700 ||
+			chip_name == UNICHROME_VX800) {
+			fg_color =
+				((info->cmap.red[fg_color] & 0xFFC0) << 14) |
+				((info->cmap.green[fg_color] & 0xFFC0) << 4) |
+				((info->cmap.blue[fg_color] & 0xFFC0) >> 6);
+			bg_color =
+				((info->cmap.red[bg_color] & 0xFFC0) << 14) |
+				((info->cmap.green[bg_color] & 0xFFC0) << 4) |
+				((info->cmap.blue[bg_color] & 0xFFC0) >> 6);
+		} else {
+			fg_color =
+				((info->cmap.red[fg_color] & 0xFF00) << 8) |
+				(info->cmap.green[fg_color] & 0xFF00) |
+				((info->cmap.blue[fg_color] & 0xFF00) >> 8);
+			bg_color =
+				((info->cmap.red[bg_color] & 0xFF00) << 8) |
+				(info->cmap.green[bg_color] & 0xFF00) |
+				((info->cmap.blue[bg_color] & 0xFF00) >> 8);
 		}
 
-		writel(bg_col, viaparinfo->io_virt + VIA_REG_CURSOR_BG);
-		writel(fg_col, viaparinfo->io_virt + VIA_REG_CURSOR_FG);
+		writel(bg_color, viapar->io_virt + VIA_REG_CURSOR_BG);
+		writel(fg_color, viapar->io_virt + VIA_REG_CURSOR_FG);
 	}
 
 	if (cursor->set & FB_CUR_SETSHAPE) {
 		struct {
-			u8 data[CURSOR_SIZE / 8];
-			u32 bak[CURSOR_SIZE / 32];
+			u8 data[CURSOR_SIZE];
+			u32 bak[CURSOR_SIZE / 4];
 		} *cr_data = kzalloc(sizeof(*cr_data), GFP_ATOMIC);
-		int size =
-		    ((viacursor.image.width + 7) >> 3) *
-		    viacursor.image.height;
+		int size = ((cursor->image.width + 7) >> 3) *
+			cursor->image.height;
 
-		if (cr_data == NULL)
-			goto out;
+		if (!cr_data)
+			return -ENOMEM;
 
-		if (MAX_CURS == 32) {
-			for (i = 0; i < (CURSOR_SIZE / 32); i++) {
+		if (cur_size == 32) {
+			for (i = 0; i < (CURSOR_SIZE / 4); i++) {
 				cr_data->bak[i] = 0x0;
 				cr_data->bak[i + 1] = 0xFFFFFFFF;
 				i += 1;
 			}
-		} else if (MAX_CURS == 64) {
-			for (i = 0; i < (CURSOR_SIZE / 32); i++) {
+		} else {
+			for (i = 0; i < (CURSOR_SIZE / 4); i++) {
 				cr_data->bak[i] = 0x0;
 				cr_data->bak[i + 1] = 0x0;
 				cr_data->bak[i + 2] = 0xFFFFFFFF;
@@ -1008,27 +956,27 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 			}
 		}
 
-		switch (viacursor.rop) {
+		switch (cursor->rop) {
 		case ROP_XOR:
 			for (i = 0; i < size; i++)
-				cr_data->data[i] = viacursor.mask[i];
+				cr_data->data[i] = cursor->mask[i];
 			break;
 		case ROP_COPY:
 
 			for (i = 0; i < size; i++)
-				cr_data->data[i] = viacursor.mask[i];
+				cr_data->data[i] = cursor->mask[i];
 			break;
 		default:
 			break;
 		}
 
-		if (MAX_CURS == 32) {
+		if (cur_size == 32) {
 			for (i = 0; i < size; i++) {
 				cr_data->bak[j] = (u32) cr_data->data[i];
 				cr_data->bak[j + 1] = ~cr_data->bak[j];
 				j += 2;
 			}
-		} else if (MAX_CURS == 64) {
+		} else {
 			for (i = 0; i < size; i++) {
 				cr_data->bak[j] = (u32) cr_data->data[i];
 				cr_data->bak[j + 1] = 0x0;
@@ -1038,14 +986,12 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 			}
 		}
 
-		memcpy(viafbinfo->screen_base +
-		       ((struct viafb_par *)(info->par))->cursor_start,
-		       cr_data->bak, CURSOR_SIZE);
-out:
+		memcpy_toio(viafbinfo->screen_base + viapar->shared->
+			cursor_vram_addr, cr_data->bak, CURSOR_SIZE);
 		kfree(cr_data);
 	}
 
-	if (viacursor.enable)
+	if (cursor->enable)
 		viafb_show_hw_cursor(info, HW_Cursor_ON);
 
 	return 0;
@@ -2052,8 +1998,6 @@ static int __devinit via_pci_probe(void)
 		viaparinfo->fbmem_free = viaparinfo->memsize;
 		viaparinfo->fbmem_used = 0;
 		if (viafb_accel) {
-			viaparinfo1->cursor_start =
-			    viaparinfo->cursor_start - viafb_second_offset;
 			viaparinfo1->VQ_start = viaparinfo->VQ_start -
 				viafb_second_offset;
 			viaparinfo1->VQ_end = viaparinfo->VQ_end -
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index beb4703..ca39ec1 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -51,6 +51,7 @@ struct viafb_shared {
 	struct chip_information chip_info;
 
 	/* hardware acceleration stuff */
+	u32 cursor_vram_addr;
 	int (*hw_bitblt)(void __iomem *engine, u8 op, u32 width, u32 height,
 		u8 dst_bpp, u32 dst_addr, u32 dst_pitch, u32 dst_x, u32 dst_y,
 		u32 *src_mem, u32 src_addr, u32 src_pitch, u32 src_x, u32 src_y,
@@ -65,7 +66,6 @@ struct viafb_par {
 	unsigned int memsize;	/*size of fbmem */
 	u32 fbmem_free;		/* Free FB memory */
 	u32 fbmem_used;		/* Use FB memory size */
-	u32 cursor_start;	/* Cursor Start Address */
 	u32 VQ_start;		/* Virtual Queue Start Address */
 	u32 VQ_end;		/* Virtual Queue End Address */
 	u32 iga_path;
-- 
cgit v0.10.2


From 2d6e8851f608bd0c811f2df83eeff4ad8631e723 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:29 -0700
Subject: viafb: improve pitch handling

Split the pitch handling up and replaces the calculation from virtual xres
and bpp with fix.line_length which already contains the pitch and does not
add any constrains for the virtual resolution.

Also add a bit to the second pitch which the documentation mentions but
which was ignored by the driver.

Although it is a bit unclear what the right pitch for some LCD modes is
this patch should have no negative runtime impact.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c
index 86050e7..4910561 100644
--- a/drivers/video/via/hw.c
+++ b/drivers/video/via/hw.c
@@ -36,13 +36,6 @@ static const struct pci_device_id_info pciidlist[] = {
 	{0, 0, 0}
 };
 
-struct offset offset_reg = {
-	/* IGA1 Offset Register */
-	{IGA1_OFFSET_REG_NUM, {{CR13, 0, 7}, {CR35, 5, 7} } },
-	/* IGA2 Offset Register */
-	{IGA2_OFFSET_REG_NUM, {{CR66, 0, 7}, {CR67, 0, 1} } }
-};
-
 static struct pll_map pll_value[] = {
 	{CLK_25_175M, CLE266_PLL_25_175M, K800_PLL_25_175M, CX700_25_175M},
 	{CLK_29_581M, CLE266_PLL_29_581M, K800_PLL_29_581M, CX700_29_581M},
@@ -648,6 +641,26 @@ void viafb_set_secondary_address(u32 addr)
 	viafb_write_reg_mask(CRA3, VIACR, (addr >> 26) & 0x07, 0x07);
 }
 
+void viafb_set_primary_pitch(u32 pitch)
+{
+	DEBUG_MSG(KERN_DEBUG "viafb_set_primary_pitch(0x%08X)\n", pitch);
+	/* spec does not say that first adapter skips 3 bits but old
+	 * code did it and seems to be reasonable in analogy to 2nd adapter
+	 */
+	pitch = pitch >> 3;
+	viafb_write_reg(0x13, VIACR, pitch & 0xFF);
+	viafb_write_reg_mask(0x35, VIACR, (pitch >> (8 - 5)) & 0xE0, 0xE0);
+}
+
+void viafb_set_secondary_pitch(u32 pitch)
+{
+	DEBUG_MSG(KERN_DEBUG "viafb_set_secondary_pitch(0x%08X)\n", pitch);
+	pitch = pitch >> 3;
+	viafb_write_reg(0x66, VIACR, pitch & 0xFF);
+	viafb_write_reg_mask(0x67, VIACR, (pitch >> 8) & 0x03, 0x03);
+	viafb_write_reg_mask(0x71, VIACR, (pitch >> (10 - 7)) & 0x80, 0x80);
+}
+
 void viafb_set_output_path(int device, int set_iga, int output_interface)
 {
 	switch (device) {
@@ -1076,30 +1089,6 @@ void viafb_write_regx(struct io_reg RegTable[], int ItemNum)
 	}
 }
 
-void viafb_load_offset_reg(int h_addr, int bpp_byte, int set_iga)
-{
-	int reg_value;
-	int viafb_load_reg_num;
-	struct io_register *reg;
-
-	switch (set_iga) {
-	case IGA1_IGA2:
-	case IGA1:
-		reg_value = IGA1_OFFSET_FORMULA(h_addr, bpp_byte);
-		viafb_load_reg_num = offset_reg.iga1_offset_reg.reg_num;
-		reg = offset_reg.iga1_offset_reg.reg;
-		viafb_load_reg(reg_value, viafb_load_reg_num, reg, VIACR);
-		if (set_iga == IGA1)
-			break;
-	case IGA2:
-		reg_value = IGA2_OFFSET_FORMULA(h_addr, bpp_byte);
-		viafb_load_reg_num = offset_reg.iga2_offset_reg.reg_num;
-		reg = offset_reg.iga2_offset_reg.reg;
-		viafb_load_reg(reg_value, viafb_load_reg_num, reg, VIACR);
-		break;
-	}
-}
-
 void viafb_load_fetch_count_reg(int h_addr, int bpp_byte, int set_iga)
 {
 	int reg_value;
@@ -1869,7 +1858,6 @@ void viafb_fill_crtc_timing(struct crt_mode_table *crt_table,
 	load_fix_bit_crtc_reg();
 	viafb_lock_crt();
 	viafb_write_reg_mask(CR17, VIACR, 0x80, BIT7);
-	viafb_load_offset_reg(h_addr, bpp_byte, set_iga);
 	viafb_load_fetch_count_reg(h_addr, bpp_byte, set_iga);
 
 	/* load FIFO */
@@ -2322,6 +2310,9 @@ int viafb_setmode(int vmode_index, int hor_res, int ver_res, int video_bpp,
 		}
 	}
 
+	viafb_set_primary_pitch(viafbinfo->fix.line_length);
+	viafb_set_secondary_pitch(viafb_dual_fb ? viafbinfo1->fix.line_length
+		: viafbinfo->fix.line_length);
 	/* Update Refresh Rate Setting */
 
 	/* Clear On Screen */
@@ -2738,24 +2729,6 @@ void viafb_set_dpa_gfx(int output_interface, struct GFX_DPA_SETTING\
 	}
 }
 
-void viafb_memory_pitch_patch(struct fb_info *info)
-{
-	if (info->var.xres != info->var.xres_virtual) {
-		viafb_load_offset_reg(info->var.xres_virtual,
-				info->var.bits_per_pixel >> 3, IGA1);
-
-		if (viafb_SAMM_ON) {
-			viafb_load_offset_reg(viafb_second_virtual_xres,
-				viafb_bpp1 >> 3,
-					IGA2);
-		} else {
-			viafb_load_offset_reg(info->var.xres_virtual,
-					info->var.bits_per_pixel >> 3, IGA2);
-		}
-
-	}
-}
-
 /*According var's xres, yres fill var's other timing information*/
 void viafb_fill_var_timing_info(struct fb_var_screeninfo *var, int refresh,
 			  int mode_index)
diff --git a/drivers/video/via/hw.h b/drivers/video/via/hw.h
index cf1dfd5..817033d 100644
--- a/drivers/video/via/hw.h
+++ b/drivers/video/via/hw.h
@@ -147,14 +147,8 @@ is reserved, so it may have problem to set 1600x1200 on IGA2. */
 /* location: {CR5F,0,4} */
 #define IGA2_VER_SYNC_END_REG_NUM       1
 
-/* Define Offset and Fetch Count Register*/
+/* Define Fetch Count Register*/
 
-/* location: {CR13,0,7},{CR35,5,7} */
-#define IGA1_OFFSET_REG_NUM             2
-/* 8 bytes alignment. */
-#define IGA1_OFFSER_ALIGN_BYTE          8
-/* x: H resolution, y: color depth */
-#define IGA1_OFFSET_FORMULA(x, y)        ((x*y)/IGA1_OFFSER_ALIGN_BYTE)
 /* location: {SR1C,0,7},{SR1D,0,1} */
 #define IGA1_FETCH_COUNT_REG_NUM        2
 /* 16 bytes alignment. */
@@ -164,11 +158,6 @@ is reserved, so it may have problem to set 1600x1200 on IGA2. */
 #define IGA1_FETCH_COUNT_FORMULA(x, y)   \
 	(((x*y)/IGA1_FETCH_COUNT_ALIGN_BYTE) + IGA1_FETCH_COUNT_PATCH_VALUE)
 
-/* location: {CR66,0,7},{CR67,0,1} */
-#define IGA2_OFFSET_REG_NUM             2
-#define IGA2_OFFSET_ALIGN_BYTE          8
-/* x: H resolution, y: color depth */
-#define IGA2_OFFSET_FORMULA(x, y)        ((x*y)/IGA2_OFFSET_ALIGN_BYTE)
 /* location: {CR65,0,7},{CR67,2,3} */
 #define IGA2_FETCH_COUNT_REG_NUM        2
 #define IGA2_FETCH_COUNT_ALIGN_BYTE     16
@@ -617,23 +606,6 @@ struct iga2_ver_sync_end {
 	struct io_register reg[IGA2_VER_SYNC_END_REG_NUM];
 };
 
-/* IGA1 Offset Register */
-struct iga1_offset {
-	int reg_num;
-	struct io_register reg[IGA1_OFFSET_REG_NUM];
-};
-
-/* IGA2 Offset Register */
-struct iga2_offset {
-	int reg_num;
-	struct io_register reg[IGA2_OFFSET_REG_NUM];
-};
-
-struct offset {
-	struct iga1_offset iga1_offset_reg;
-	struct iga2_offset iga2_offset_reg;
-};
-
 /* IGA1 Fetch Count Register */
 struct iga1_fetch_count {
 	int reg_num;
@@ -904,7 +876,6 @@ void viafb_write_reg(u8 index, u16 io_port, u8 data);
 u8 viafb_read_reg(int io_port, u8 index);
 void viafb_lock_crt(void);
 void viafb_unlock_crt(void);
-void viafb_load_offset_reg(int h_addr, int bpp_byte, int set_iga);
 void viafb_load_fetch_count_reg(int h_addr, int bpp_byte, int set_iga);
 void viafb_write_regx(struct io_reg RegTable[], int ItemNum);
 struct VideoModeTable *viafb_get_modetbl_pointer(int Index);
@@ -928,6 +899,8 @@ void viafb_get_mmio_info(unsigned long *mmio_base, u32 *mmio_len);
 void viafb_set_iga_path(void);
 void viafb_set_primary_address(u32 addr);
 void viafb_set_secondary_address(u32 addr);
+void viafb_set_primary_pitch(u32 pitch);
+void viafb_set_secondary_pitch(u32 pitch);
 void viafb_get_fb_info(unsigned int *fb_base, unsigned int *fb_len);
 
 #endif /* __HW_H__ */
diff --git a/drivers/video/via/lcd.c b/drivers/video/via/lcd.c
index 144d34b..e3e597f 100644
--- a/drivers/video/via/lcd.c
+++ b/drivers/video/via/lcd.c
@@ -952,13 +952,10 @@ void viafb_lcd_set_mode(struct crt_mode_table *mode_crt_table,
 	int video_index = plvds_setting_info->lcd_panel_size;
 	int set_iga = plvds_setting_info->iga_path;
 	int mode_bpp = plvds_setting_info->bpp;
-	int viafb_load_reg_num = 0;
-	int reg_value = 0;
 	int set_hres, set_vres;
 	int panel_hres, panel_vres;
 	u32 pll_D_N;
 	int offset;
-	struct io_register *reg = NULL;
 	struct display_timing mode_crt_reg, panel_crt_reg;
 	struct crt_mode_table *panel_crt_table = NULL;
 	struct VideoModeTable *vmode_tbl = NULL;
@@ -1038,16 +1035,11 @@ void viafb_lcd_set_mode(struct crt_mode_table *mode_crt_table,
 		}
 
 		/* Offset for simultaneous */
-		reg_value = offset;
-		viafb_load_reg_num = offset_reg.iga2_offset_reg.reg_num;
-		reg = offset_reg.iga2_offset_reg.reg;
-		viafb_load_reg(reg_value, viafb_load_reg_num, reg, VIACR);
+		viafb_set_secondary_pitch(offset << 3);
 		DEBUG_MSG(KERN_INFO "viafb_load_reg!!\n");
 		viafb_load_fetch_count_reg(set_hres, 4, IGA2);
 		/* Fetch count for simultaneous */
 	} else {		/* SAMM */
-		/* Offset for IGA2 only */
-		viafb_load_offset_reg(set_hres, mode_bpp / 8, set_iga);
 		/* Fetch count for IGA2 only */
 		viafb_load_fetch_count_reg(set_hres, mode_bpp / 8, set_iga);
 
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 03f98d6..e9c9f0e 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -175,9 +175,6 @@ static int viafb_set_par(struct fb_info *info)
 			info->var.bits_per_pixel, vmode_index1,
 			viafb_second_xres, viafb_second_yres, viafb_bpp1);
 
-		/*We should set memory offset according virtual_x */
-		/*Fix me:put this function into viafb_setmode */
-		viafb_memory_pitch_patch(info);
 		viafb_update_fix(info);
 		viafb_bpp = info->var.bits_per_pixel;
 		/* Update viafb_accel, it is necessary to our 2D accelerate */
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index ca39ec1..159619b 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -97,7 +97,6 @@ extern int viafb_memsize;
 extern int strict_strtoul(const char *cp, unsigned int base,
 	unsigned long *res);
 
-void viafb_memory_pitch_patch(struct fb_info *info);
 void viafb_fill_var_timing_info(struct fb_var_screeninfo *var, int refresh,
 			  int mode_index);
 int viafb_get_mode_index(int hres, int vres);
-- 
cgit v0.10.2


From 31de59d5e1cd6968ea9d1a19cceefb7a037e46bf Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:31 -0700
Subject: viafb: hardware acceleration initialization cleanup

The main motivation of this patch was to merge the three initialization
functions in one and clean it up. However as some changes in other code
areas where needed to do it right some small other changes were made.

Changes to viafb_par:

io_virt renamed as engine_mmio and moved to shared
VQ_start renamed as vq_vram_addr and moved to shared
VQ_end removed as it is easily recalculatable

vq_vram_addr is not strictly needed but keep it to track where we
allocated video memory.  The memory allocated for the virtual queue was
shrunk to VQ_SIZE as VQ_SIZE+CURSOR_SIZE looked like a bug to me.  But to
be honest I don't have the faintest idea what virtual queues are for in
the graphic hardware and whether the driver needs them in any way.  I only
know that they aren't directly accessed by the driver and so the only
potential current use would be as hardware internal buffers.  For now keep
them to avoid regressions and only remove the double cursor allocation.

The most changes were caused by renames and the mentioned structure
changes so the chance of regressions is pretty low.  The meaning of
viafb_accel changed slightly as previously it was changed back and forth
in the code and allowed to enable the hardware acceleration by software if
previously disabled.  The new behaviour is that viafb_accel=0 always
prevents hardware acceleration.  With viafb_accel!=0 the acceleration can
be freely choosen by set_var.  This means viafb_accel is a diagnostic tool
and if someone has to use viafb_accel=0 the driver needs to be fixed.

As this is mostly a code cleanup no regressions beside the slightly change
of viafb_accel is expected.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c
index 503f9d8..42ab4b4 100644
--- a/drivers/video/via/accel.c
+++ b/drivers/video/via/accel.c
@@ -308,9 +308,22 @@ static int hw_bitblt_2(void __iomem *engine, u8 op, u32 width, u32 height,
 	return 0;
 }
 
-void viafb_init_accel(struct viafb_shared *shared)
+int viafb_init_engine(struct fb_info *info)
 {
-	switch (shared->chip_info.gfx_chip_name) {
+	struct viafb_par *viapar = info->par;
+	void __iomem *engine;
+	u32 vq_start_addr, vq_end_addr, vq_start_low, vq_end_low, vq_high,
+		vq_len, chip_name = viapar->shared->chip_info.gfx_chip_name;
+
+	engine = ioremap_nocache(info->fix.mmio_start, info->fix.mmio_len);
+	viapar->shared->engine_mmio = engine;
+	if (!engine) {
+		printk(KERN_WARNING "viafb_init_accel: ioremap failed, "
+			"hardware acceleration disabled\n");
+		return -ENOMEM;
+	}
+
+	switch (chip_name) {
 	case UNICHROME_CLE266:
 	case UNICHROME_K400:
 	case UNICHROME_K800:
@@ -321,186 +334,115 @@ void viafb_init_accel(struct viafb_shared *shared)
 	case UNICHROME_K8M890:
 	case UNICHROME_P4M890:
 	case UNICHROME_P4M900:
-		shared->hw_bitblt = hw_bitblt_1;
+		viapar->shared->hw_bitblt = hw_bitblt_1;
 		break;
 	case UNICHROME_VX800:
-		shared->hw_bitblt = hw_bitblt_2;
+		viapar->shared->hw_bitblt = hw_bitblt_2;
 		break;
 	default:
-		shared->hw_bitblt = NULL;
+		viapar->shared->hw_bitblt = NULL;
 	}
 
-	viaparinfo->fbmem_free -= CURSOR_SIZE;
-	shared->cursor_vram_addr = viaparinfo->fbmem_free;
-	viaparinfo->fbmem_used += CURSOR_SIZE;
+	viapar->fbmem_free -= CURSOR_SIZE;
+	viapar->shared->cursor_vram_addr = viapar->fbmem_free;
+	viapar->fbmem_used += CURSOR_SIZE;
 
-	/* Reverse 8*1024 memory space for cursor image */
-	viaparinfo->fbmem_free -= (CURSOR_SIZE + VQ_SIZE);
-	viaparinfo->VQ_start = viaparinfo->fbmem_free;
-	viaparinfo->VQ_end = viaparinfo->VQ_start + VQ_SIZE - 1;
-	viaparinfo->fbmem_used += (CURSOR_SIZE + VQ_SIZE);
-}
-
-void viafb_init_2d_engine(void)
-{
-	u32 dwVQStartAddr, dwVQEndAddr;
-	u32 dwVQLen, dwVQStartL, dwVQEndL, dwVQStartEndH;
+	viapar->fbmem_free -= VQ_SIZE;
+	viapar->shared->vq_vram_addr = viapar->fbmem_free;
+	viapar->fbmem_used += VQ_SIZE;
 
 	/* Init AGP and VQ regs */
-	switch (viaparinfo->chip_info->gfx_chip_name) {
+	switch (chip_name) {
 	case UNICHROME_K8M890:
 	case UNICHROME_P4M900:
-		writel(0x00100000, viaparinfo->io_virt + VIA_REG_CR_TRANSET);
-		writel(0x680A0000, viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-		writel(0x02000000, viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
+		writel(0x00100000, engine + VIA_REG_CR_TRANSET);
+		writel(0x680A0000, engine + VIA_REG_CR_TRANSPACE);
+		writel(0x02000000, engine + VIA_REG_CR_TRANSPACE);
 		break;
 
 	default:
-		writel(0x00100000, viaparinfo->io_virt + VIA_REG_TRANSET);
-		writel(0x00000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-		writel(0x00333004, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-		writel(0x60000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-		writel(0x61000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-		writel(0x62000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-		writel(0x63000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-		writel(0x64000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-		writel(0x7D000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
-
-		writel(0xFE020000, viaparinfo->io_virt + VIA_REG_TRANSET);
-		writel(0x00000000, viaparinfo->io_virt + VIA_REG_TRANSPACE);
+		writel(0x00100000, engine + VIA_REG_TRANSET);
+		writel(0x00000000, engine + VIA_REG_TRANSPACE);
+		writel(0x00333004, engine + VIA_REG_TRANSPACE);
+		writel(0x60000000, engine + VIA_REG_TRANSPACE);
+		writel(0x61000000, engine + VIA_REG_TRANSPACE);
+		writel(0x62000000, engine + VIA_REG_TRANSPACE);
+		writel(0x63000000, engine + VIA_REG_TRANSPACE);
+		writel(0x64000000, engine + VIA_REG_TRANSPACE);
+		writel(0x7D000000, engine + VIA_REG_TRANSPACE);
+
+		writel(0xFE020000, engine + VIA_REG_TRANSET);
+		writel(0x00000000, engine + VIA_REG_TRANSPACE);
 		break;
 	}
-	if (viaparinfo->VQ_start != 0) {
-		/* Enable VQ */
-		dwVQStartAddr = viaparinfo->VQ_start;
-		dwVQEndAddr = viaparinfo->VQ_end;
-
-		dwVQStartL = 0x50000000 | (dwVQStartAddr & 0xFFFFFF);
-		dwVQEndL = 0x51000000 | (dwVQEndAddr & 0xFFFFFF);
-		dwVQStartEndH = 0x52000000 |
-			((dwVQStartAddr & 0xFF000000) >> 24) |
-			((dwVQEndAddr & 0xFF000000) >> 16);
-		dwVQLen = 0x53000000 | (VQ_SIZE >> 3);
-		switch (viaparinfo->chip_info->gfx_chip_name) {
-		case UNICHROME_K8M890:
-		case UNICHROME_P4M900:
-			dwVQStartL |= 0x20000000;
-			dwVQEndL |= 0x20000000;
-			dwVQStartEndH |= 0x20000000;
-			dwVQLen |= 0x20000000;
-			break;
-		default:
-			break;
-		}
 
-		switch (viaparinfo->chip_info->gfx_chip_name) {
-		case UNICHROME_K8M890:
-		case UNICHROME_P4M900:
-			writel(0x00100000,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSET);
-			writel(dwVQStartEndH,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-			writel(dwVQStartL,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-			writel(dwVQEndL,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-			writel(dwVQLen,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-			writel(0x74301001,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-			writel(0x00000000,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-			break;
-		default:
-			writel(0x00FE0000,
-				viaparinfo->io_virt + VIA_REG_TRANSET);
-			writel(0x080003FE,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x0A00027C,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x0B000260,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x0C000274,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x0D000264,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x0E000000,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x0F000020,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x1000027E,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x110002FE,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x200F0060,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-
-			writel(0x00000006,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x40008C0F,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x44000000,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x45080C04,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x46800408,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-
-			writel(dwVQStartEndH,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(dwVQStartL,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(dwVQEndL,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(dwVQLen,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			break;
-		}
-	} else {
-		/* Disable VQ */
-		switch (viaparinfo->chip_info->gfx_chip_name) {
-		case UNICHROME_K8M890:
-		case UNICHROME_P4M900:
-			writel(0x00100000,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSET);
-			writel(0x74301000,
-				viaparinfo->io_virt + VIA_REG_CR_TRANSPACE);
-			break;
-		default:
-			writel(0x00FE0000,
-				viaparinfo->io_virt + VIA_REG_TRANSET);
-			writel(0x00000004,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x40008C0F,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x44000000,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x45080C04,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			writel(0x46800408,
-				viaparinfo->io_virt + VIA_REG_TRANSPACE);
-			break;
-		}
+	/* Enable VQ */
+	vq_start_addr = viapar->shared->vq_vram_addr;
+	vq_end_addr = viapar->shared->vq_vram_addr + VQ_SIZE - 1;
+
+	vq_start_low = 0x50000000 | (vq_start_addr & 0xFFFFFF);
+	vq_end_low = 0x51000000 | (vq_end_addr & 0xFFFFFF);
+	vq_high = 0x52000000 | ((vq_start_addr & 0xFF000000) >> 24) |
+		((vq_end_addr & 0xFF000000) >> 16);
+	vq_len = 0x53000000 | (VQ_SIZE >> 3);
+
+	switch (chip_name) {
+	case UNICHROME_K8M890:
+	case UNICHROME_P4M900:
+		vq_start_low |= 0x20000000;
+		vq_end_low |= 0x20000000;
+		vq_high |= 0x20000000;
+		vq_len |= 0x20000000;
+
+		writel(0x00100000, engine + VIA_REG_CR_TRANSET);
+		writel(vq_high, engine + VIA_REG_CR_TRANSPACE);
+		writel(vq_start_low, engine + VIA_REG_CR_TRANSPACE);
+		writel(vq_end_low, engine + VIA_REG_CR_TRANSPACE);
+		writel(vq_len, engine + VIA_REG_CR_TRANSPACE);
+		writel(0x74301001, engine + VIA_REG_CR_TRANSPACE);
+		writel(0x00000000, engine + VIA_REG_CR_TRANSPACE);
+		break;
+	default:
+		writel(0x00FE0000, engine + VIA_REG_TRANSET);
+		writel(0x080003FE, engine + VIA_REG_TRANSPACE);
+		writel(0x0A00027C, engine + VIA_REG_TRANSPACE);
+		writel(0x0B000260, engine + VIA_REG_TRANSPACE);
+		writel(0x0C000274, engine + VIA_REG_TRANSPACE);
+		writel(0x0D000264, engine + VIA_REG_TRANSPACE);
+		writel(0x0E000000, engine + VIA_REG_TRANSPACE);
+		writel(0x0F000020, engine + VIA_REG_TRANSPACE);
+		writel(0x1000027E, engine + VIA_REG_TRANSPACE);
+		writel(0x110002FE, engine + VIA_REG_TRANSPACE);
+		writel(0x200F0060, engine + VIA_REG_TRANSPACE);
+
+		writel(0x00000006, engine + VIA_REG_TRANSPACE);
+		writel(0x40008C0F, engine + VIA_REG_TRANSPACE);
+		writel(0x44000000, engine + VIA_REG_TRANSPACE);
+		writel(0x45080C04, engine + VIA_REG_TRANSPACE);
+		writel(0x46800408, engine + VIA_REG_TRANSPACE);
+
+		writel(vq_high, engine + VIA_REG_TRANSPACE);
+		writel(vq_start_low, engine + VIA_REG_TRANSPACE);
+		writel(vq_end_low, engine + VIA_REG_TRANSPACE);
+		writel(vq_len, engine + VIA_REG_TRANSPACE);
+		break;
 	}
-}
 
-void viafb_hw_cursor_init(void)
-{
 	/* Set Cursor Image Base Address */
-	writel(viaparinfo->shared->cursor_vram_addr,
-		viaparinfo->io_virt + VIA_REG_CURSOR_MODE);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_CURSOR_POS);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_CURSOR_ORG);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_CURSOR_BG);
-	writel(0x0, viaparinfo->io_virt + VIA_REG_CURSOR_FG);
+	writel(viapar->shared->cursor_vram_addr, engine + VIA_REG_CURSOR_MODE);
+	writel(0x0, engine + VIA_REG_CURSOR_POS);
+	writel(0x0, engine + VIA_REG_CURSOR_ORG);
+	writel(0x0, engine + VIA_REG_CURSOR_BG);
+	writel(0x0, engine + VIA_REG_CURSOR_FG);
+	return 0;
 }
 
 void viafb_show_hw_cursor(struct fb_info *info, int Status)
 {
-	u32 temp;
-	u32 iga_path = ((struct viafb_par *)(info->par))->iga_path;
+	struct viafb_par *viapar = info->par;
+	u32 temp, iga_path = viapar->iga_path;
 
-	temp = readl(viaparinfo->io_virt + VIA_REG_CURSOR_MODE);
+	temp = readl(viapar->shared->engine_mmio + VIA_REG_CURSOR_MODE);
 	switch (Status) {
 	case HW_Cursor_ON:
 		temp |= 0x1;
@@ -517,25 +459,27 @@ void viafb_show_hw_cursor(struct fb_info *info, int Status)
 	default:
 		temp &= 0x7FFFFFFF;
 	}
-	writel(temp, viaparinfo->io_virt + VIA_REG_CURSOR_MODE);
+	writel(temp, viapar->shared->engine_mmio + VIA_REG_CURSOR_MODE);
 }
 
-int viafb_wait_engine_idle(void)
+void viafb_wait_engine_idle(struct fb_info *info)
 {
+	struct viafb_par *viapar = info->par;
 	int loop = 0;
 
-	while (!(readl(viaparinfo->io_virt + VIA_REG_STATUS) &
+	while (!(readl(viapar->shared->engine_mmio + VIA_REG_STATUS) &
 			VIA_VR_QUEUE_BUSY) && (loop < MAXLOOP)) {
 		loop++;
 		cpu_relax();
 	}
 
-	while ((readl(viaparinfo->io_virt + VIA_REG_STATUS) &
+	while ((readl(viapar->shared->engine_mmio + VIA_REG_STATUS) &
 		    (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY | VIA_3D_ENG_BUSY)) &&
 		    (loop < MAXLOOP)) {
 		loop++;
 		cpu_relax();
 	}
 
-	return loop >= MAXLOOP;
+	if (loop >= MAXLOOP)
+		printk(KERN_ERR "viafb_wait_engine_idle: not syncing\n");
 }
diff --git a/drivers/video/via/accel.h b/drivers/video/via/accel.h
index 4d93eba..615c84a 100644
--- a/drivers/video/via/accel.h
+++ b/drivers/video/via/accel.h
@@ -163,10 +163,8 @@
 #define VIA_BITBLT_MONO		2
 #define VIA_BITBLT_FILL		3
 
-void viafb_init_accel(struct viafb_shared *shared);
-void viafb_init_2d_engine(void);
-void viafb_hw_cursor_init(void);
-void viafb_show_hw_cursor(struct fb_info *info, int Status); int
-viafb_wait_engine_idle(void); void viafb_set_2d_color_depth(int bpp);
+int viafb_init_engine(struct fb_info *info);
+void viafb_show_hw_cursor(struct fb_info *info, int Status);
+void viafb_wait_engine_idle(struct fb_info *info);
 
 #endif /* __ACCEL_H__ */
diff --git a/drivers/video/via/global.c b/drivers/video/via/global.c
index 33e7c45b..b675cdb 100644
--- a/drivers/video/via/global.c
+++ b/drivers/video/via/global.c
@@ -32,7 +32,6 @@ int viafb_lcd_dsp_method = LCD_EXPANDSION;
 int viafb_lcd_mode = LCD_OPENLDI;
 int viafb_bpp = 32;
 int viafb_bpp1 = 32;
-int viafb_accel = 1;
 int viafb_CRT_ON = 1;
 int viafb_DVI_ON;
 int viafb_LCD_ON ;
diff --git a/drivers/video/via/hw.h b/drivers/video/via/hw.h
index 817033d..7302b40 100644
--- a/drivers/video/via/hw.h
+++ b/drivers/video/via/hw.h
@@ -855,7 +855,6 @@ extern int viafb_dual_fb;
 extern int viafb_LCD2_ON;
 extern int viafb_LCD_ON;
 extern int viafb_DVI_ON;
-extern int viafb_accel;
 extern int viafb_hotplug;
 
 void viafb_write_reg_mask(u8 index, int io_port, u8 data, u8 mask);
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index e9c9f0e..8e43759 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -33,6 +33,8 @@ static u32 pseudo_pal[17];
 static char *viafb_mode = "640x480";
 static char *viafb_mode1 = "640x480";
 
+static int viafb_accel = 1;
+
 /* Added for specifying active devices.*/
 char *viafb_active_dev = "";
 
@@ -140,6 +142,9 @@ static int viafb_check_var(struct fb_var_screeninfo *var,
 
 	/* Adjust var according to our driver's own table */
 	viafb_fill_var_timing_info(var, viafb_refresh, vmode_index);
+	if (info->var.accel_flags & FB_ACCELF_TEXT &&
+		!ppar->shared->engine_mmio)
+		info->var.accel_flags = 0;
 
 	return 0;
 }
@@ -177,8 +182,10 @@ static int viafb_set_par(struct fb_info *info)
 
 		viafb_update_fix(info);
 		viafb_bpp = info->var.bits_per_pixel;
-		/* Update viafb_accel, it is necessary to our 2D accelerate */
-		viafb_accel = info->var.accel_flags;
+		if (info->var.accel_flags & FB_ACCELF_TEXT)
+			info->flags &= ~FBINFO_HWACCEL_DISABLED;
+		else
+			info->flags |= FBINFO_HWACCEL_DISABLED;
 	}
 
 	return 0;
@@ -764,10 +771,11 @@ static void viafb_fillrect(struct fb_info *info,
 	const struct fb_fillrect *rect)
 {
 	struct viafb_par *viapar = info->par;
+	struct viafb_shared *shared = viapar->shared;
 	u32 fg_color;
 	u8 rop;
 
-	if (!viapar->shared->hw_bitblt) {
+	if (info->flags & FBINFO_HWACCEL_DISABLED || !shared->hw_bitblt) {
 		cfb_fillrect(info, rect);
 		return;
 	}
@@ -786,7 +794,7 @@ static void viafb_fillrect(struct fb_info *info,
 		rop = 0xF0;
 
 	DEBUG_MSG(KERN_DEBUG "viafb 2D engine: fillrect\n");
-	if (viapar->shared->hw_bitblt(viapar->io_virt, VIA_BITBLT_FILL,
+	if (shared->hw_bitblt(shared->engine_mmio, VIA_BITBLT_FILL,
 		rect->width, rect->height, info->var.bits_per_pixel,
 		viapar->vram_addr, info->fix.line_length, rect->dx, rect->dy,
 		NULL, 0, 0, 0, 0, fg_color, 0, rop))
@@ -797,8 +805,9 @@ static void viafb_copyarea(struct fb_info *info,
 	const struct fb_copyarea *area)
 {
 	struct viafb_par *viapar = info->par;
+	struct viafb_shared *shared = viapar->shared;
 
-	if (!viapar->shared->hw_bitblt) {
+	if (info->flags & FBINFO_HWACCEL_DISABLED || !shared->hw_bitblt) {
 		cfb_copyarea(info, area);
 		return;
 	}
@@ -807,7 +816,7 @@ static void viafb_copyarea(struct fb_info *info,
 		return;
 
 	DEBUG_MSG(KERN_DEBUG "viafb 2D engine: copyarea\n");
-	if (viapar->shared->hw_bitblt(viapar->io_virt, VIA_BITBLT_COLOR,
+	if (shared->hw_bitblt(shared->engine_mmio, VIA_BITBLT_COLOR,
 		area->width, area->height, info->var.bits_per_pixel,
 		viapar->vram_addr, info->fix.line_length, area->dx, area->dy,
 		NULL, viapar->vram_addr, info->fix.line_length,
@@ -819,10 +828,11 @@ static void viafb_imageblit(struct fb_info *info,
 	const struct fb_image *image)
 {
 	struct viafb_par *viapar = info->par;
+	struct viafb_shared *shared = viapar->shared;
 	u32 fg_color = 0, bg_color = 0;
 	u8 op;
 
-	if (!viapar->shared->hw_bitblt ||
+	if (info->flags & FBINFO_HWACCEL_DISABLED || !shared->hw_bitblt ||
 		(image->depth != 1 && image->depth != viapar->depth)) {
 		cfb_imageblit(info, image);
 		return;
@@ -843,7 +853,7 @@ static void viafb_imageblit(struct fb_info *info,
 		op = VIA_BITBLT_COLOR;
 
 	DEBUG_MSG(KERN_DEBUG "viafb 2D engine: imageblit\n");
-	if (viapar->shared->hw_bitblt(viapar->io_virt, op,
+	if (shared->hw_bitblt(shared->engine_mmio, op,
 		image->width, image->height, info->var.bits_per_pixel,
 		viapar->vram_addr, info->fix.line_length, image->dx, image->dy,
 		(u32 *)image->data, 0, 0, 0, 0, fg_color, bg_color, 0))
@@ -853,6 +863,7 @@ static void viafb_imageblit(struct fb_info *info,
 static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 {
 	struct viafb_par *viapar = info->par;
+	void __iomem *engine = viapar->shared->engine_mmio;
 	u32 temp, xx, yy, bg_color = 0, fg_color = 0,
 		chip_name = viapar->shared->chip_info.gfx_chip_name;
 	int i, j = 0, cur_size = 64;
@@ -867,7 +878,7 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 
 	if (cursor->set & FB_CUR_SETHOT) {
 		temp = (cursor->hot.x << 16) + cursor->hot.y;
-		writel(temp, viapar->io_virt + VIA_REG_CURSOR_ORG);
+		writel(temp, engine + VIA_REG_CURSOR_ORG);
 	}
 
 	if (cursor->set & FB_CUR_SETPOS) {
@@ -875,7 +886,7 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 		xx = cursor->image.dx - info->var.xoffset;
 		temp = yy & 0xFFFF;
 		temp |= (xx << 16);
-		writel(temp, viapar->io_virt + VIA_REG_CURSOR_POS);
+		writel(temp, engine + VIA_REG_CURSOR_POS);
 	}
 
 	if (cursor->image.width <= 32 && cursor->image.height <= 32)
@@ -889,13 +900,13 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 	}
 
 	if (cursor->set & FB_CUR_SETSIZE) {
-		temp = readl(viapar->io_virt + VIA_REG_CURSOR_MODE);
+		temp = readl(engine + VIA_REG_CURSOR_MODE);
 		if (cur_size == 32)
 			temp |= 0x2;
 		else
 			temp &= ~0x2;
 
-		writel(temp, viapar->io_virt + VIA_REG_CURSOR_MODE);
+		writel(temp, engine + VIA_REG_CURSOR_MODE);
 	}
 
 	if (cursor->set & FB_CUR_SETCMAP) {
@@ -922,8 +933,8 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 				((info->cmap.blue[bg_color] & 0xFF00) >> 8);
 		}
 
-		writel(bg_color, viapar->io_virt + VIA_REG_CURSOR_BG);
-		writel(fg_color, viapar->io_virt + VIA_REG_CURSOR_FG);
+		writel(bg_color, engine + VIA_REG_CURSOR_BG);
+		writel(fg_color, engine + VIA_REG_CURSOR_FG);
 	}
 
 	if (cursor->set & FB_CUR_SETSHAPE) {
@@ -996,8 +1007,8 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 
 static int viafb_sync(struct fb_info *info)
 {
-	if (viafb_accel)
-		viafb_wait_engine_idle();
+	if (!(info->flags & FBINFO_HWACCEL_DISABLED))
+		viafb_wait_engine_idle(info);
 	return 0;
 }
 
@@ -1867,22 +1878,18 @@ static int __devinit via_pci_probe(void)
 
 	viafb_get_mmio_info(&viafbinfo->fix.mmio_start,
 		&viafbinfo->fix.mmio_len);
-	viaparinfo->io_virt = ioremap_nocache(viafbinfo->fix.mmio_start,
-		viafbinfo->fix.mmio_len);
-	if (!viaparinfo->io_virt) {
-		printk(KERN_WARNING "ioremap failed: hardware acceleration disabled\n");
-		viafb_accel = 0;
-	}
-
 	viafbinfo->node = 0;
 	viafbinfo->fbops = &viafb_ops;
 	viafbinfo->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
 
 	viafbinfo->pseudo_palette = pseudo_pal;
-	if (viafb_accel) {
-		viafb_init_accel(viaparinfo->shared);
-		viafb_init_2d_engine();
-		viafb_hw_cursor_init();
+	if (viafb_accel && !viafb_init_engine(viafbinfo)) {
+		viafbinfo->flags |= FBINFO_HWACCEL_COPYAREA |
+			FBINFO_HWACCEL_FILLRECT |  FBINFO_HWACCEL_IMAGEBLIT;
+		default_var.accel_flags = FB_ACCELF_TEXT;
+	} else {
+		viafbinfo->flags |= FBINFO_HWACCEL_DISABLED;
+		default_var.accel_flags = 0;
 	}
 
 	if (viafb_second_size && (viafb_second_size < 8)) {
@@ -1963,15 +1970,6 @@ static int __devinit via_pci_probe(void)
 	default_var.lower_margin = 4;
 	default_var.hsync_len = default_var.left_margin;
 	default_var.vsync_len = 4;
-	default_var.accel_flags = 0;
-
-	if (viafb_accel) {
-		viafbinfo->flags |=
-		    (FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT |
-		     FBINFO_HWACCEL_IMAGEBLIT);
-		default_var.accel_flags |= FB_ACCELF_TEXT;
-	} else
-		viafbinfo->flags |= FBINFO_HWACCEL_DISABLED;
 
 	if (viafb_dual_fb) {
 		viafbinfo1 = framebuffer_alloc(viafb_par_length, NULL);
@@ -1994,12 +1992,6 @@ static int __devinit via_pci_probe(void)
 			viaparinfo1->fbmem_used;
 		viaparinfo->fbmem_free = viaparinfo->memsize;
 		viaparinfo->fbmem_used = 0;
-		if (viafb_accel) {
-			viaparinfo1->VQ_start = viaparinfo->VQ_start -
-				viafb_second_offset;
-			viaparinfo1->VQ_end = viaparinfo->VQ_end -
-				viafb_second_offset;
-		}
 
 		viaparinfo->iga_path = IGA1;
 		viaparinfo1->iga_path = IGA2;
@@ -2073,7 +2065,7 @@ static void __devexit via_pci_remove(void)
 	if (viafb_dual_fb)
 		unregister_framebuffer(viafbinfo1);
 	iounmap((void *)viafbinfo->screen_base);
-	iounmap(viaparinfo->io_virt);
+	iounmap(viaparinfo->shared->engine_mmio);
 
 	viafb_delete_i2c_buss(viaparinfo);
 
@@ -2235,7 +2227,7 @@ MODULE_PARM_DESC(viafb_SAMM_ON,
 
 module_param(viafb_accel, int, 0);
 MODULE_PARM_DESC(viafb_accel,
-	"Set 2D Hardware Acceleration.(Default = OFF)");
+	"Set 2D Hardware Acceleration: 0 = OFF, 1 = ON (default)");
 
 module_param(viafb_active_dev, charp, 0);
 MODULE_PARM_DESC(viafb_active_dev, "Specify active devices.");
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index 159619b..0c94d24 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -51,7 +51,9 @@ struct viafb_shared {
 	struct chip_information chip_info;
 
 	/* hardware acceleration stuff */
+	void __iomem *engine_mmio;
 	u32 cursor_vram_addr;
+	u32 vq_vram_addr;	/* virtual queue address in video ram */
 	int (*hw_bitblt)(void __iomem *engine, u8 op, u32 width, u32 height,
 		u8 dst_bpp, u32 dst_addr, u32 dst_pitch, u32 dst_x, u32 dst_y,
 		u32 *src_mem, u32 src_addr, u32 src_pitch, u32 src_x, u32 src_y,
@@ -61,13 +63,11 @@ struct viafb_shared {
 struct viafb_par {
 	u8 depth;
 	u32 vram_addr;
-	void __iomem *io_virt;	/*iospace virtual memory address */
+
 	unsigned int fbmem;	/*framebuffer physical memory address */
 	unsigned int memsize;	/*size of fbmem */
 	u32 fbmem_free;		/* Free FB memory */
 	u32 fbmem_used;		/* Use FB memory size */
-	u32 VQ_start;		/* Virtual Queue Start Address */
-	u32 VQ_end;		/* Virtual Queue End Address */
 	u32 iga_path;
 
 	struct viafb_shared *shared;
@@ -90,7 +90,6 @@ extern int viafb_dual_fb;
 extern int viafb_LCD2_ON;
 extern int viafb_LCD_ON;
 extern int viafb_DVI_ON;
-extern int viafb_accel;
 extern int viafb_hotplug;
 extern int viafb_memsize;
 
-- 
cgit v0.10.2


From 93967bee6f8b5f5bfbab0e8d6cab13f45d1ace75 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Tue, 22 Sep 2009 16:47:32 -0700
Subject: viafb: make module parameters visible in sysfs

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 8e43759..94ade1f 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -21,6 +21,7 @@
 
 #include <linux/module.h>
 #include <linux/seq_file.h>
+#include <linux/stat.h>
 #define _MASTER_FILE
 
 #include "global.h"
@@ -2191,79 +2192,79 @@ module_init(viafb_init);
 module_exit(viafb_exit);
 
 #ifdef MODULE
-module_param(viafb_memsize, int, 0);
+module_param(viafb_memsize, int, S_IRUSR);
 
-module_param(viafb_mode, charp, 0);
+module_param(viafb_mode, charp, S_IRUSR);
 MODULE_PARM_DESC(viafb_mode, "Set resolution (default=640x480)");
 
-module_param(viafb_mode1, charp, 0);
+module_param(viafb_mode1, charp, S_IRUSR);
 MODULE_PARM_DESC(viafb_mode1, "Set resolution (default=640x480)");
 
-module_param(viafb_bpp, int, 0);
+module_param(viafb_bpp, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_bpp, "Set color depth (default=32bpp)");
 
-module_param(viafb_bpp1, int, 0);
+module_param(viafb_bpp1, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_bpp1, "Set color depth (default=32bpp)");
 
-module_param(viafb_refresh, int, 0);
+module_param(viafb_refresh, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_refresh,
 	"Set CRT viafb_refresh rate (default = 60)");
 
-module_param(viafb_refresh1, int, 0);
+module_param(viafb_refresh1, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_refresh1,
 	"Set CRT refresh rate (default = 60)");
 
-module_param(viafb_lcd_panel_id, int, 0);
+module_param(viafb_lcd_panel_id, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_lcd_panel_id,
 	"Set Flat Panel type(Default=1024x768)");
 
-module_param(viafb_lcd_dsp_method, int, 0);
+module_param(viafb_lcd_dsp_method, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_lcd_dsp_method,
 	"Set Flat Panel display scaling method.(Default=Expandsion)");
 
-module_param(viafb_SAMM_ON, int, 0);
+module_param(viafb_SAMM_ON, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_SAMM_ON,
 	"Turn on/off flag of SAMM(Default=OFF)");
 
-module_param(viafb_accel, int, 0);
+module_param(viafb_accel, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_accel,
 	"Set 2D Hardware Acceleration: 0 = OFF, 1 = ON (default)");
 
-module_param(viafb_active_dev, charp, 0);
+module_param(viafb_active_dev, charp, S_IRUSR);
 MODULE_PARM_DESC(viafb_active_dev, "Specify active devices.");
 
-module_param(viafb_display_hardware_layout, int, 0);
+module_param(viafb_display_hardware_layout, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_display_hardware_layout,
 	"Display Hardware Layout (LCD Only, DVI Only...,etc)");
 
-module_param(viafb_second_size, int, 0);
+module_param(viafb_second_size, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_second_size,
 	"Set secondary device memory size");
 
-module_param(viafb_dual_fb, int, 0);
+module_param(viafb_dual_fb, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_dual_fb,
 	"Turn on/off flag of dual framebuffer devices.(Default = OFF)");
 
-module_param(viafb_platform_epia_dvi, int, 0);
+module_param(viafb_platform_epia_dvi, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_platform_epia_dvi,
 	"Turn on/off flag of DVI devices on EPIA board.(Default = OFF)");
 
-module_param(viafb_device_lcd_dualedge, int, 0);
+module_param(viafb_device_lcd_dualedge, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_device_lcd_dualedge,
 	"Turn on/off flag of dual edge panel.(Default = OFF)");
 
-module_param(viafb_bus_width, int, 0);
+module_param(viafb_bus_width, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_bus_width,
 	"Set bus width of panel.(Default = 12)");
 
-module_param(viafb_lcd_mode, int, 0);
+module_param(viafb_lcd_mode, int, S_IRUSR);
 MODULE_PARM_DESC(viafb_lcd_mode,
 	"Set Flat Panel mode(Default=OPENLDI)");
 
-module_param(viafb_lcd_port, charp, 0);
+module_param(viafb_lcd_port, charp, S_IRUSR);
 MODULE_PARM_DESC(viafb_lcd_port, "Specify LCD output port.");
 
-module_param(viafb_dvi_port, charp, 0);
+module_param(viafb_dvi_port, charp, S_IRUSR);
 MODULE_PARM_DESC(viafb_dvi_port, "Specify DVI output port.");
 
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 837b0abba19a0ac7840f41debe267da2e2add6a7 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Tue, 22 Sep 2009 16:47:33 -0700
Subject: viafb: remove unused structure member

Remove a structure member 'on_slot' in the chip_info structure which is
completely unused.

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/chip.h b/drivers/video/via/chip.h
index dde95ed..7f95967 100644
--- a/drivers/video/via/chip.h
+++ b/drivers/video/via/chip.h
@@ -122,7 +122,6 @@ struct lvds_chip_information {
 struct chip_information {
 	int gfx_chip_name;
 	int gfx_chip_revision;
-	int chip_on_slot;
 	struct tmds_chip_information tmds_chip_info;
 	struct lvds_chip_information lvds_chip_info;
 	struct lvds_chip_information lvds_chip_info2;
-- 
cgit v0.10.2


From 5ff32f69e75deca5ee1a2f421ca8a3e43cfaa339 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:34 -0700
Subject: viafb: use read-only mode parsing

viafb: use read-only mode parsing

The previous method of mode parsing wrote to the strings resulting in
truncated mode strings in the sysfs.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Harald Welte <laforge@gnumonks.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 94ade1f..61e652c 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -1821,11 +1821,29 @@ static void viafb_remove_proc(struct proc_dir_entry *viafb_entry)
 	remove_proc_entry("viafb", NULL);
 }
 
+static void parse_mode(const char *str, u32 *xres, u32 *yres)
+{
+	char *ptr;
+
+	*xres = simple_strtoul(str, &ptr, 10);
+	if (ptr[0] != 'x')
+		goto out_default;
+
+	*yres = simple_strtoul(&ptr[1], &ptr, 10);
+	if (ptr[0])
+		goto out_default;
+
+	return;
+
+out_default:
+	printk(KERN_WARNING "viafb received invalid mode string: %s\n", str);
+	*xres = 640;
+	*yres = 480;
+}
+
 static int __devinit via_pci_probe(void)
 {
-	unsigned long default_xres, default_yres;
-	char *tmpc, *tmpm;
-	char *tmpc_sec, *tmpm_sec;
+	u32 default_xres, default_yres;
 	int vmode_index;
 	u32 viafb_par_length;
 
@@ -1902,26 +1920,14 @@ static int __devinit via_pci_probe(void)
 			viafb_second_size * 1024 * 1024;
 	}
 
-	tmpm = viafb_mode;
-	tmpc = strsep(&tmpm, "x");
-	strict_strtoul(tmpc, 0, &default_xres);
-	strict_strtoul(tmpm, 0, &default_yres);
-
+	parse_mode(viafb_mode, &default_xres, &default_yres);
 	vmode_index = viafb_get_mode_index(default_xres, default_yres);
 	DEBUG_MSG(KERN_INFO "0->index=%d\n", vmode_index);
 
 	if (viafb_SAMM_ON == 1) {
-		if (strcmp(viafb_mode, viafb_mode1)) {
-			tmpm_sec = viafb_mode1;
-			tmpc_sec = strsep(&tmpm_sec, "x");
-			strict_strtoul(tmpc_sec, 0,
-				(unsigned long *)&viafb_second_xres);
-			strict_strtoul(tmpm_sec, 0,
-				(unsigned long *)&viafb_second_yres);
-		} else {
-			viafb_second_xres = default_xres;
-			viafb_second_yres = default_yres;
-		}
+		parse_mode(viafb_mode1, &viafb_second_xres,
+			&viafb_second_yres);
+
 		if (0 == viafb_second_virtual_xres) {
 			switch (viafb_second_xres) {
 			case 1400:
-- 
cgit v0.10.2


From 0306ab11c396f93056009152464ff104e4721817 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Tue, 22 Sep 2009 16:47:35 -0700
Subject: viafb: add support for the VX855 chipset

Add support for a new VIA integrated graphics chipset, the VX855.

Signed-off-by: HaraldWelte <HaraldWelte@viatech.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/chip.h b/drivers/video/via/chip.h
index 7f95967..474f428 100644
--- a/drivers/video/via/chip.h
+++ b/drivers/video/via/chip.h
@@ -68,6 +68,9 @@
 #define     UNICHROME_VX800         11
 #define     UNICHROME_VX800_DID     0x1122
 
+#define     UNICHROME_VX855         12
+#define     UNICHROME_VX855_DID     0x5122
+
 /**************************************************/
 /* Definition TMDS Trasmitter Information         */
 /**************************************************/
diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c
index 4910561..95faf8b 100644
--- a/drivers/video/via/hw.c
+++ b/drivers/video/via/hw.c
@@ -33,106 +33,147 @@ static const struct pci_device_id_info pciidlist[] = {
 	{PCI_VIA_VENDOR_ID, UNICHROME_P4M900_DID, UNICHROME_P4M900},
 	{PCI_VIA_VENDOR_ID, UNICHROME_CN750_DID, UNICHROME_CN750},
 	{PCI_VIA_VENDOR_ID, UNICHROME_VX800_DID, UNICHROME_VX800},
+	{PCI_VIA_VENDOR_ID, UNICHROME_VX855_DID, UNICHROME_VX855},
 	{0, 0, 0}
 };
 
 static struct pll_map pll_value[] = {
-	{CLK_25_175M, CLE266_PLL_25_175M, K800_PLL_25_175M, CX700_25_175M},
-	{CLK_29_581M, CLE266_PLL_29_581M, K800_PLL_29_581M, CX700_29_581M},
-	{CLK_26_880M, CLE266_PLL_26_880M, K800_PLL_26_880M, CX700_26_880M},
-	{CLK_31_490M, CLE266_PLL_31_490M, K800_PLL_31_490M, CX700_31_490M},
-	{CLK_31_500M, CLE266_PLL_31_500M, K800_PLL_31_500M, CX700_31_500M},
-	{CLK_31_728M, CLE266_PLL_31_728M, K800_PLL_31_728M, CX700_31_728M},
-	{CLK_32_668M, CLE266_PLL_32_668M, K800_PLL_32_668M, CX700_32_668M},
-	{CLK_36_000M, CLE266_PLL_36_000M, K800_PLL_36_000M, CX700_36_000M},
-	{CLK_40_000M, CLE266_PLL_40_000M, K800_PLL_40_000M, CX700_40_000M},
-	{CLK_41_291M, CLE266_PLL_41_291M, K800_PLL_41_291M, CX700_41_291M},
-	{CLK_43_163M, CLE266_PLL_43_163M, K800_PLL_43_163M, CX700_43_163M},
-	{CLK_45_250M, CLE266_PLL_45_250M, K800_PLL_45_250M, CX700_45_250M},
-	{CLK_46_000M, CLE266_PLL_46_000M, K800_PLL_46_000M, CX700_46_000M},
-	{CLK_46_996M, CLE266_PLL_46_996M, K800_PLL_46_996M, CX700_46_996M},
-	{CLK_48_000M, CLE266_PLL_48_000M, K800_PLL_48_000M, CX700_48_000M},
-	{CLK_48_875M, CLE266_PLL_48_875M, K800_PLL_48_875M, CX700_48_875M},
-	{CLK_49_500M, CLE266_PLL_49_500M, K800_PLL_49_500M, CX700_49_500M},
-	{CLK_52_406M, CLE266_PLL_52_406M, K800_PLL_52_406M, CX700_52_406M},
-	{CLK_52_977M, CLE266_PLL_52_977M, K800_PLL_52_977M, CX700_52_977M},
-	{CLK_56_250M, CLE266_PLL_56_250M, K800_PLL_56_250M, CX700_56_250M},
-	{CLK_60_466M, CLE266_PLL_60_466M, K800_PLL_60_466M, CX700_60_466M},
-	{CLK_61_500M, CLE266_PLL_61_500M, K800_PLL_61_500M, CX700_61_500M},
-	{CLK_65_000M, CLE266_PLL_65_000M, K800_PLL_65_000M, CX700_65_000M},
-	{CLK_65_178M, CLE266_PLL_65_178M, K800_PLL_65_178M, CX700_65_178M},
-	{CLK_66_750M, CLE266_PLL_66_750M, K800_PLL_66_750M, CX700_66_750M},
-	{CLK_68_179M, CLE266_PLL_68_179M, K800_PLL_68_179M, CX700_68_179M},
-	{CLK_69_924M, CLE266_PLL_69_924M, K800_PLL_69_924M, CX700_69_924M},
-	{CLK_70_159M, CLE266_PLL_70_159M, K800_PLL_70_159M, CX700_70_159M},
-	{CLK_72_000M, CLE266_PLL_72_000M, K800_PLL_72_000M, CX700_72_000M},
-	{CLK_78_750M, CLE266_PLL_78_750M, K800_PLL_78_750M, CX700_78_750M},
-	{CLK_80_136M, CLE266_PLL_80_136M, K800_PLL_80_136M, CX700_80_136M},
-	{CLK_83_375M, CLE266_PLL_83_375M, K800_PLL_83_375M, CX700_83_375M},
-	{CLK_83_950M, CLE266_PLL_83_950M, K800_PLL_83_950M, CX700_83_950M},
-	{CLK_84_750M, CLE266_PLL_84_750M, K800_PLL_84_750M, CX700_84_750M},
-	{CLK_85_860M, CLE266_PLL_85_860M, K800_PLL_85_860M, CX700_85_860M},
-	{CLK_88_750M, CLE266_PLL_88_750M, K800_PLL_88_750M, CX700_88_750M},
-	{CLK_94_500M, CLE266_PLL_94_500M, K800_PLL_94_500M, CX700_94_500M},
-	{CLK_97_750M, CLE266_PLL_97_750M, K800_PLL_97_750M, CX700_97_750M},
+	{CLK_25_175M, CLE266_PLL_25_175M, K800_PLL_25_175M,
+	 CX700_25_175M, VX855_25_175M},
+	{CLK_29_581M, CLE266_PLL_29_581M, K800_PLL_29_581M,
+	 CX700_29_581M, VX855_29_581M},
+	{CLK_26_880M, CLE266_PLL_26_880M, K800_PLL_26_880M,
+	 CX700_26_880M, VX855_26_880M},
+	{CLK_31_490M, CLE266_PLL_31_490M, K800_PLL_31_490M,
+	 CX700_31_490M, VX855_31_490M},
+	{CLK_31_500M, CLE266_PLL_31_500M, K800_PLL_31_500M,
+	 CX700_31_500M, VX855_31_500M},
+	{CLK_31_728M, CLE266_PLL_31_728M, K800_PLL_31_728M,
+	 CX700_31_728M, VX855_31_728M},
+	{CLK_32_668M, CLE266_PLL_32_668M, K800_PLL_32_668M,
+	 CX700_32_668M, VX855_32_668M},
+	{CLK_36_000M, CLE266_PLL_36_000M, K800_PLL_36_000M,
+	 CX700_36_000M, VX855_36_000M},
+	{CLK_40_000M, CLE266_PLL_40_000M, K800_PLL_40_000M,
+	 CX700_40_000M, VX855_40_000M},
+	{CLK_41_291M, CLE266_PLL_41_291M, K800_PLL_41_291M,
+	 CX700_41_291M, VX855_41_291M},
+	{CLK_43_163M, CLE266_PLL_43_163M, K800_PLL_43_163M,
+	 CX700_43_163M, VX855_43_163M},
+	{CLK_45_250M, CLE266_PLL_45_250M, K800_PLL_45_250M,
+	 CX700_45_250M, VX855_45_250M},
+	{CLK_46_000M, CLE266_PLL_46_000M, K800_PLL_46_000M,
+	 CX700_46_000M, VX855_46_000M},
+	{CLK_46_996M, CLE266_PLL_46_996M, K800_PLL_46_996M,
+	 CX700_46_996M, VX855_46_996M},
+	{CLK_48_000M, CLE266_PLL_48_000M, K800_PLL_48_000M,
+	 CX700_48_000M, VX855_48_000M},
+	{CLK_48_875M, CLE266_PLL_48_875M, K800_PLL_48_875M,
+	 CX700_48_875M, VX855_48_875M},
+	{CLK_49_500M, CLE266_PLL_49_500M, K800_PLL_49_500M,
+	 CX700_49_500M, VX855_49_500M},
+	{CLK_52_406M, CLE266_PLL_52_406M, K800_PLL_52_406M,
+	 CX700_52_406M, VX855_52_406M},
+	{CLK_52_977M, CLE266_PLL_52_977M, K800_PLL_52_977M,
+	 CX700_52_977M,	VX855_52_977M},
+	{CLK_56_250M, CLE266_PLL_56_250M, K800_PLL_56_250M,
+	 CX700_56_250M, VX855_56_250M},
+	{CLK_60_466M, CLE266_PLL_60_466M, K800_PLL_60_466M,
+	 CX700_60_466M, VX855_60_466M},
+	{CLK_61_500M, CLE266_PLL_61_500M, K800_PLL_61_500M,
+	 CX700_61_500M, VX855_61_500M},
+	{CLK_65_000M, CLE266_PLL_65_000M, K800_PLL_65_000M,
+	 CX700_65_000M, VX855_65_000M},
+	{CLK_65_178M, CLE266_PLL_65_178M, K800_PLL_65_178M,
+	 CX700_65_178M, VX855_65_178M},
+	{CLK_66_750M, CLE266_PLL_66_750M, K800_PLL_66_750M,
+	 CX700_66_750M, VX855_66_750M},
+	{CLK_68_179M, CLE266_PLL_68_179M, K800_PLL_68_179M,
+	 CX700_68_179M, VX855_68_179M},
+	{CLK_69_924M, CLE266_PLL_69_924M, K800_PLL_69_924M,
+	 CX700_69_924M, VX855_69_924M},
+	{CLK_70_159M, CLE266_PLL_70_159M, K800_PLL_70_159M,
+	 CX700_70_159M, VX855_70_159M},
+	{CLK_72_000M, CLE266_PLL_72_000M, K800_PLL_72_000M,
+	 CX700_72_000M, VX855_72_000M},
+	{CLK_78_750M, CLE266_PLL_78_750M, K800_PLL_78_750M,
+	 CX700_78_750M, VX855_78_750M},
+	{CLK_80_136M, CLE266_PLL_80_136M, K800_PLL_80_136M,
+	 CX700_80_136M, VX855_80_136M},
+	{CLK_83_375M, CLE266_PLL_83_375M, K800_PLL_83_375M,
+	 CX700_83_375M, VX855_83_375M},
+	{CLK_83_950M, CLE266_PLL_83_950M, K800_PLL_83_950M,
+	 CX700_83_950M, VX855_83_950M},
+	{CLK_84_750M, CLE266_PLL_84_750M, K800_PLL_84_750M,
+	 CX700_84_750M, VX855_84_750M},
+	{CLK_85_860M, CLE266_PLL_85_860M, K800_PLL_85_860M,
+	 CX700_85_860M, VX855_85_860M},
+	{CLK_88_750M, CLE266_PLL_88_750M, K800_PLL_88_750M,
+	 CX700_88_750M, VX855_88_750M},
+	{CLK_94_500M, CLE266_PLL_94_500M, K800_PLL_94_500M,
+	 CX700_94_500M, VX855_94_500M},
+	{CLK_97_750M, CLE266_PLL_97_750M, K800_PLL_97_750M,
+	 CX700_97_750M, VX855_97_750M},
 	{CLK_101_000M, CLE266_PLL_101_000M, K800_PLL_101_000M,
-	 CX700_101_000M},
+	 CX700_101_000M, VX855_101_000M},
 	{CLK_106_500M, CLE266_PLL_106_500M, K800_PLL_106_500M,
-	 CX700_106_500M},
+	 CX700_106_500M, VX855_106_500M},
 	{CLK_108_000M, CLE266_PLL_108_000M, K800_PLL_108_000M,
-	 CX700_108_000M},
+	 CX700_108_000M, VX855_108_000M},
 	{CLK_113_309M, CLE266_PLL_113_309M, K800_PLL_113_309M,
-	 CX700_113_309M},
+	 CX700_113_309M, VX855_113_309M},
 	{CLK_118_840M, CLE266_PLL_118_840M, K800_PLL_118_840M,
-	 CX700_118_840M},
+	 CX700_118_840M, VX855_118_840M},
 	{CLK_119_000M, CLE266_PLL_119_000M, K800_PLL_119_000M,
-	 CX700_119_000M},
+	 CX700_119_000M, VX855_119_000M},
 	{CLK_121_750M, CLE266_PLL_121_750M, K800_PLL_121_750M,
-	 CX700_121_750M},
+	 CX700_121_750M, 0},
 	{CLK_125_104M, CLE266_PLL_125_104M, K800_PLL_125_104M,
-	 CX700_125_104M},
+	 CX700_125_104M, 0},
 	{CLK_133_308M, CLE266_PLL_133_308M, K800_PLL_133_308M,
-	 CX700_133_308M},
+	 CX700_133_308M, 0},
 	{CLK_135_000M, CLE266_PLL_135_000M, K800_PLL_135_000M,
-	 CX700_135_000M},
+	 CX700_135_000M, VX855_135_000M},
 	{CLK_136_700M, CLE266_PLL_136_700M, K800_PLL_136_700M,
-	 CX700_136_700M},
+	 CX700_136_700M, VX855_136_700M},
 	{CLK_138_400M, CLE266_PLL_138_400M, K800_PLL_138_400M,
-	 CX700_138_400M},
+	 CX700_138_400M, VX855_138_400M},
 	{CLK_146_760M, CLE266_PLL_146_760M, K800_PLL_146_760M,
-	 CX700_146_760M},
+	 CX700_146_760M, VX855_146_760M},
 	{CLK_153_920M, CLE266_PLL_153_920M, K800_PLL_153_920M,
-	 CX700_153_920M},
+	 CX700_153_920M, VX855_153_920M},
 	{CLK_156_000M, CLE266_PLL_156_000M, K800_PLL_156_000M,
-	 CX700_156_000M},
+	 CX700_156_000M, VX855_156_000M},
 	{CLK_157_500M, CLE266_PLL_157_500M, K800_PLL_157_500M,
-	 CX700_157_500M},
+	 CX700_157_500M, VX855_157_500M},
 	{CLK_162_000M, CLE266_PLL_162_000M, K800_PLL_162_000M,
-	 CX700_162_000M},
+	 CX700_162_000M, VX855_162_000M},
 	{CLK_187_000M, CLE266_PLL_187_000M, K800_PLL_187_000M,
-	 CX700_187_000M},
+	 CX700_187_000M, VX855_187_000M},
 	{CLK_193_295M, CLE266_PLL_193_295M, K800_PLL_193_295M,
-	 CX700_193_295M},
+	 CX700_193_295M, VX855_193_295M},
 	{CLK_202_500M, CLE266_PLL_202_500M, K800_PLL_202_500M,
-	 CX700_202_500M},
+	 CX700_202_500M, VX855_202_500M},
 	{CLK_204_000M, CLE266_PLL_204_000M, K800_PLL_204_000M,
-	 CX700_204_000M},
+	 CX700_204_000M, VX855_204_000M},
 	{CLK_218_500M, CLE266_PLL_218_500M, K800_PLL_218_500M,
-	 CX700_218_500M},
+	 CX700_218_500M, VX855_218_500M},
 	{CLK_234_000M, CLE266_PLL_234_000M, K800_PLL_234_000M,
-	 CX700_234_000M},
+	 CX700_234_000M, VX855_234_000M},
 	{CLK_267_250M, CLE266_PLL_267_250M, K800_PLL_267_250M,
-	 CX700_267_250M},
+	 CX700_267_250M, VX855_267_250M},
 	{CLK_297_500M, CLE266_PLL_297_500M, K800_PLL_297_500M,
-	 CX700_297_500M},
-	{CLK_74_481M, CLE266_PLL_74_481M, K800_PLL_74_481M, CX700_74_481M},
+	 CX700_297_500M, VX855_297_500M},
+	{CLK_74_481M, CLE266_PLL_74_481M, K800_PLL_74_481M,
+	 CX700_74_481M, VX855_74_481M},
 	{CLK_172_798M, CLE266_PLL_172_798M, K800_PLL_172_798M,
-	 CX700_172_798M},
+	 CX700_172_798M, VX855_172_798M},
 	{CLK_122_614M, CLE266_PLL_122_614M, K800_PLL_122_614M,
-	 CX700_122_614M},
-	{CLK_74_270M, CLE266_PLL_74_270M, K800_PLL_74_270M, CX700_74_270M},
+	 CX700_122_614M, VX855_122_614M},
+	{CLK_74_270M, CLE266_PLL_74_270M, K800_PLL_74_270M,
+	 CX700_74_270M, 0},
 	{CLK_148_500M, CLE266_PLL_148_500M, K800_PLL_148_500M,
-	 CX700_148_500M}
+	 CX700_148_500M, VX855_148_500M}
 };
 
 static struct fifo_depth_select display_fifo_depth_reg = {
@@ -1219,6 +1260,15 @@ void viafb_load_FIFO_reg(int set_iga, int hor_active, int ver_active)
 			    VX800_IGA1_DISPLAY_QUEUE_EXPIRE_NUM;
 		}
 
+		if (viaparinfo->chip_info->gfx_chip_name == UNICHROME_VX855) {
+			iga1_fifo_max_depth = VX855_IGA1_FIFO_MAX_DEPTH;
+			iga1_fifo_threshold = VX855_IGA1_FIFO_THRESHOLD;
+			iga1_fifo_high_threshold =
+			    VX855_IGA1_FIFO_HIGH_THRESHOLD;
+			iga1_display_queue_expire_num =
+			    VX855_IGA1_DISPLAY_QUEUE_EXPIRE_NUM;
+		}
+
 		/* Set Display FIFO Depath Select */
 		reg_value = IGA1_FIFO_DEPTH_SELECT_FORMULA(iga1_fifo_max_depth);
 		viafb_load_reg_num =
@@ -1350,6 +1400,15 @@ void viafb_load_FIFO_reg(int set_iga, int hor_active, int ver_active)
 			    VX800_IGA2_DISPLAY_QUEUE_EXPIRE_NUM;
 		}
 
+		if (viaparinfo->chip_info->gfx_chip_name == UNICHROME_VX855) {
+			iga2_fifo_max_depth = VX855_IGA2_FIFO_MAX_DEPTH;
+			iga2_fifo_threshold = VX855_IGA2_FIFO_THRESHOLD;
+			iga2_fifo_high_threshold =
+			    VX855_IGA2_FIFO_HIGH_THRESHOLD;
+			iga2_display_queue_expire_num =
+			    VX855_IGA2_DISPLAY_QUEUE_EXPIRE_NUM;
+		}
+
 		if (viaparinfo->chip_info->gfx_chip_name == UNICHROME_K800) {
 			/* Set Display FIFO Depath Select */
 			reg_value =
@@ -1438,6 +1497,8 @@ u32 viafb_get_clk_value(int clk)
 			case UNICHROME_P4M900:
 			case UNICHROME_VX800:
 				return pll_value[i].cx700_pll;
+			case UNICHROME_VX855:
+				return pll_value[i].vx855_pll;
 			}
 		}
 	}
@@ -1471,6 +1532,7 @@ void viafb_set_vclock(u32 CLK, int set_iga)
 		case UNICHROME_P4M890:
 		case UNICHROME_P4M900:
 		case UNICHROME_VX800:
+		case UNICHROME_VX855:
 			viafb_write_reg(SR44, VIASR, CLK / 0x10000);
 			DEBUG_MSG(KERN_INFO "\nSR44=%x", CLK / 0x10000);
 			viafb_write_reg(SR45, VIASR, (CLK & 0xFFFF) / 0x100);
@@ -1499,6 +1561,7 @@ void viafb_set_vclock(u32 CLK, int set_iga)
 		case UNICHROME_P4M890:
 		case UNICHROME_P4M900:
 		case UNICHROME_VX800:
+		case UNICHROME_VX855:
 			viafb_write_reg(SR4A, VIASR, CLK / 0x10000);
 			viafb_write_reg(SR4B, VIASR, (CLK & 0xFFFF) / 0x100);
 			viafb_write_reg(SR4C, VIASR, CLK % 0x100);
@@ -2215,6 +2278,10 @@ int viafb_setmode(int vmode_index, int hor_res, int ver_res, int video_bpp,
 	case UNICHROME_VX800:
 		viafb_write_regx(CX700_ModeXregs, NUM_TOTAL_CX700_ModeXregs);
 		break;
+
+	case UNICHROME_VX855:
+		viafb_write_regx(VX855_ModeXregs, NUM_TOTAL_VX855_ModeXregs);
+		break;
 	}
 
 	device_off();
@@ -2597,6 +2664,7 @@ static int get_fb_size_from_pci(void)
 		case P4M890_FUNCTION3:
 		case P4M900_FUNCTION3:
 		case VX800_FUNCTION3:
+		case VX855_FUNCTION3:
 			/*case CN750_FUNCTION3: */
 			outl(configid + 0xA0, (unsigned long)0xCF8);
 			FBSize = inl((unsigned long)0xCFC);
@@ -2660,6 +2728,10 @@ static int get_fb_size_from_pci(void)
 			VideoMemSize = (256 << 20);	/*256M */
 			break;
 
+		case 0x00007000:	/* Only on VX855/875 */
+			VideoMemSize = (512 << 20);	/*512M */
+			break;
+
 		default:
 			VideoMemSize = (32 << 20);	/*32M */
 			break;
diff --git a/drivers/video/via/hw.h b/drivers/video/via/hw.h
index 7302b40..4e54b2f 100644
--- a/drivers/video/via/hw.h
+++ b/drivers/video/via/hw.h
@@ -324,6 +324,17 @@ is reserved, so it may have problem to set 1600x1200 on IGA2. */
 /* location: {CR94,0,6} */
 #define VX800_IGA2_DISPLAY_QUEUE_EXPIRE_NUM     128
 
+/* For VT3409 */
+#define VX855_IGA1_FIFO_MAX_DEPTH               400
+#define VX855_IGA1_FIFO_THRESHOLD               320
+#define VX855_IGA1_FIFO_HIGH_THRESHOLD          320
+#define VX855_IGA1_DISPLAY_QUEUE_EXPIRE_NUM     160
+
+#define VX855_IGA2_FIFO_MAX_DEPTH               200
+#define VX855_IGA2_FIFO_THRESHOLD               160
+#define VX855_IGA2_FIFO_HIGH_THRESHOLD          160
+#define VX855_IGA2_DISPLAY_QUEUE_EXPIRE_NUM     320
+
 #define IGA1_FIFO_DEPTH_SELECT_REG_NUM          1
 #define IGA1_FIFO_THRESHOLD_REG_NUM             2
 #define IGA1_FIFO_HIGH_THRESHOLD_REG_NUM        2
@@ -688,6 +699,7 @@ struct pll_map {
 	u32 cle266_pll;
 	u32 k800_pll;
 	u32 cx700_pll;
+	u32 vx855_pll;
 };
 
 struct rgbLUT {
@@ -832,6 +844,8 @@ struct iga2_crtc_timing {
 #define P4M900_FUNCTION3    0x3364
 /* VT3353 chipset*/
 #define VX800_FUNCTION3     0x3353
+/* VT3409 chipset*/
+#define VX855_FUNCTION3     0x3409
 
 #define NUM_TOTAL_PLL_TABLE ARRAY_SIZE(pll_value)
 
diff --git a/drivers/video/via/share.h b/drivers/video/via/share.h
index 2e1254d..7cd03e2 100644
--- a/drivers/video/via/share.h
+++ b/drivers/video/via/share.h
@@ -167,6 +167,10 @@
 #define SR4B    0x4B
 #define SR4C    0x4C
 #define SR52    0x52
+#define SR57	0x57
+#define SR58	0x58
+#define SR59	0x59
+#define SR5D    0x5D
 #define SR5E    0x5E
 #define SR65    0x65
 
@@ -966,6 +970,100 @@
 #define CX700_297_500M    0x00CE0403
 #define CX700_122_614M    0x00870802
 
+/* PLL for VX855 */
+#define VX855_22_000M     0x007B1005
+#define VX855_25_175M     0x008D1005
+#define VX855_26_719M     0x00961005
+#define VX855_26_880M     0x00961005
+#define VX855_27_000M     0x00971005
+#define VX855_29_581M     0x00A51005
+#define VX855_29_829M     0x00641003
+#define VX855_31_490M     0x00B01005
+#define VX855_31_500M     0x00B01005
+#define VX855_31_728M     0x008E1004
+#define VX855_32_668M     0x00921004
+#define VX855_36_000M     0x00A11004
+#define VX855_40_000M     0x00700C05
+#define VX855_41_291M     0x00730C05
+#define VX855_43_163M     0x00790C05
+#define VX855_45_250M     0x007F0C05      /* 45.46MHz */
+#define VX855_46_000M     0x00670C04
+#define VX855_46_996M     0x00690C04
+#define VX855_48_000M     0x00860C05
+#define VX855_48_875M     0x00890C05
+#define VX855_49_500M     0x00530C03
+#define VX855_52_406M     0x00580C03
+#define VX855_52_977M     0x00940C05
+#define VX855_56_250M     0x009D0C05
+#define VX855_60_466M     0x00A90C05
+#define VX855_61_500M     0x00AC0C05
+#define VX855_65_000M     0x006D0C03
+#define VX855_65_178M     0x00B60C05
+#define VX855_66_750M     0x00700C03    /*67.116MHz */
+#define VX855_67_295M     0x00BC0C05
+#define VX855_68_179M     0x00BF0C05
+#define VX855_68_369M     0x00BF0C05
+#define VX855_69_924M     0x00C30C05
+#define VX855_70_159M     0x00C30C05
+#define VX855_72_000M     0x00A10C04
+#define VX855_73_023M     0x00CC0C05
+#define VX855_74_481M     0x00D10C05
+#define VX855_78_750M     0x006E0805
+#define VX855_79_466M     0x006F0805
+#define VX855_80_136M     0x00700805
+#define VX855_81_627M     0x00720805
+#define VX855_83_375M     0x00750805
+#define VX855_83_527M     0x00750805
+#define VX855_83_950M     0x00750805
+#define VX855_84_537M     0x00760805
+#define VX855_84_750M     0x00760805     /* 84.537Mhz */
+#define VX855_85_500M     0x00760805        /* 85.909080 MHz*/
+#define VX855_85_860M     0x00760805
+#define VX855_85_909M     0x00760805
+#define VX855_88_750M     0x007C0805
+#define VX855_89_489M     0x007D0805
+#define VX855_94_500M     0x00840805
+#define VX855_96_648M     0x00870805
+#define VX855_97_750M     0x00890805
+#define VX855_101_000M    0x008D0805
+#define VX855_106_500M    0x00950805
+#define VX855_108_000M    0x00970805
+#define VX855_110_125M    0x00990805
+#define VX855_112_000M    0x009D0805
+#define VX855_113_309M    0x009F0805
+#define VX855_115_000M    0x00A10805
+#define VX855_118_840M    0x00A60805
+#define VX855_119_000M    0x00A70805
+#define VX855_121_750M    0x00AA0805       /* 121.704MHz */
+#define VX855_122_614M    0x00AC0805
+#define VX855_126_266M    0x00B10805
+#define VX855_130_250M    0x00B60805      /* 130.250 */
+#define VX855_135_000M    0x00BD0805
+#define VX855_136_700M    0x00BF0805
+#define VX855_137_750M    0x00C10805
+#define VX855_138_400M    0x00C20805
+#define VX855_144_300M    0x00CA0805
+#define VX855_146_760M    0x00CE0805
+#define VX855_148_500M	  0x00D00805
+#define VX855_153_920M    0x00540402
+#define VX855_156_000M    0x006C0405
+#define VX855_156_867M    0x006E0405
+#define VX855_157_500M    0x006E0405
+#define VX855_162_000M    0x00710405
+#define VX855_172_798M    0x00790405
+#define VX855_187_000M    0x00830405
+#define VX855_193_295M    0x00870405
+#define VX855_202_500M    0x008E0405
+#define VX855_204_000M    0x008F0405
+#define VX855_218_500M    0x00990405
+#define VX855_229_500M    0x00A10405
+#define VX855_234_000M    0x00A40405
+#define VX855_267_250M    0x00BB0405
+#define VX855_297_500M    0x00D00405
+#define VX855_339_500M    0x00770005
+#define VX855_340_772M    0x00770005
+
+
 /* Definition CRTC Timing Index */
 #define H_TOTAL_INDEX               0
 #define H_ADDR_INDEX                1
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 61e652c..3815a9b 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -914,7 +914,8 @@ static int viafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 		fg_color = cursor->image.fg_color;
 		bg_color = cursor->image.bg_color;
 		if (chip_name == UNICHROME_CX700 ||
-			chip_name == UNICHROME_VX800) {
+			chip_name == UNICHROME_VX800 ||
+			chip_name == UNICHROME_VX855) {
 			fg_color =
 				((info->cmap.red[fg_color] & 0xFFC0) << 14) |
 				((info->cmap.green[fg_color] & 0xFFC0) << 4) |
diff --git a/drivers/video/via/viamode.c b/drivers/video/via/viamode.c
index 16a8a97..b74f8a6 100644
--- a/drivers/video/via/viamode.c
+++ b/drivers/video/via/viamode.c
@@ -312,6 +312,60 @@ struct io_reg CX700_ModeXregs[] = { {VIASR, SR10, 0xFF, 0x01},
 {VIACR, CR9B, 0xFF, 0x00}
 };
 
+struct io_reg VX855_ModeXregs[] = {
+{VIASR, SR10, 0xFF, 0x01},
+{VIASR, SR15, 0x02, 0x02},
+{VIASR, SR16, 0xBF, 0x08},
+{VIASR, SR17, 0xFF, 0x1F},
+{VIASR, SR18, 0xFF, 0x4E},
+{VIASR, SR1A, 0xFB, 0x08},
+{VIASR, SR1B, 0xFF, 0xF0},
+{VIASR, SR1E, 0x07, 0x01},
+{VIASR, SR2A, 0xF0, 0x00},
+{VIASR, SR58, 0xFF, 0x00},
+{VIASR, SR59, 0xFF, 0x00},
+{VIASR, SR2D, 0xFF, 0xFF},	/* VCK and LCK PLL power on.           */
+{VIACR, CR09, 0xFF, 0x00},	/* Initial CR09=0*/
+{VIACR, CR11, 0x8F, 0x00},	/* IGA1 initial  Vertical end       */
+{VIACR, CR17, 0x7F, 0x00}, 	/* IGA1 CRT Mode control init   */
+{VIACR, CR0A, 0xFF, 0x1E},	/* Cursor Start                        */
+{VIACR, CR0B, 0xFF, 0x00},	/* Cursor End                          */
+{VIACR, CR0E, 0xFF, 0x00},	/* Cursor Location High                */
+{VIACR, CR0F, 0xFF, 0x00},	/* Cursor Localtion Low                */
+{VIACR, CR32, 0xFF, 0x00},
+{VIACR, CR33, 0x7F, 0x00},
+{VIACR, CR35, 0xFF, 0x00},
+{VIACR, CR36, 0x08, 0x00},
+{VIACR, CR69, 0xFF, 0x00},
+{VIACR, CR6A, 0xFD, 0x60},
+{VIACR, CR6B, 0xFF, 0x00},
+{VIACR, CR6C, 0xFF, 0x00},
+{VIACR, CR7A, 0xFF, 0x01},          /* LCD Scaling Parameter 1             */
+{VIACR, CR7B, 0xFF, 0x02},          /* LCD Scaling Parameter 2             */
+{VIACR, CR7C, 0xFF, 0x03},          /* LCD Scaling Parameter 3             */
+{VIACR, CR7D, 0xFF, 0x04},          /* LCD Scaling Parameter 4             */
+{VIACR, CR7E, 0xFF, 0x07},          /* LCD Scaling Parameter 5             */
+{VIACR, CR7F, 0xFF, 0x0A},          /* LCD Scaling Parameter 6             */
+{VIACR, CR80, 0xFF, 0x0D},          /* LCD Scaling Parameter 7             */
+{VIACR, CR81, 0xFF, 0x13},          /* LCD Scaling Parameter 8             */
+{VIACR, CR82, 0xFF, 0x16},          /* LCD Scaling Parameter 9             */
+{VIACR, CR83, 0xFF, 0x19},          /* LCD Scaling Parameter 10            */
+{VIACR, CR84, 0xFF, 0x1C},          /* LCD Scaling Parameter 11            */
+{VIACR, CR85, 0xFF, 0x1D},          /* LCD Scaling Parameter 12            */
+{VIACR, CR86, 0xFF, 0x1E},          /* LCD Scaling Parameter 13            */
+{VIACR, CR87, 0xFF, 0x1F},          /* LCD Scaling Parameter 14            */
+{VIACR, CR88, 0xFF, 0x40},          /* LCD Panel Type                      */
+{VIACR, CR89, 0xFF, 0x00},          /* LCD Timing Control 0                */
+{VIACR, CR8A, 0xFF, 0x88},          /* LCD Timing Control 1                */
+{VIACR, CRD4, 0xFF, 0x81},          /* Second power sequence control       */
+{VIACR, CR91, 0xFF, 0x80},          /* 24/12 bit LVDS Data off             */
+{VIACR, CR96, 0xFF, 0x00},
+{VIACR, CR97, 0xFF, 0x00},
+{VIACR, CR99, 0xFF, 0x00},
+{VIACR, CR9B, 0xFF, 0x00},
+{VIACR, CRD2, 0xFF, 0xFF}           /* TMDS/LVDS control register.         */
+};
+
 /* Video Mode Table */
 /* Common Setting for Video Mode */
 struct io_reg CLE266_ModeXregs[] = { {VIASR, SR1E, 0xF0, 0x00},
@@ -1012,6 +1066,7 @@ int NUM_TOTAL_CN400_ModeXregs = ARRAY_SIZE(CN400_ModeXregs);
 int NUM_TOTAL_CN700_ModeXregs = ARRAY_SIZE(CN700_ModeXregs);
 int NUM_TOTAL_KM400_ModeXregs = ARRAY_SIZE(KM400_ModeXregs);
 int NUM_TOTAL_CX700_ModeXregs = ARRAY_SIZE(CX700_ModeXregs);
+int NUM_TOTAL_VX855_ModeXregs = ARRAY_SIZE(VX855_ModeXregs);
 int NUM_TOTAL_CLE266_ModeXregs = ARRAY_SIZE(CLE266_ModeXregs);
 int NUM_TOTAL_PATCH_MODE = ARRAY_SIZE(res_patch_table);
 int NUM_TOTAL_MODETABLE = ARRAY_SIZE(CLE266Modes);
diff --git a/drivers/video/via/viamode.h b/drivers/video/via/viamode.h
index 504e16a..a9d6554 100644
--- a/drivers/video/via/viamode.h
+++ b/drivers/video/via/viamode.h
@@ -56,6 +56,7 @@ extern int NUM_TOTAL_CN400_ModeXregs;
 extern int NUM_TOTAL_CN700_ModeXregs;
 extern int NUM_TOTAL_KM400_ModeXregs;
 extern int NUM_TOTAL_CX700_ModeXregs;
+extern int NUM_TOTAL_VX855_ModeXregs;
 extern int NUM_TOTAL_CLE266_ModeXregs;
 extern int NUM_TOTAL_PATCH_MODE;
 extern int NUM_TOTAL_MODETABLE;
@@ -75,6 +76,7 @@ extern struct io_reg CN700_ModeXregs[];
 extern struct io_reg KM400_ModeXregs[];
 extern struct io_reg CX700_ModeXregs[];
 extern struct io_reg VX800_ModeXregs[];
+extern struct io_reg VX855_ModeXregs[];
 extern struct io_reg CLE266_ModeXregs[];
 extern struct io_reg PM1024x768[];
 extern struct patch_table res_patch_table[];
-- 
cgit v0.10.2


From 3a32456909380150a92ed207c160a3a0bd687e14 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:36 -0700
Subject: viafb: choose acceleration engine for VX855

Enable 2D hardware acceleration on VX855 for copyarea, imageblit and
fillrect by selecting the correct engine which is the same as in VX800.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c
index 42ab4b4..9d4f3a4 100644
--- a/drivers/video/via/accel.c
+++ b/drivers/video/via/accel.c
@@ -337,6 +337,7 @@ int viafb_init_engine(struct fb_info *info)
 		viapar->shared->hw_bitblt = hw_bitblt_1;
 		break;
 	case UNICHROME_VX800:
+	case UNICHROME_VX855:
 		viapar->shared->hw_bitblt = hw_bitblt_2;
 		break;
 	default:
-- 
cgit v0.10.2


From 2d280f758b2044151cb77fdd9c59d1903bbb6a19 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Tue, 22 Sep 2009 16:47:37 -0700
Subject: viafb: make viafb a first-class citizen using pci_driver

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c
index 95faf8b..3e083ff 100644
--- a/drivers/video/via/hw.c
+++ b/drivers/video/via/hw.c
@@ -21,22 +21,6 @@
 
 #include "global.h"
 
-static const struct pci_device_id_info pciidlist[] = {
-	{PCI_VIA_VENDOR_ID, UNICHROME_CLE266_DID, UNICHROME_CLE266},
-	{PCI_VIA_VENDOR_ID, UNICHROME_PM800_DID, UNICHROME_PM800},
-	{PCI_VIA_VENDOR_ID, UNICHROME_K400_DID, UNICHROME_K400},
-	{PCI_VIA_VENDOR_ID, UNICHROME_K800_DID, UNICHROME_K800},
-	{PCI_VIA_VENDOR_ID, UNICHROME_CN700_DID, UNICHROME_CN700},
-	{PCI_VIA_VENDOR_ID, UNICHROME_P4M890_DID, UNICHROME_P4M890},
-	{PCI_VIA_VENDOR_ID, UNICHROME_K8M890_DID, UNICHROME_K8M890},
-	{PCI_VIA_VENDOR_ID, UNICHROME_CX700_DID, UNICHROME_CX700},
-	{PCI_VIA_VENDOR_ID, UNICHROME_P4M900_DID, UNICHROME_P4M900},
-	{PCI_VIA_VENDOR_ID, UNICHROME_CN750_DID, UNICHROME_CN750},
-	{PCI_VIA_VENDOR_ID, UNICHROME_VX800_DID, UNICHROME_VX800},
-	{PCI_VIA_VENDOR_ID, UNICHROME_VX855_DID, UNICHROME_VX855},
-	{0, 0, 0}
-};
-
 static struct pll_map pll_value[] = {
 	{CLK_25_175M, CLE266_PLL_25_175M, K800_PLL_25_175M,
 	 CX700_25_175M, VX855_25_175M},
@@ -542,7 +526,8 @@ static void set_dvi_output_path(int set_iga, int output_interface);
 static void set_lcd_output_path(int set_iga, int output_interface);
 static int search_mode_setting(int ModeInfoIndex);
 static void load_fix_bit_crtc_reg(void);
-static void init_gfx_chip_info(void);
+static void init_gfx_chip_info(struct pci_dev *pdev,
+				const struct pci_device_id *pdi);
 static void init_tmds_chip_info(void);
 static void init_lvds_chip_info(void);
 static void device_screen_off(void);
@@ -552,7 +537,6 @@ static void device_off(void);
 static void device_on(void);
 static void enable_second_display_channel(void);
 static void disable_second_display_channel(void);
-static int get_fb_size_from_pci(void);
 
 void viafb_write_reg(u8 index, u16 io_port, u8 data)
 {
@@ -1937,9 +1921,10 @@ void viafb_fill_crtc_timing(struct crt_mode_table *crt_table,
 
 }
 
-void viafb_init_chip_info(void)
+void viafb_init_chip_info(struct pci_dev *pdev,
+			  const struct pci_device_id *pdi)
 {
-	init_gfx_chip_info();
+	init_gfx_chip_info(pdev, pdi);
 	init_tmds_chip_info();
 	init_lvds_chip_info();
 
@@ -2012,24 +1997,12 @@ void viafb_update_device_setting(int hres, int vres,
 	}
 }
 
-static void init_gfx_chip_info(void)
+static void init_gfx_chip_info(struct pci_dev *pdev,
+			       const struct pci_device_id *pdi)
 {
-	struct pci_dev *pdev = NULL;
-	u32 i;
 	u8 tmp;
 
-	/* Indentify GFX Chip Name */
-	for (i = 0; pciidlist[i].vendor != 0; i++) {
-		pdev = pci_get_device(pciidlist[i].vendor,
-			pciidlist[i].device, 0);
-		if (pdev)
-			break;
-	}
-
-	if (!pciidlist[i].vendor)
-		return ;
-
-	viaparinfo->chip_info->gfx_chip_name = pciidlist[i].chip_index;
+	viaparinfo->chip_info->gfx_chip_name = pdi->driver_data;
 
 	/* Check revision of CLE266 Chip */
 	if (viaparinfo->chip_info->gfx_chip_name == UNICHROME_CLE266) {
@@ -2060,8 +2033,6 @@ static void init_gfx_chip_info(void)
 				CX700_REVISION_700;
 		}
 	}
-
-	pci_dev_put(pdev);
 }
 
 static void init_tmds_chip_info(void)
@@ -2554,37 +2525,6 @@ void viafb_crt_enable(void)
 	viafb_write_reg_mask(CR36, VIACR, 0x0, BIT5 + BIT4);
 }
 
-void viafb_get_mmio_info(unsigned long *mmio_base, u32 *mmio_len)
-{
-	struct pci_dev *pdev = NULL;
-	u32 vendor, device;
-	u32 i;
-
-	for (i = 0; pciidlist[i].vendor != 0; i++)
-		if (viaparinfo->chip_info->gfx_chip_name ==
-			pciidlist[i].chip_index)
-			break;
-
-	if (!pciidlist[i].vendor)
-		return ;
-
-	vendor = pciidlist[i].vendor;
-	device = pciidlist[i].device;
-
-	pdev = pci_get_device(vendor, device, NULL);
-
-	if (!pdev) {
-		*mmio_base = 0;
-		*mmio_len = 0;
-		return ;
-	}
-
-	*mmio_base = pci_resource_start(pdev, 1);
-	*mmio_len = pci_resource_len(pdev, 1);
-
-	pci_dev_put(pdev);
-}
-
 static void enable_second_display_channel(void)
 {
 	/* to enable second display channel. */
@@ -2601,44 +2541,7 @@ static void disable_second_display_channel(void)
 	viafb_write_reg_mask(CR6A, VIACR, BIT6, BIT6);
 }
 
-void viafb_get_fb_info(unsigned int *fb_base, unsigned int *fb_len)
-{
-	struct pci_dev *pdev = NULL;
-	u32 vendor, device;
-	u32 i;
-
-	for (i = 0; pciidlist[i].vendor != 0; i++)
-		if (viaparinfo->chip_info->gfx_chip_name ==
-			pciidlist[i].chip_index)
-			break;
-
-	if (!pciidlist[i].vendor)
-		return ;
-
-	vendor = pciidlist[i].vendor;
-	device = pciidlist[i].device;
-
-	pdev = pci_get_device(vendor, device, NULL);
-
-	if (!pdev) {
-		*fb_base = viafb_read_reg(VIASR, SR30) << 24;
-		*fb_len = viafb_get_memsize();
-		DEBUG_MSG(KERN_INFO "Get FB info from SR30!\n");
-		DEBUG_MSG(KERN_INFO "fb_base = %08x\n", *fb_base);
-		DEBUG_MSG(KERN_INFO "fb_len = %08x\n", *fb_len);
-		return ;
-	}
-
-	*fb_base = (unsigned int)pci_resource_start(pdev, 0);
-	*fb_len = get_fb_size_from_pci();
-	DEBUG_MSG(KERN_INFO "Get FB info from PCI system!\n");
-	DEBUG_MSG(KERN_INFO "fb_base = %08x\n", *fb_base);
-	DEBUG_MSG(KERN_INFO "fb_len = %08x\n", *fb_len);
-
-	pci_dev_put(pdev);
-}
-
-static int get_fb_size_from_pci(void)
+int viafb_get_fb_size_from_pci(void)
 {
 	unsigned long configid, deviceid, FBSize = 0;
 	int VideoMemSize;
diff --git a/drivers/video/via/hw.h b/drivers/video/via/hw.h
index 4e54b2f..b874d95 100644
--- a/drivers/video/via/hw.h
+++ b/drivers/video/via/hw.h
@@ -901,14 +901,15 @@ void viafb_set_dpa_gfx(int output_interface, struct GFX_DPA_SETTING\
 int viafb_setmode(int vmode_index, int hor_res, int ver_res,
 	    int video_bpp, int vmode_index1, int hor_res1,
 	    int ver_res1, int video_bpp1);
-void viafb_init_chip_info(void);
+void viafb_init_chip_info(struct pci_dev *pdev,
+			  const struct pci_device_id *pdi);
 void viafb_init_dac(int set_iga);
 int viafb_get_pixclock(int hres, int vres, int vmode_refresh);
 int viafb_get_refresh(int hres, int vres, u32 float_refresh);
 void viafb_update_device_setting(int hres, int vres, int bpp,
 			   int vmode_refresh, int flag);
-void viafb_get_mmio_info(unsigned long *mmio_base, u32 *mmio_len);
 
+int viafb_get_fb_size_from_pci(void);
 void viafb_set_iga_path(void);
 void viafb_set_primary_address(u32 addr);
 void viafb_set_secondary_address(u32 addr);
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 3815a9b..c661ab6 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -1842,7 +1842,8 @@ out_default:
 	*yres = 480;
 }
 
-static int __devinit via_pci_probe(void)
+static int __devinit via_pci_probe(struct pci_dev *pdev,
+				   const struct pci_device_id *ent)
 {
 	u32 default_xres, default_yres;
 	int vmode_index;
@@ -1885,8 +1886,9 @@ static int __devinit via_pci_probe(void)
 	/* Set up I2C bus stuff */
 	viafb_create_i2c_bus(viaparinfo);
 
-	viafb_init_chip_info();
-	viafb_get_fb_info(&viaparinfo->fbmem, &viaparinfo->memsize);
+	viafb_init_chip_info(pdev, ent);
+	viaparinfo->fbmem = pci_resource_start(pdev, 0);
+	viaparinfo->memsize = viafb_get_fb_size_from_pci();
 	viaparinfo->fbmem_free = viaparinfo->memsize;
 	viaparinfo->fbmem_used = 0;
 	viafbinfo->screen_base = ioremap_nocache(viaparinfo->fbmem,
@@ -1896,8 +1898,8 @@ static int __devinit via_pci_probe(void)
 		return -ENOMEM;
 	}
 
-	viafb_get_mmio_info(&viafbinfo->fix.mmio_start,
-		&viafbinfo->fix.mmio_len);
+	viafbinfo->fix.mmio_start = pci_resource_start(pdev, 1);
+	viafbinfo->fix.mmio_len = pci_resource_len(pdev, 1);
 	viafbinfo->node = 0;
 	viafbinfo->fbops = &viafb_ops;
 	viafbinfo->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
@@ -2065,7 +2067,7 @@ static int __devinit via_pci_probe(void)
 	return 0;
 }
 
-static void __devexit via_pci_remove(void)
+static void __devexit via_pci_remove(struct pci_dev *pdev)
 {
 	DEBUG_MSG(KERN_INFO "via_pci_remove!\n");
 	fb_dealloc_cmap(&viafbinfo->cmap);
@@ -2157,6 +2159,40 @@ static int __init viafb_setup(char *options)
 }
 #endif
 
+static struct pci_device_id viafb_pci_table[] __devinitdata = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_CLE266_DID),
+	  .driver_data = UNICHROME_CLE266 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_PM800_DID),
+	  .driver_data = UNICHROME_PM800 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_K400_DID),
+	  .driver_data = UNICHROME_K400 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_K800_DID),
+	  .driver_data = UNICHROME_K800 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_P4M890_DID),
+	  .driver_data = UNICHROME_CN700 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_K8M890_DID),
+	  .driver_data = UNICHROME_K8M890 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_CX700_DID),
+	  .driver_data = UNICHROME_CX700 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_P4M900_DID),
+	  .driver_data = UNICHROME_P4M900 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_CN750_DID),
+	  .driver_data = UNICHROME_CN750 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_VX800_DID),
+	  .driver_data = UNICHROME_VX800 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, UNICHROME_VX855_DID),
+	  .driver_data = UNICHROME_VX855 },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, viafb_pci_table);
+
+static struct pci_driver viafb_driver = {
+	.name		= "viafb",
+	.id_table	= viafb_pci_table,
+	.probe		= via_pci_probe,
+	.remove		= __devexit_p(via_pci_remove),
+};
+
 static int __init viafb_init(void)
 {
 #ifndef MODULE
@@ -2168,13 +2204,13 @@ static int __init viafb_init(void)
 	printk(KERN_INFO
        "VIA Graphics Intergration Chipset framebuffer %d.%d initializing\n",
 	       VERSION_MAJOR, VERSION_MINOR);
-	return via_pci_probe();
+	return pci_register_driver(&viafb_driver);
 }
 
 static void __exit viafb_exit(void)
 {
 	DEBUG_MSG(KERN_INFO "viafb_exit!\n");
-	via_pci_remove();
+	pci_unregister_driver(&viafb_driver);
 }
 
 static struct fb_ops viafb_ops = {
-- 
cgit v0.10.2


From db8df7b0622cc6ddad993da2e4dfaf3b5d98ee30 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Tue, 22 Sep 2009 16:47:38 -0700
Subject: viafb: pass reference to pci device when calling framebuffer_alloc()

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Scott Fang <ScottFang@viatech.com.cn>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index c661ab6..56ec696 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -1857,7 +1857,8 @@ static int __devinit via_pci_probe(struct pci_dev *pdev,
 	 * variables
 	*/
 	viafbinfo = framebuffer_alloc(viafb_par_length +
-		ALIGN(sizeof(struct viafb_shared), BITS_PER_LONG/8), NULL);
+		ALIGN(sizeof(struct viafb_shared), BITS_PER_LONG/8),
+		&pdev->dev);
 	if (!viafbinfo) {
 		printk(KERN_ERR"Could not allocate memory for viafb_info.\n");
 		return -ENODEV;
@@ -1982,7 +1983,7 @@ static int __devinit via_pci_probe(struct pci_dev *pdev,
 	default_var.vsync_len = 4;
 
 	if (viafb_dual_fb) {
-		viafbinfo1 = framebuffer_alloc(viafb_par_length, NULL);
+		viafbinfo1 = framebuffer_alloc(viafb_par_length, &pdev->dev);
 		if (!viafbinfo1) {
 			printk(KERN_ERR
 			"allocate the second framebuffer struct error\n");
-- 
cgit v0.10.2


From f7a595e98c3140f1271957aa742a6b84407620d4 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 22 Sep 2009 16:47:38 -0700
Subject: drivers/video/console/newport_con.c: fix read outside array bounds

It reads linetable[] before checking bounds of index, and ARRAY_SIZE is
required because linetable[] are unsigned shorts.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index d31b203..3772433 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -216,7 +216,7 @@ static void newport_get_screensize(void)
 	}
 
 	newport_xsize = newport_ysize = 0;
-	for (i = 0; linetable[i + 1] && (i < sizeof(linetable)); i += 2) {
+	for (i = 0; i < ARRAY_SIZE(linetable) - 1 && linetable[i + 1]; i += 2) {
 		cols = 0;
 		newport_vc2_set(npregs, VC2_IREG_RADDR, linetable[i]);
 		npregs->set.dcbmode = (NPORT_DMODE_AVC2 | VC2_REGADDR_RAM |
-- 
cgit v0.10.2


From ff8147fe71246b81a48de5f37041b026b57d60ca Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 22 Sep 2009 16:47:39 -0700
Subject: drivers/video: add kmalloc NULL tests

Check that the result of kmalloc is not NULL before passing it to other
functions.

The semantic match that finds this problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression *x;
identifier f;
constant char *C;
@@

x = \(kmalloc\|kcalloc\|kzalloc\)(...);
... when != x == NULL
    when != x != NULL
    when != (x || ...)
(
kfree(x)
|
f(...,C,...,x,...)
|
*f(...,x,...)
|
*x->f
)
// </smpl>

[akpm@linux-foundation.org: convert to kstrdup() as well]
Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/au1100fb.c b/drivers/video/au1100fb.c
index 378f277..a699aab 100644
--- a/drivers/video/au1100fb.c
+++ b/drivers/video/au1100fb.c
@@ -715,8 +715,11 @@ int au1100fb_setup(char *options)
 			}
 			/* Mode option (only option that start with digit) */
 			else if (isdigit(this_opt[0])) {
-				mode = kmalloc(strlen(this_opt) + 1, GFP_KERNEL);
-				strncpy(mode, this_opt, strlen(this_opt) + 1);
+				mode = kstrdup(this_opt, GFP_KERNEL);
+				if (!mode) {
+					print_err("memory allocation failed");
+					return -ENOMEM;
+				}
 			}
 			/* Unsupported option */
 			else {
-- 
cgit v0.10.2


From 99e9e7d62becd6c7413a9e8fbda7f5b66adb5cbf Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:41 -0700
Subject: fb: fix fb_pan_display range check

Fix the range check for panning.  The current code fails to detect some
invalid values (very high ones that can occur if an app tries to move
further up/left than 0,0) as the check uses the unknown values for
calculation so that an overflow can occur.

To fix this it is sufficient to move the calculation to the right side to
use only trusted values.

Kai Jiang detected this problem and proposed an initial patch.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Kai Jiang <b18973@freescale.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index a85c818..346f257 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -871,8 +871,8 @@ fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var)
 		err = -EINVAL;
 
 	if (err || !info->fbops->fb_pan_display ||
-	    var->yoffset + yres > info->var.yres_virtual ||
-	    var->xoffset + info->var.xres > info->var.xres_virtual)
+	    var->yoffset > info->var.yres_virtual - yres ||
+	    var->xoffset > info->var.xres_virtual - info->var.xres)
 		return -EINVAL;
 
 	if ((err = info->fbops->fb_pan_display(var, info)))
-- 
cgit v0.10.2


From 9a4a83d2ed83da0c4b45289ca72f10205aa96589 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Tue, 22 Sep 2009 16:47:41 -0700
Subject: video: console, use DIV_ROUND_UP

Use DIV_ROUND_UP explicitly instead of manual shifts and adds.  It makes
the code more readable and consistent (sometimes there were shifts,
sometimes divs).

There is no change on the assembly level (compilers should do the right
job).

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/console/bitblit.c b/drivers/video/console/bitblit.c
index 69864b1..6b7c8fb 100644
--- a/drivers/video/console/bitblit.c
+++ b/drivers/video/console/bitblit.c
@@ -25,7 +25,7 @@ static inline void update_attr(u8 *dst, u8 *src, int attribute,
 			       struct vc_data *vc)
 {
 	int i, offset = (vc->vc_font.height < 10) ? 1 : 2;
-	int width = (vc->vc_font.width + 7) >> 3;
+	int width = DIV_ROUND_UP(vc->vc_font.width, 8);
 	unsigned int cellsize = vc->vc_font.height * width;
 	u8 c;
 
@@ -144,7 +144,7 @@ static void bit_putcs(struct vc_data *vc, struct fb_info *info,
 		      int fg, int bg)
 {
 	struct fb_image image;
-	u32 width = (vc->vc_font.width + 7)/8;
+	u32 width = DIV_ROUND_UP(vc->vc_font.width, 8);
 	u32 cellsize = width * vc->vc_font.height;
 	u32 maxcnt = info->pixmap.size/cellsize;
 	u32 scan_align = info->pixmap.scan_align - 1;
@@ -173,7 +173,7 @@ static void bit_putcs(struct vc_data *vc, struct fb_info *info,
 			cnt = count;
 
 		image.width = vc->vc_font.width * cnt;
-		pitch = ((image.width + 7) >> 3) + scan_align;
+		pitch = DIV_ROUND_UP(image.width, 8) + scan_align;
 		pitch &= ~scan_align;
 		size = pitch * image.height + buf_align;
 		size &= ~buf_align;
@@ -239,7 +239,7 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode,
 	struct fb_cursor cursor;
 	struct fbcon_ops *ops = info->fbcon_par;
 	unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
-	int w = (vc->vc_font.width + 7) >> 3, c;
+	int w = DIV_ROUND_UP(vc->vc_font.width, 8), c;
 	int y = real_y(ops->p, vc->vc_y);
 	int attribute, use_sw = (vc->vc_cursor_type & 0x10);
 	int err = 1;
-- 
cgit v0.10.2


From 360fa58828784f307c3977d5ff4c8e400f074a56 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 22 Sep 2009 16:47:43 -0700
Subject: s3c2410fb: fix clockrate calculation

In the final part of the calculation for the tft display clockrate we
divide the output pf s3c2410fb_calc_pixclk() by 2 which leaves us with a
rounding error if the result is odd.

Change to using DIV_ROUND_UP() to ensure that we always choose a higher
divisor and thus a lower frequency.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c
index 5ffca2a..aac6612 100644
--- a/drivers/video/s3c2410fb.c
+++ b/drivers/video/s3c2410fb.c
@@ -369,7 +369,9 @@ static void s3c2410fb_activate_var(struct fb_info *info)
 	void __iomem *regs = fbi->io;
 	int type = fbi->regs.lcdcon1 & S3C2410_LCDCON1_TFT;
 	struct fb_var_screeninfo *var = &info->var;
-	int clkdiv = s3c2410fb_calc_pixclk(fbi, var->pixclock) / 2;
+	int clkdiv;
+
+	clkdiv = DIV_ROUND_UP(s3c2410fb_calc_pixclk(fbi, var->pixclock), 2);
 
 	dprintk("%s: var->xres  = %d\n", __func__, var->xres);
 	dprintk("%s: var->yres  = %d\n", __func__, var->yres);
-- 
cgit v0.10.2


From 0fcf6ada2b8eb42d132c0846384f1299889609e3 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Tue, 22 Sep 2009 16:47:44 -0700
Subject: fb: do not ignore fb_set_par errors

At the moment about half of the framebuffer drivers can return an error
code in fb_set_par. Until now it would be silently ignored by fbmem.c
and fbcon.c. This patch fixes fbmem.c to return the error code and
restore var on error.

But it is not clear in which video mode the device is when fb_set_par
fails.  It would be good and reasonable if it were in the old state but
there is no guarantee that this is true for all existing drivers.
Additionally print a message if a failing fb_set_par is detected in
fbmem.c or fbcon.c.

Although most errors should be caught by the previous fb_check_var some
errors can't as they are dynamic (memory allocations, ...) and can only be
detected while performing the operations which is forbidden in
fb_check_var.

This patch shouldn't have a negative impact on normal operation as all
drivers return 0 on success.  The impact in case of error depends heavily
on the driver and caller but it's expected to be better than before.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 3a44695..4bee7c2 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -725,7 +725,7 @@ static int con2fb_release_oldinfo(struct vc_data *vc, struct fb_info *oldinfo,
 				  int oldidx, int found)
 {
 	struct fbcon_ops *ops = oldinfo->fbcon_par;
-	int err = 0;
+	int err = 0, ret;
 
 	if (oldinfo->fbops->fb_release &&
 	    oldinfo->fbops->fb_release(oldinfo, 0)) {
@@ -752,8 +752,14 @@ static int con2fb_release_oldinfo(struct vc_data *vc, struct fb_info *oldinfo,
 		  newinfo in an undefined state. Thus, a call to
 		  fb_set_par() may be needed for the newinfo.
 		*/
-		if (newinfo->fbops->fb_set_par)
-			newinfo->fbops->fb_set_par(newinfo);
+		if (newinfo->fbops->fb_set_par) {
+			ret = newinfo->fbops->fb_set_par(newinfo);
+
+			if (ret)
+				printk(KERN_ERR "con2fb_release_oldinfo: "
+					"detected unhandled fb_set_par error, "
+					"error code %d\n", ret);
+		}
 	}
 
 	return err;
@@ -763,11 +769,18 @@ static void con2fb_init_display(struct vc_data *vc, struct fb_info *info,
 				int unit, int show_logo)
 {
 	struct fbcon_ops *ops = info->fbcon_par;
+	int ret;
 
 	ops->currcon = fg_console;
 
-	if (info->fbops->fb_set_par && !(ops->flags & FBCON_FLAGS_INIT))
-		info->fbops->fb_set_par(info);
+	if (info->fbops->fb_set_par && !(ops->flags & FBCON_FLAGS_INIT)) {
+		ret = info->fbops->fb_set_par(info);
+
+		if (ret)
+			printk(KERN_ERR "con2fb_init_display: detected "
+				"unhandled fb_set_par error, "
+				"error code %d\n", ret);
+	}
 
 	ops->flags |= FBCON_FLAGS_INIT;
 	ops->graphics = 0;
@@ -1006,7 +1019,7 @@ static void fbcon_init(struct vc_data *vc, int init)
 	struct vc_data *svc = *default_mode;
 	struct display *t, *p = &fb_display[vc->vc_num];
 	int logo = 1, new_rows, new_cols, rows, cols, charcnt = 256;
-	int cap;
+	int cap, ret;
 
 	if (info_idx == -1 || info == NULL)
 	    return;
@@ -1092,8 +1105,15 @@ static void fbcon_init(struct vc_data *vc, int init)
 	 */
 	if (CON_IS_VISIBLE(vc) && vc->vc_mode == KD_TEXT) {
 		if (info->fbops->fb_set_par &&
-		    !(ops->flags & FBCON_FLAGS_INIT))
-			info->fbops->fb_set_par(info);
+		    !(ops->flags & FBCON_FLAGS_INIT)) {
+			ret = info->fbops->fb_set_par(info);
+
+			if (ret)
+				printk(KERN_ERR "fbcon_init: detected "
+					"unhandled fb_set_par error, "
+					"error code %d\n", ret);
+		}
+
 		ops->flags |= FBCON_FLAGS_INIT;
 	}
 
@@ -2119,7 +2139,7 @@ static int fbcon_switch(struct vc_data *vc)
 	struct fbcon_ops *ops;
 	struct display *p = &fb_display[vc->vc_num];
 	struct fb_var_screeninfo var;
-	int i, prev_console, charcnt = 256;
+	int i, ret, prev_console, charcnt = 256;
 
 	info = registered_fb[con2fb_map[vc->vc_num]];
 	ops = info->fbcon_par;
@@ -2174,8 +2194,14 @@ static int fbcon_switch(struct vc_data *vc)
 
 	if (old_info != NULL && (old_info != info ||
 				 info->flags & FBINFO_MISC_ALWAYS_SETPAR)) {
-		if (info->fbops->fb_set_par)
-			info->fbops->fb_set_par(info);
+		if (info->fbops->fb_set_par) {
+			ret = info->fbops->fb_set_par(info);
+
+			if (ret)
+				printk(KERN_ERR "fbcon_switch: detected "
+					"unhandled fb_set_par error, "
+					"error code %d\n", ret);
+		}
 
 		if (old_info != info)
 			fbcon_del_cursor_timer(old_info);
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 346f257..a1f2e7c 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -954,6 +954,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 			goto done;
 
 		if ((var->activate & FB_ACTIVATE_MASK) == FB_ACTIVATE_NOW) {
+			struct fb_var_screeninfo old_var;
 			struct fb_videomode mode;
 
 			if (info->fbops->fb_get_caps) {
@@ -963,10 +964,20 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 					goto done;
 			}
 
+			old_var = info->var;
 			info->var = *var;
 
-			if (info->fbops->fb_set_par)
-				info->fbops->fb_set_par(info);
+			if (info->fbops->fb_set_par) {
+				ret = info->fbops->fb_set_par(info);
+
+				if (ret) {
+					info->var = old_var;
+					printk(KERN_WARNING "detected "
+						"fb_set_par error, "
+						"error code: %d\n", ret);
+					goto done;
+				}
+			}
 
 			fb_pan_display(info, &info->var);
 			fb_set_cmap(&info->cmap, info);
-- 
cgit v0.10.2


From 0728bacbba3b0267fa8ca8be69aa43d81b57ab51 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 22 Sep 2009 16:47:47 -0700
Subject: matroxfb: make CONFIG_FB_MATROX_MULTIHEAD=y mandatory

I would like to get rid of option CONFIG_FB_MATROX_MULTIHEAD and just
always enable it.  There are many reasons for doing this:

* CONFIG_FB_MATROX_MULTIHEAD=y is what all x86 distributions do, so it
  definitely works or we would know by now.

* Building the matroxfb driver with CONFIG_FB_MATROX_MULTIHEAD not set
  results in the following build warning:

drivers/video/matrox/matroxfb_crtc2.c: In function 'matroxfb_dh_open':
drivers/video/matrox/matroxfb_crtc2.c:265: warning: the address of 'matroxfb_global_mxinfo' will always evaluate as 'true'
drivers/video/matrox/matroxfb_crtc2.c: In function 'matroxfb_dh_release':
drivers/video/matrox/matroxfb_crtc2.c:285: warning: the address of 'matroxfb_global_mxinfo' will always evaluate as 'true'

This is nothing to be worried about, the driver will work fine, but build
warnings are still annoying.

* The trick to get multihead support without CONFIG_FB_MATROX_MULTIHEAD,
  which is described in the config help text, no longer works: you can't
  load the same kernel module more than once.

* I fail to see how CONFIG_FB_MATROX_MULTIHEAD=y would make the code
  significantly slower, contrary to what the help text says.  A few extra
  parameters on the stack here and there can't really slow things down in
  comaprison to the rest of the code, and register access.

* The driver built without CONFIG_FB_MATROX_MULTIHEAD is larger than the
  driver build with CONFIG_FB_MATROX_MULTIHEAD=y by 8%.

* One less configuration option makes things simpler.  We add options
  all the time, being able to remove one for once is nice.  It improves
  testing coverage.  And I don't think the Matrox adapters are still
  popular enough to warrant overdetailed configuration settings.

* We should be able to unobfuscate the driver code quite a bit after
  this change (patches follow.)

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Petr Vandrovec <vandrove@vc.cvut.cz>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/fb/matroxfb.txt b/Documentation/fb/matroxfb.txt
index ad7a677..e5ce8a1 100644
--- a/Documentation/fb/matroxfb.txt
+++ b/Documentation/fb/matroxfb.txt
@@ -186,9 +186,7 @@ noinverse - show true colors on screen. It is default.
 dev:X    - bind driver to device X. Driver numbers device from 0 up to N,
            where device 0 is first `known' device found, 1 second and so on.
 	   lspci lists devices in this order.
-	   Default is `every' known device for driver with multihead support
-	   and first working device (usually dev:0) for driver without
-	   multihead support.
+	   Default is `every' known device.
 nohwcursor - disables hardware cursor (use software cursor instead).
 hwcursor - enables hardware cursor. It is default. If you are using
            non-accelerated mode (`noaccel' or `fbset -accel false'), software
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 7be5062..9bbb285 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1275,26 +1275,6 @@ config FB_MATROX_MAVEN
 	  painting procedures (the secondary head does not use acceleration
 	  engine).
 
-config FB_MATROX_MULTIHEAD
-	bool "Multihead support"
-	depends on FB_MATROX
-	---help---
-	  Say Y here if you have more than one (supported) Matrox device in
-	  your computer and you want to use all of them for different monitors
-	  ("multihead"). If you have only one device, you should say N because
-	  the driver compiled with Y is larger and a bit slower, especially on
-	  ia32 (ix86).
-
-	  If you said M to "Matrox unified accelerated driver" and N here, you
-	  will still be able to use several Matrox devices simultaneously:
-	  insert several instances of the module matroxfb into the kernel
-	  with insmod, supplying the parameter "dev=N" where N is 0, 1, etc.
-	  for the different Matrox devices. This method is slightly faster but
-	  uses 40 KB of kernel memory per Matrox card.
-
-	  There is no need for enabling 'Matrox multihead support' if you have
-	  only one Matrox card in the box.
-
 config FB_RADEON
 	tristate "ATI Radeon display support"
 	depends on FB && PCI
diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c
index 0c1049b..6ede98d 100644
--- a/drivers/video/matrox/matroxfb_base.c
+++ b/drivers/video/matrox/matroxfb_base.c
@@ -379,9 +379,7 @@ static void matroxfb_remove(WPMINFO int dummy) {
 	mga_iounmap(ACCESS_FBINFO(video.vbase));
 	release_mem_region(ACCESS_FBINFO(video.base), ACCESS_FBINFO(video.len_maximum));
 	release_mem_region(ACCESS_FBINFO(mmio.base), 16384);
-#ifdef CONFIG_FB_MATROX_MULTIHEAD
 	kfree(minfo);
-#endif
 }
 
 	/*
@@ -644,9 +642,7 @@ static int matroxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			      unsigned blue, unsigned transp,
 			      struct fb_info *fb_info)
 {
-#ifdef CONFIG_FB_MATROX_MULTIHEAD
 	struct matrox_fb_info* minfo = container_of(fb_info, struct matrox_fb_info, fbcon);
-#endif
 
 	DBG(__func__)
 
@@ -2011,9 +2007,6 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm
 	struct matrox_fb_info* minfo;
 	int err;
 	u_int32_t cmd;
-#ifndef CONFIG_FB_MATROX_MULTIHEAD
-	static int registered = 0;
-#endif
 	DBG(__func__)
 
 	svid = pdev->subsystem_vendor;
@@ -2037,15 +2030,9 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm
 		return -1;
 	}
 
-#ifdef CONFIG_FB_MATROX_MULTIHEAD
 	minfo = kmalloc(sizeof(*minfo), GFP_KERNEL);
 	if (!minfo)
 		return -1;
-#else
-	if (registered)	/* singlehead driver... */
-		return -1;
-	minfo = &matroxfb_global_mxinfo;
-#endif
 	memset(MINFO, 0, sizeof(*MINFO));
 
 	ACCESS_FBINFO(pcidev) = pdev;
@@ -2090,15 +2077,10 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm
 
 	err = initMatrox2(PMINFO b);
 	if (!err) {
-#ifndef CONFIG_FB_MATROX_MULTIHEAD
-		registered = 1;
-#endif
 		matroxfb_register_device(MINFO);
 		return 0;
 	}
-#ifdef CONFIG_FB_MATROX_MULTIHEAD
 	kfree(minfo);
-#endif
 	return -1;
 }
 
@@ -2510,13 +2492,8 @@ module_param(inv24, int, 0);
 MODULE_PARM_DESC(inv24, "Inverts clock polarity for 24bpp and loop frequency > 100MHz (default=do not invert polarity)");
 module_param(inverse, int, 0);
 MODULE_PARM_DESC(inverse, "Inverse (0 or 1) (default=0)");
-#ifdef CONFIG_FB_MATROX_MULTIHEAD
 module_param(dev, int, 0);
 MODULE_PARM_DESC(dev, "Multihead support, attach to device ID (0..N) (default=all working)");
-#else
-module_param(dev, int, 0);
-MODULE_PARM_DESC(dev, "Multihead support, attach to device ID (0..N) (default=first working)");
-#endif
 module_param(vesa, int, 0);
 MODULE_PARM_DESC(vesa, "Startup videomode (0x000-0x1FF) (default=0x101)");
 module_param(xres, int, 0);
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index 9588323..ba64f5e 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -524,7 +524,6 @@ struct matrox_fb_info {
 
 #define info2minfo(info) container_of(info, struct matrox_fb_info, fbcon)
 
-#ifdef CONFIG_FB_MATROX_MULTIHEAD
 #define ACCESS_FBINFO2(info, x) (info->x)
 #define ACCESS_FBINFO(x) ACCESS_FBINFO2(minfo,x)
 
@@ -538,25 +537,6 @@ struct matrox_fb_info {
 #define PMINFO   PMINFO2 ,
 
 #define MINFO_FROM(x)	   struct matrox_fb_info* minfo = x
-#else
-
-extern struct matrox_fb_info matroxfb_global_mxinfo;
-
-#define ACCESS_FBINFO(x) (matroxfb_global_mxinfo.x)
-#define ACCESS_FBINFO2(info, x) (matroxfb_global_mxinfo.x)
-
-#define MINFO (&matroxfb_global_mxinfo)
-
-#define WPMINFO2 void
-#define WPMINFO
-#define CPMINFO2 void
-#define CPMINFO
-#define PMINFO2
-#define PMINFO
-
-#define MINFO_FROM(x)
-
-#endif
 
 #define MINFO_FROM_INFO(x) MINFO_FROM(info2minfo(x))
 
diff --git a/drivers/video/matrox/matroxfb_misc.c b/drivers/video/matrox/matroxfb_misc.c
index 5b5f072..d5b9e78 100644
--- a/drivers/video/matrox/matroxfb_misc.c
+++ b/drivers/video/matrox/matroxfb_misc.c
@@ -784,10 +784,6 @@ EXPORT_SYMBOL(matroxfb_DAC_in);
 EXPORT_SYMBOL(matroxfb_DAC_out);
 EXPORT_SYMBOL(matroxfb_var2my);
 EXPORT_SYMBOL(matroxfb_PLL_calcclock);
-#ifndef CONFIG_FB_MATROX_MULTIHEAD
-struct matrox_fb_info matroxfb_global_mxinfo;
-EXPORT_SYMBOL(matroxfb_global_mxinfo);
-#endif
 EXPORT_SYMBOL(matroxfb_vgaHWinit);		/* DAC1064, Ti3026 */
 EXPORT_SYMBOL(matroxfb_vgaHWrestore);		/* DAC1064, Ti3026 */
 EXPORT_SYMBOL(matroxfb_read_pins);
-- 
cgit v0.10.2


From fc2d10ddfc8989e82f74d2a38c7d6bfa45bcaba9 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 22 Sep 2009 16:47:48 -0700
Subject: matroxfb: get rid of unneeded macros ACCESS_FBINFO and MINFO

With multihead support always enabled, these macros are no longer needed
and make the code harder to read.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Petr Vandrovec <vandrove@vc.cvut.cz>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/matrox/g450_pll.c b/drivers/video/matrox/g450_pll.c
index d42346e..8c75427 100644
--- a/drivers/video/matrox/g450_pll.c
+++ b/drivers/video/matrox/g450_pll.c
@@ -30,7 +30,7 @@ static unsigned int g450_mnp2vco(CPMINFO unsigned int mnp) {
 
 	m = ((mnp >> 16) & 0x0FF) + 1;
 	n = ((mnp >>  7) & 0x1FE) + 4;
-	return (ACCESS_FBINFO(features).pll.ref_freq * n + (m >> 1)) / m;
+	return (minfo->features.pll.ref_freq * n + (m >> 1)) / m;
 }
 
 unsigned int g450_mnp2f(CPMINFO unsigned int mnp) {
@@ -90,7 +90,7 @@ static unsigned int g450_nextpll(CPMINFO const struct matrox_pll_limits* pi, uns
 		} else {
 			m--;
 		}
-        	n = ((tvco * (m+1) + ACCESS_FBINFO(features).pll.ref_freq) / (ACCESS_FBINFO(features).pll.ref_freq * 2)) - 2;
+		n = ((tvco * (m+1) + minfo->features.pll.ref_freq) / (minfo->features.pll.ref_freq * 2)) - 2;
 	} while (n < 0x03 || n > 0x7A);
 	return (m << 16) | (n << 8) | p;
 }
@@ -333,7 +333,7 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 				matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp);
 				/* DVI PLL preferred for frequencies up to
 				   panel link max, standard PLL otherwise */
-				if (fout >= MINFO->max_pixel_clock_panellink)
+				if (fout >= minfo->max_pixel_clock_panellink)
 					tmp = 0;
 				else tmp =
 					M1064_XDVICLKCTRL_DVIDATAPATHSEL |
@@ -363,20 +363,20 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 				}
 				mga_outb(M_MISC_REG, misc);
 			}
-			pi = &ACCESS_FBINFO(limits.pixel);
-			ci = &ACCESS_FBINFO(cache.pixel);
+			pi = &minfo->limits.pixel;
+			ci = &minfo->cache.pixel;
 			break;
 		case M_SYSTEM_PLL:
 			{
 				u_int32_t opt;
 
-				pci_read_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, &opt);
+				pci_read_config_dword(minfo->pcidev, PCI_OPTION_REG, &opt);
 				if (!(opt & 0x20)) {
-					pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, opt | 0x20);
+					pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, opt | 0x20);
 				}
 			}
-			pi = &ACCESS_FBINFO(limits.system);
-			ci = &ACCESS_FBINFO(cache.system);
+			pi = &minfo->limits.system;
+			ci = &minfo->cache.system;
 			break;
 		case M_VIDEO_PLL:
 			{
@@ -395,8 +395,8 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 				pixel_vco = g450_mnp2vco(PMINFO mnp);
 				matroxfb_DAC_unlock_irqrestore(flags);
 			}
-			pi = &ACCESS_FBINFO(limits.video);
-			ci = &ACCESS_FBINFO(cache.video);
+			pi = &minfo->limits.video;
+			ci = &minfo->cache.video;
 			break;
 		default:
 			return -EINVAL;
@@ -475,7 +475,7 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 			mnp = g450_findworkingpll(PMINFO pll, mnparray, mnpcount);
 			g450_addcache(ci, mnparray[0], mnp);
 		}
-		updatehwstate_clk(&ACCESS_FBINFO(hw), mnp, pll);
+		updatehwstate_clk(&minfo->hw, mnp, pll);
 		matroxfb_DAC_unlock_irqrestore(flags);
 		return mnp;
 	}
diff --git a/drivers/video/matrox/i2c-matroxfb.c b/drivers/video/matrox/i2c-matroxfb.c
index c14e3e2..cefabe8 100644
--- a/drivers/video/matrox/i2c-matroxfb.c
+++ b/drivers/video/matrox/i2c-matroxfb.c
@@ -112,7 +112,7 @@ static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo,
 	i2c_set_adapdata(&b->adapter, b);
 	b->adapter.class = class;
 	b->adapter.algo_data = &b->bac;
-	b->adapter.dev.parent = &ACCESS_FBINFO(pcidev)->dev;
+	b->adapter.dev.parent = &minfo->pcidev->dev;
 	b->bac = matrox_i2c_algo_template;
 	b->bac.data = b;
 	err = i2c_bit_add_bus(&b->adapter);
@@ -153,7 +153,7 @@ static void* i2c_matroxfb_probe(struct matrox_fb_info* minfo) {
 	matroxfb_DAC_out(PMINFO DAC_XGENIOCTRL, 0x00);
 	matroxfb_DAC_unlock_irqrestore(flags);
 
-	switch (ACCESS_FBINFO(chip)) {
+	switch (minfo->chip) {
 		case MGA_2064:
 		case MGA_2164:
 			err = i2c_bus_reg(&m2info->ddc1, minfo,
@@ -168,7 +168,7 @@ static void* i2c_matroxfb_probe(struct matrox_fb_info* minfo) {
 	}
 	if (err)
 		goto fail_ddc1;
-	if (ACCESS_FBINFO(devflags.dualhead)) {
+	if (minfo->devflags.dualhead) {
 		err = i2c_bus_reg(&m2info->ddc2, minfo,
 				  DDC2_DATA, DDC2_CLK,
 				  "DDC:fb%u #1", I2C_CLASS_DDC);
diff --git a/drivers/video/matrox/matroxfb_DAC1064.c b/drivers/video/matrox/matroxfb_DAC1064.c
index a74e5da..7662a28 100644
--- a/drivers/video/matrox/matroxfb_DAC1064.c
+++ b/drivers/video/matrox/matroxfb_DAC1064.c
@@ -85,19 +85,19 @@ static void DAC1064_setpclk(WPMINFO unsigned long fout) {
 
 	DBG(__func__)
 
-	DAC1064_calcclock(PMINFO fout, ACCESS_FBINFO(max_pixel_clock), &m, &n, &p);
-	ACCESS_FBINFO(hw).DACclk[0] = m;
-	ACCESS_FBINFO(hw).DACclk[1] = n;
-	ACCESS_FBINFO(hw).DACclk[2] = p;
+	DAC1064_calcclock(PMINFO fout, minfo->max_pixel_clock, &m, &n, &p);
+	minfo->hw.DACclk[0] = m;
+	minfo->hw.DACclk[1] = n;
+	minfo->hw.DACclk[2] = p;
 }
 
 static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 	u_int32_t mx;
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	if (ACCESS_FBINFO(devflags.noinit)) {
+	if (minfo->devflags.noinit) {
 		/* read MCLK and give up... */
 		hw->DACclk[3] = inDAC1064(PMINFO DAC1064_XSYSPLLM);
 		hw->DACclk[4] = inDAC1064(PMINFO DAC1064_XSYSPLLN);
@@ -105,7 +105,7 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 		return;
 	}
 	mx = hw->MXoptionReg | 0x00000004;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, mx);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, mx);
 	mx &= ~0x000000BB;
 	if (oscinfo & DAC1064_OPT_GDIV1)
 		mx |= 0x00000008;
@@ -120,9 +120,9 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 
 		/* powerup system PLL, select PCI clock */
 		mx |= 0x00000020;
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, mx);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, mx);
 		mx &= ~0x00000004;
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, mx);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, mx);
 
 		/* !!! you must not access device if MCLK is not running !!!
 		   Doing so cause immediate PCI lockup :-( Maybe they should
@@ -131,7 +131,7 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 		   perfect... */
 		/* (bit 2 of PCI_OPTION_REG must be 0... and bits 0,1 must not
 		   select PLL... because of PLL can be stopped at this time) */
-		DAC1064_calcclock(PMINFO fmem, ACCESS_FBINFO(max_pixel_clock), &m, &n, &p);
+		DAC1064_calcclock(PMINFO fmem, minfo->max_pixel_clock, &m, &n, &p);
 		outDAC1064(PMINFO DAC1064_XSYSPLLM, hw->DACclk[3] = m);
 		outDAC1064(PMINFO DAC1064_XSYSPLLN, hw->DACclk[4] = n);
 		outDAC1064(PMINFO DAC1064_XSYSPLLP, hw->DACclk[5] = p);
@@ -147,9 +147,9 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 		/* select specified system clock source */
 		mx |= oscinfo & DAC1064_OPT_SCLK_MASK;
 	}
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, mx);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, mx);
 	mx &= ~0x00000004;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, mx);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, mx);
 	hw->MXoptionReg = mx;
 }
 
@@ -157,19 +157,19 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 static void g450_set_plls(WPMINFO2) {
 	u_int32_t c2_ctl;
 	unsigned int pxc;
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 	int pixelmnp;
 	int videomnp;
 	
 	c2_ctl = hw->crtc2.ctl & ~0x4007;	/* Clear PLL + enable for CRTC2 */
 	c2_ctl |= 0x0001;			/* Enable CRTC2 */
 	hw->DACreg[POS1064_XPWRCTRL] &= ~0x02;	/* Stop VIDEO PLL */
-	pixelmnp = ACCESS_FBINFO(crtc1).mnp;
-	videomnp = ACCESS_FBINFO(crtc2).mnp;
+	pixelmnp = minfo->crtc1.mnp;
+	videomnp = minfo->crtc2.mnp;
 	if (videomnp < 0) {
 		c2_ctl &= ~0x0001;			/* Disable CRTC2 */
 		hw->DACreg[POS1064_XPWRCTRL] &= ~0x10;	/* Powerdown CRTC2 */
-	} else if (ACCESS_FBINFO(crtc2).pixclock == ACCESS_FBINFO(features).pll.ref_freq) {
+	} else if (minfo->crtc2.pixclock == minfo->features.pll.ref_freq) {
 		c2_ctl |=  0x4002;	/* Use reference directly */
 	} else if (videomnp == pixelmnp) {
 		c2_ctl |=  0x0004;	/* Use pixel PLL */
@@ -200,11 +200,11 @@ static void g450_set_plls(WPMINFO2) {
 		mga_outl(0x3C10, c2_ctl);
 	}
 
-	pxc = ACCESS_FBINFO(crtc1).pixclock;
-	if (pxc == 0 || ACCESS_FBINFO(outputs[2]).src == MATROXFB_SRC_CRTC2) {
-		pxc = ACCESS_FBINFO(crtc2).pixclock;
+	pxc = minfo->crtc1.pixclock;
+	if (pxc == 0 || minfo->outputs[2].src == MATROXFB_SRC_CRTC2) {
+		pxc = minfo->crtc2.pixclock;
 	}
-	if (ACCESS_FBINFO(chip) == MGA_G550) {
+	if (minfo->chip == MGA_G550) {
 		if (pxc < 45000) {
 			hw->DACreg[POS1064_XPANMODE] = 0x00;	/* 0-50 */
 		} else if (pxc < 55000) {
@@ -246,17 +246,17 @@ static void g450_set_plls(WPMINFO2) {
 #endif
 
 void DAC1064_global_init(WPMINFO2) {
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	hw->DACreg[POS1064_XMISCCTRL] &= M1064_XMISCCTRL_DAC_WIDTHMASK;
 	hw->DACreg[POS1064_XMISCCTRL] |= M1064_XMISCCTRL_LUT_EN;
 	hw->DACreg[POS1064_XPIXCLKCTRL] = M1064_XPIXCLKCTRL_PLL_UP | M1064_XPIXCLKCTRL_EN | M1064_XPIXCLKCTRL_SRC_PLL;
 #ifdef CONFIG_FB_MATROX_G
-	if (ACCESS_FBINFO(devflags.g450dac)) {
+	if (minfo->devflags.g450dac) {
 		hw->DACreg[POS1064_XPWRCTRL] = 0x1F;	/* powerup everything */
 		hw->DACreg[POS1064_XOUTPUTCONN] = 0x00;	/* disable outputs */
 		hw->DACreg[POS1064_XMISCCTRL] |= M1064_XMISCCTRL_DAC_EN;
-		switch (ACCESS_FBINFO(outputs[0]).src) {
+		switch (minfo->outputs[0].src) {
 			case MATROXFB_SRC_CRTC1:
 			case MATROXFB_SRC_CRTC2:
 				hw->DACreg[POS1064_XOUTPUTCONN] |= 0x01;	/* enable output; CRTC1/2 selection is in CRTC2 ctl */
@@ -265,12 +265,12 @@ void DAC1064_global_init(WPMINFO2) {
 				hw->DACreg[POS1064_XMISCCTRL] &= ~M1064_XMISCCTRL_DAC_EN;
 				break;
 		}
-		switch (ACCESS_FBINFO(outputs[1]).src) {
+		switch (minfo->outputs[1].src) {
 			case MATROXFB_SRC_CRTC1:
 				hw->DACreg[POS1064_XOUTPUTCONN] |= 0x04;
 				break;
 			case MATROXFB_SRC_CRTC2:
-				if (ACCESS_FBINFO(outputs[1]).mode == MATROXFB_OUTPUT_MODE_MONITOR) {
+				if (minfo->outputs[1].mode == MATROXFB_OUTPUT_MODE_MONITOR) {
 					hw->DACreg[POS1064_XOUTPUTCONN] |= 0x08;
 				} else {
 					hw->DACreg[POS1064_XOUTPUTCONN] |= 0x0C;
@@ -280,7 +280,7 @@ void DAC1064_global_init(WPMINFO2) {
 				hw->DACreg[POS1064_XPWRCTRL] &= ~0x01;		/* Poweroff DAC2 */
 				break;
 		}
-		switch (ACCESS_FBINFO(outputs[2]).src) {
+		switch (minfo->outputs[2].src) {
 			case MATROXFB_SRC_CRTC1:
 				hw->DACreg[POS1064_XOUTPUTCONN] |= 0x20;
 				break;
@@ -303,30 +303,30 @@ void DAC1064_global_init(WPMINFO2) {
 	} else
 #endif
 	{
-		if (ACCESS_FBINFO(outputs[1]).src == MATROXFB_SRC_CRTC1) {
+		if (minfo->outputs[1].src == MATROXFB_SRC_CRTC1) {
 			hw->DACreg[POS1064_XPIXCLKCTRL] = M1064_XPIXCLKCTRL_PLL_UP | M1064_XPIXCLKCTRL_EN | M1064_XPIXCLKCTRL_SRC_EXT;
 			hw->DACreg[POS1064_XMISCCTRL] |= GX00_XMISCCTRL_MFC_MAFC | G400_XMISCCTRL_VDO_MAFC12;
-		} else if (ACCESS_FBINFO(outputs[1]).src == MATROXFB_SRC_CRTC2) {
+		} else if (minfo->outputs[1].src == MATROXFB_SRC_CRTC2) {
 			hw->DACreg[POS1064_XMISCCTRL] |= GX00_XMISCCTRL_MFC_MAFC | G400_XMISCCTRL_VDO_C2_MAFC12;
-		} else if (ACCESS_FBINFO(outputs[2]).src == MATROXFB_SRC_CRTC1)
+		} else if (minfo->outputs[2].src == MATROXFB_SRC_CRTC1)
 			hw->DACreg[POS1064_XMISCCTRL] |= GX00_XMISCCTRL_MFC_PANELLINK | G400_XMISCCTRL_VDO_MAFC12;
 		else
 			hw->DACreg[POS1064_XMISCCTRL] |= GX00_XMISCCTRL_MFC_DIS;
 
-		if (ACCESS_FBINFO(outputs[0]).src != MATROXFB_SRC_NONE)
+		if (minfo->outputs[0].src != MATROXFB_SRC_NONE)
 			hw->DACreg[POS1064_XMISCCTRL] |= M1064_XMISCCTRL_DAC_EN;
 	}
 }
 
 void DAC1064_global_restore(WPMINFO2) {
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	outDAC1064(PMINFO M1064_XPIXCLKCTRL, hw->DACreg[POS1064_XPIXCLKCTRL]);
 	outDAC1064(PMINFO M1064_XMISCCTRL, hw->DACreg[POS1064_XMISCCTRL]);
-	if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG400) {
+	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400) {
 		outDAC1064(PMINFO 0x20, 0x04);
-		outDAC1064(PMINFO 0x1F, ACCESS_FBINFO(devflags.dfp_type));
-		if (ACCESS_FBINFO(devflags.g450dac)) {
+		outDAC1064(PMINFO 0x1F, minfo->devflags.dfp_type);
+		if (minfo->devflags.g450dac) {
 			outDAC1064(PMINFO M1064_XSYNCCTRL, 0xCC);
 			outDAC1064(PMINFO M1064_XPWRCTRL, hw->DACreg[POS1064_XPWRCTRL]);
 			outDAC1064(PMINFO M1064_XPANMODE, hw->DACreg[POS1064_XPANMODE]);
@@ -336,18 +336,18 @@ void DAC1064_global_restore(WPMINFO2) {
 }
 
 static int DAC1064_init_1(WPMINFO struct my_timming* m) {
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
 	memcpy(hw->DACreg, MGA1064_DAC, sizeof(MGA1064_DAC_regs));
-	switch (ACCESS_FBINFO(fbcon).var.bits_per_pixel) {
+	switch (minfo->fbcon.var.bits_per_pixel) {
 		/* case 4: not supported by MGA1064 DAC */
 		case 8:
 			hw->DACreg[POS1064_XMULCTRL] = M1064_XMULCTRL_DEPTH_8BPP | M1064_XMULCTRL_GRAPHICS_PALETIZED;
 			break;
 		case 16:
-			if (ACCESS_FBINFO(fbcon).var.green.length == 5)
+			if (minfo->fbcon.var.green.length == 5)
 				hw->DACreg[POS1064_XMULCTRL] = M1064_XMULCTRL_DEPTH_15BPP_1BPP | M1064_XMULCTRL_GRAPHICS_PALETIZED;
 			else
 				hw->DACreg[POS1064_XMULCTRL] = M1064_XMULCTRL_DEPTH_16BPP | M1064_XMULCTRL_GRAPHICS_PALETIZED;
@@ -361,7 +361,7 @@ static int DAC1064_init_1(WPMINFO struct my_timming* m) {
 		default:
 			return 1;	/* unsupported depth */
 	}
-	hw->DACreg[POS1064_XVREFCTRL] = ACCESS_FBINFO(features.DAC1064.xvrefctrl);
+	hw->DACreg[POS1064_XVREFCTRL] = minfo->features.DAC1064.xvrefctrl;
 	hw->DACreg[POS1064_XGENCTRL] &= ~M1064_XGENCTRL_SYNC_ON_GREEN_MASK;
 	hw->DACreg[POS1064_XGENCTRL] |= (m->sync & FB_SYNC_ON_GREEN)?M1064_XGENCTRL_SYNC_ON_GREEN:M1064_XGENCTRL_NO_SYNC_ON_GREEN;
 	hw->DACreg[POS1064_XCURADDL] = 0;
@@ -372,11 +372,11 @@ static int DAC1064_init_1(WPMINFO struct my_timming* m) {
 }
 
 static int DAC1064_init_2(WPMINFO struct my_timming* m) {
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	if (ACCESS_FBINFO(fbcon).var.bits_per_pixel > 16) {	/* 256 entries */
+	if (minfo->fbcon.var.bits_per_pixel > 16) {	/* 256 entries */
 		int i;
 
 		for (i = 0; i < 256; i++) {
@@ -384,8 +384,8 @@ static int DAC1064_init_2(WPMINFO struct my_timming* m) {
 			hw->DACpal[i * 3 + 1] = i;
 			hw->DACpal[i * 3 + 2] = i;
 		}
-	} else if (ACCESS_FBINFO(fbcon).var.bits_per_pixel > 8) {
-		if (ACCESS_FBINFO(fbcon).var.green.length == 5) {	/* 0..31, 128..159 */
+	} else if (minfo->fbcon.var.bits_per_pixel > 8) {
+		if (minfo->fbcon.var.green.length == 5) {	/* 0..31, 128..159 */
 			int i;
 
 			for (i = 0; i < 32; i++) {
@@ -414,7 +414,7 @@ static int DAC1064_init_2(WPMINFO struct my_timming* m) {
 }
 
 static void DAC1064_restore_1(WPMINFO2) {
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	CRITFLAGS
 
@@ -453,12 +453,12 @@ static void DAC1064_restore_2(WPMINFO2) {
 #ifdef DEBUG
 	dprintk(KERN_DEBUG "DAC1064regs ");
 	for (i = 0; i < sizeof(MGA1064_DAC_regs); i++) {
-		dprintk("R%02X=%02X ", MGA1064_DAC_regs[i], ACCESS_FBINFO(hw).DACreg[i]);
+		dprintk("R%02X=%02X ", MGA1064_DAC_regs[i], minfo->hw.DACreg[i]);
 		if ((i & 0x7) == 0x7) dprintk(KERN_DEBUG "continuing... ");
 	}
 	dprintk(KERN_DEBUG "DAC1064clk ");
 	for (i = 0; i < 6; i++)
-		dprintk("C%02X=%02X ", i, ACCESS_FBINFO(hw).DACclk[i]);
+		dprintk("C%02X=%02X ", i, minfo->hw.DACclk[i]);
 	dprintk("\n");
 #endif
 }
@@ -475,7 +475,7 @@ static int m1064_compute(void* out, struct my_timming* m) {
 		CRITBEGIN
 
 		for (i = 0; i < 3; i++)
-			outDAC1064(PMINFO M1064_XPIXPLLCM + i, ACCESS_FBINFO(hw).DACclk[i]);
+			outDAC1064(PMINFO M1064_XPIXPLLCM + i, minfo->hw.DACclk[i]);
 		for (tmout = 500000; tmout; tmout--) {
 			if (inDAC1064(PMINFO M1064_XPIXPLLSTAT) & 0x40)
 				break;
@@ -519,7 +519,7 @@ static struct matrox_altout g450out = {
 
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
 static int MGA1064_init(WPMINFO struct my_timming* m) {
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
@@ -541,7 +541,7 @@ static int MGA1064_init(WPMINFO struct my_timming* m) {
 
 #ifdef CONFIG_FB_MATROX_G
 static int MGAG100_init(WPMINFO struct my_timming* m) {
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
@@ -567,15 +567,15 @@ static void MGA1064_ramdac_init(WPMINFO2) {
 
 	DBG(__func__)
 
-	/* ACCESS_FBINFO(features.DAC1064.vco_freq_min) = 120000; */
-	ACCESS_FBINFO(features.pll.vco_freq_min) = 62000;
-	ACCESS_FBINFO(features.pll.ref_freq)	 = 14318;
-	ACCESS_FBINFO(features.pll.feed_div_min) = 100;
-	ACCESS_FBINFO(features.pll.feed_div_max) = 127;
-	ACCESS_FBINFO(features.pll.in_div_min)	 = 1;
-	ACCESS_FBINFO(features.pll.in_div_max)	 = 31;
-	ACCESS_FBINFO(features.pll.post_shift_max) = 3;
-	ACCESS_FBINFO(features.DAC1064.xvrefctrl) = DAC1064_XVREFCTRL_EXTERNAL;
+	/* minfo->features.DAC1064.vco_freq_min = 120000; */
+	minfo->features.pll.vco_freq_min = 62000;
+	minfo->features.pll.ref_freq	 = 14318;
+	minfo->features.pll.feed_div_min = 100;
+	minfo->features.pll.feed_div_max = 127;
+	minfo->features.pll.in_div_min	 = 1;
+	minfo->features.pll.in_div_max	 = 31;
+	minfo->features.pll.post_shift_max = 3;
+	minfo->features.DAC1064.xvrefctrl = DAC1064_XVREFCTRL_EXTERNAL;
 	/* maybe cmdline MCLK= ?, doc says gclk=44MHz, mclk=66MHz... it was 55/83 with old values */
 	DAC1064_setmclk(PMINFO DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV3 | DAC1064_OPT_SCLK_PLL, 133333);
 }
@@ -638,7 +638,7 @@ static void MGAG100_setPixClock(CPMINFO int flags, int freq) {
 
 	DBG(__func__)
 
-	DAC1064_calcclock(PMINFO freq, ACCESS_FBINFO(max_pixel_clock), &m, &n, &p);
+	DAC1064_calcclock(PMINFO freq, minfo->max_pixel_clock, &m, &n, &p);
 	MGAG100_progPixClock(PMINFO flags, m, n, p);
 }
 #endif
@@ -648,30 +648,30 @@ static int MGA1064_preinit(WPMINFO2) {
 	static const int vxres_mystique[] = { 512,        640, 768,  800,  832,  960,
 					     1024, 1152, 1280,      1600, 1664, 1920,
 					     2048,    0};
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	/* ACCESS_FBINFO(capable.cfb4) = 0; ... preinitialized by 0 */
-	ACCESS_FBINFO(capable.text) = 1;
-	ACCESS_FBINFO(capable.vxres) = vxres_mystique;
+	/* minfo->capable.cfb4 = 0; ... preinitialized by 0 */
+	minfo->capable.text = 1;
+	minfo->capable.vxres = vxres_mystique;
 
-	ACCESS_FBINFO(outputs[0]).output = &m1064;
-	ACCESS_FBINFO(outputs[0]).src = ACCESS_FBINFO(outputs[0]).default_src;
-	ACCESS_FBINFO(outputs[0]).data = MINFO;
-	ACCESS_FBINFO(outputs[0]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
+	minfo->outputs[0].output = &m1064;
+	minfo->outputs[0].src = minfo->outputs[0].default_src;
+	minfo->outputs[0].data = minfo;
+	minfo->outputs[0].mode = MATROXFB_OUTPUT_MODE_MONITOR;
 
-	if (ACCESS_FBINFO(devflags.noinit))
+	if (minfo->devflags.noinit)
 		return 0;	/* do not modify settings */
 	hw->MXoptionReg &= 0xC0000100;
 	hw->MXoptionReg |= 0x00094E20;
-	if (ACCESS_FBINFO(devflags.novga))
+	if (minfo->devflags.novga)
 		hw->MXoptionReg &= ~0x00000100;
-	if (ACCESS_FBINFO(devflags.nobios))
+	if (minfo->devflags.nobios)
 		hw->MXoptionReg &= ~0x40000000;
-	if (ACCESS_FBINFO(devflags.nopciretry))
+	if (minfo->devflags.nopciretry)
 		hw->MXoptionReg |=  0x20000000;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 	mga_setr(M_SEQ_INDEX, 0x01, 0x20);
 	mga_outl(M_CTLWTST, 0x00000000);
 	udelay(200);
@@ -692,14 +692,14 @@ static void MGA1064_reset(WPMINFO2) {
 #ifdef CONFIG_FB_MATROX_G
 static void g450_mclk_init(WPMINFO2) {
 	/* switch all clocks to PCI source */
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg | 4);
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION3_REG, ACCESS_FBINFO(values).reg.opt3 & ~0x00300C03);
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg);
-	
-	if (((ACCESS_FBINFO(values).reg.opt3 & 0x000003) == 0x000003) ||
-	    ((ACCESS_FBINFO(values).reg.opt3 & 0x000C00) == 0x000C00) ||
-	    ((ACCESS_FBINFO(values).reg.opt3 & 0x300000) == 0x300000)) {
-		matroxfb_g450_setclk(PMINFO ACCESS_FBINFO(values.pll.video), M_VIDEO_PLL);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg | 4);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION3_REG, minfo->values.reg.opt3 & ~0x00300C03);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
+
+	if (((minfo->values.reg.opt3 & 0x000003) == 0x000003) ||
+	    ((minfo->values.reg.opt3 & 0x000C00) == 0x000C00) ||
+	    ((minfo->values.reg.opt3 & 0x300000) == 0x300000)) {
+		matroxfb_g450_setclk(PMINFO minfo->values.pll.video, M_VIDEO_PLL);
 	} else {
 		unsigned long flags;
 		unsigned int pwr;
@@ -709,53 +709,53 @@ static void g450_mclk_init(WPMINFO2) {
 		outDAC1064(PMINFO M1064_XPWRCTRL, pwr);
 		matroxfb_DAC_unlock_irqrestore(flags);
 	}
-	matroxfb_g450_setclk(PMINFO ACCESS_FBINFO(values.pll.system), M_SYSTEM_PLL);
+	matroxfb_g450_setclk(PMINFO minfo->values.pll.system, M_SYSTEM_PLL);
 	
 	/* switch clocks to their real PLL source(s) */
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg | 4);
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION3_REG, ACCESS_FBINFO(values).reg.opt3);
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg | 4);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION3_REG, minfo->values.reg.opt3);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
 
 }
 
 static void g450_memory_init(WPMINFO2) {
 	/* disable memory refresh */
-	ACCESS_FBINFO(hw).MXoptionReg &= ~0x001F8000;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg);
+	minfo->hw.MXoptionReg &= ~0x001F8000;
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
 	
 	/* set memory interface parameters */
-	ACCESS_FBINFO(hw).MXoptionReg &= ~0x00207E00;
-	ACCESS_FBINFO(hw).MXoptionReg |= 0x00207E00 & ACCESS_FBINFO(values).reg.opt;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg);
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, ACCESS_FBINFO(values).reg.opt2);
+	minfo->hw.MXoptionReg &= ~0x00207E00;
+	minfo->hw.MXoptionReg |= 0x00207E00 & minfo->values.reg.opt;
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION2_REG, minfo->values.reg.opt2);
 	
-	mga_outl(M_CTLWTST, ACCESS_FBINFO(values).reg.mctlwtst);
+	mga_outl(M_CTLWTST, minfo->values.reg.mctlwtst);
 	
 	/* first set up memory interface with disabled memory interface clocks */
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_MEMMISC_REG, ACCESS_FBINFO(values).reg.memmisc & ~0x80000000U);
-	mga_outl(M_MEMRDBK, ACCESS_FBINFO(values).reg.memrdbk);
-	mga_outl(M_MACCESS, ACCESS_FBINFO(values).reg.maccess);
+	pci_write_config_dword(minfo->pcidev, PCI_MEMMISC_REG, minfo->values.reg.memmisc & ~0x80000000U);
+	mga_outl(M_MEMRDBK, minfo->values.reg.memrdbk);
+	mga_outl(M_MACCESS, minfo->values.reg.maccess);
 	/* start memory clocks */
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_MEMMISC_REG, ACCESS_FBINFO(values).reg.memmisc | 0x80000000U);
+	pci_write_config_dword(minfo->pcidev, PCI_MEMMISC_REG, minfo->values.reg.memmisc | 0x80000000U);
 
 	udelay(200);
 	
-	if (ACCESS_FBINFO(values).memory.ddr && (!ACCESS_FBINFO(values).memory.emrswen || !ACCESS_FBINFO(values).memory.dll)) {
-		mga_outl(M_MEMRDBK, ACCESS_FBINFO(values).reg.memrdbk & ~0x1000);
+	if (minfo->values.memory.ddr && (!minfo->values.memory.emrswen || !minfo->values.memory.dll)) {
+		mga_outl(M_MEMRDBK, minfo->values.reg.memrdbk & ~0x1000);
 	}
-	mga_outl(M_MACCESS, ACCESS_FBINFO(values).reg.maccess | 0x8000);
+	mga_outl(M_MACCESS, minfo->values.reg.maccess | 0x8000);
 	
 	udelay(200);
 	
-	ACCESS_FBINFO(hw).MXoptionReg |= 0x001F8000 & ACCESS_FBINFO(values).reg.opt;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg);
+	minfo->hw.MXoptionReg |= 0x001F8000 & minfo->values.reg.opt;
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
 	
 	/* value is written to memory chips only if old != new */
 	mga_outl(M_PLNWT, 0);
 	mga_outl(M_PLNWT, ~0);
 	
-	if (ACCESS_FBINFO(values).reg.mctlwtst != ACCESS_FBINFO(values).reg.mctlwtst_core) {
-		mga_outl(M_CTLWTST, ACCESS_FBINFO(values).reg.mctlwtst_core);
+	if (minfo->values.reg.mctlwtst != minfo->values.reg.mctlwtst_core) {
+		mga_outl(M_CTLWTST, minfo->values.reg.mctlwtst_core);
 	}
 	
 }
@@ -765,17 +765,17 @@ static void g450_preinit(WPMINFO2) {
 	u_int8_t curctl;
 	u_int8_t c1ctl;
 	
-	/* ACCESS_FBINFO(hw).MXoptionReg = minfo->values.reg.opt; */
-	ACCESS_FBINFO(hw).MXoptionReg &= 0xC0000100;
-	ACCESS_FBINFO(hw).MXoptionReg |= 0x00000020;
-	if (ACCESS_FBINFO(devflags.novga))
-		ACCESS_FBINFO(hw).MXoptionReg &= ~0x00000100;
-	if (ACCESS_FBINFO(devflags.nobios))
-		ACCESS_FBINFO(hw).MXoptionReg &= ~0x40000000;
-	if (ACCESS_FBINFO(devflags.nopciretry))
-		ACCESS_FBINFO(hw).MXoptionReg |=  0x20000000;
-	ACCESS_FBINFO(hw).MXoptionReg |= ACCESS_FBINFO(values).reg.opt & 0x03400040;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg);
+	/* minfo->hw.MXoptionReg = minfo->values.reg.opt; */
+	minfo->hw.MXoptionReg &= 0xC0000100;
+	minfo->hw.MXoptionReg |= 0x00000020;
+	if (minfo->devflags.novga)
+		minfo->hw.MXoptionReg &= ~0x00000100;
+	if (minfo->devflags.nobios)
+		minfo->hw.MXoptionReg &= ~0x40000000;
+	if (minfo->devflags.nopciretry)
+		minfo->hw.MXoptionReg |=  0x20000000;
+	minfo->hw.MXoptionReg |= minfo->values.reg.opt & 0x03400040;
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
 
 	/* Init system clocks */
 		
@@ -812,7 +812,7 @@ static int MGAG100_preinit(WPMINFO2) {
 	static const int vxres_g100[] = {  512,        640, 768,  800,  832,  960,
                                           1024, 1152, 1280,      1600, 1664, 1920,
                                           2048, 0};
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
         u_int32_t reg50;
 #if 0
@@ -822,68 +822,68 @@ static int MGAG100_preinit(WPMINFO2) {
 	DBG(__func__)
 
 	/* there are some instabilities if in_div > 19 && vco < 61000 */
-	if (ACCESS_FBINFO(devflags.g450dac)) {
-		ACCESS_FBINFO(features.pll.vco_freq_min) = 130000;	/* my sample: >118 */
+	if (minfo->devflags.g450dac) {
+		minfo->features.pll.vco_freq_min = 130000;	/* my sample: >118 */
 	} else {
-		ACCESS_FBINFO(features.pll.vco_freq_min) = 62000;
+		minfo->features.pll.vco_freq_min = 62000;
 	}
-	if (!ACCESS_FBINFO(features.pll.ref_freq)) {
-		ACCESS_FBINFO(features.pll.ref_freq)	 = 27000;
+	if (!minfo->features.pll.ref_freq) {
+		minfo->features.pll.ref_freq	 = 27000;
 	}
-	ACCESS_FBINFO(features.pll.feed_div_min) = 7;
-	ACCESS_FBINFO(features.pll.feed_div_max) = 127;
-	ACCESS_FBINFO(features.pll.in_div_min)	 = 1;
-	ACCESS_FBINFO(features.pll.in_div_max)	 = 31;
-	ACCESS_FBINFO(features.pll.post_shift_max) = 3;
-	ACCESS_FBINFO(features.DAC1064.xvrefctrl) = DAC1064_XVREFCTRL_G100_DEFAULT;
-	/* ACCESS_FBINFO(capable.cfb4) = 0; ... preinitialized by 0 */
-	ACCESS_FBINFO(capable.text) = 1;
-	ACCESS_FBINFO(capable.vxres) = vxres_g100;
-	ACCESS_FBINFO(capable.plnwt) = ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG100
-			? ACCESS_FBINFO(devflags.sgram) : 1;
+	minfo->features.pll.feed_div_min = 7;
+	minfo->features.pll.feed_div_max = 127;
+	minfo->features.pll.in_div_min	 = 1;
+	minfo->features.pll.in_div_max	 = 31;
+	minfo->features.pll.post_shift_max = 3;
+	minfo->features.DAC1064.xvrefctrl = DAC1064_XVREFCTRL_G100_DEFAULT;
+	/* minfo->capable.cfb4 = 0; ... preinitialized by 0 */
+	minfo->capable.text = 1;
+	minfo->capable.vxres = vxres_g100;
+	minfo->capable.plnwt = minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG100
+			? minfo->devflags.sgram : 1;
 
 #ifdef CONFIG_FB_MATROX_G
-	if (ACCESS_FBINFO(devflags.g450dac)) {
-		ACCESS_FBINFO(outputs[0]).output = &g450out;
+	if (minfo->devflags.g450dac) {
+		minfo->outputs[0].output = &g450out;
 	} else
 #endif
 	{
-		ACCESS_FBINFO(outputs[0]).output = &m1064;
+		minfo->outputs[0].output = &m1064;
 	}
-	ACCESS_FBINFO(outputs[0]).src = ACCESS_FBINFO(outputs[0]).default_src;
-	ACCESS_FBINFO(outputs[0]).data = MINFO;
-	ACCESS_FBINFO(outputs[0]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
+	minfo->outputs[0].src = minfo->outputs[0].default_src;
+	minfo->outputs[0].data = minfo;
+	minfo->outputs[0].mode = MATROXFB_OUTPUT_MODE_MONITOR;
 
-	if (ACCESS_FBINFO(devflags.g450dac)) {
+	if (minfo->devflags.g450dac) {
 		/* we must do this always, BIOS does not do it for us
 		   and accelerator dies without it */
 		mga_outl(0x1C0C, 0);
 	}
-	if (ACCESS_FBINFO(devflags.noinit))
+	if (minfo->devflags.noinit)
 		return 0;
-	if (ACCESS_FBINFO(devflags.g450dac)) {
+	if (minfo->devflags.g450dac) {
 		g450_preinit(PMINFO2);
 		return 0;
 	}
 	hw->MXoptionReg &= 0xC0000100;
 	hw->MXoptionReg |= 0x00000020;
-	if (ACCESS_FBINFO(devflags.novga))
+	if (minfo->devflags.novga)
 		hw->MXoptionReg &= ~0x00000100;
-	if (ACCESS_FBINFO(devflags.nobios))
+	if (minfo->devflags.nobios)
 		hw->MXoptionReg &= ~0x40000000;
-	if (ACCESS_FBINFO(devflags.nopciretry))
+	if (minfo->devflags.nopciretry)
 		hw->MXoptionReg |=  0x20000000;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 	DAC1064_setmclk(PMINFO DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV3 | DAC1064_OPT_SCLK_PCI, 133333);
 
-	if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG100) {
-		pci_read_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, &reg50);
+	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG100) {
+		pci_read_config_dword(minfo->pcidev, PCI_OPTION2_REG, &reg50);
 		reg50 &= ~0x3000;
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, reg50);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION2_REG, reg50);
 
 		hw->MXoptionReg |= 0x1080;
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
-		mga_outl(M_CTLWTST, ACCESS_FBINFO(values).reg.mctlwtst);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
+		mga_outl(M_CTLWTST, minfo->values.reg.mctlwtst);
 		udelay(100);
 		mga_outb(0x1C05, 0x00);
 		mga_outb(0x1C05, 0x80);
@@ -893,68 +893,68 @@ static int MGAG100_preinit(WPMINFO2) {
 		udelay(100);
 		reg50 &= ~0xFF;
 		reg50 |=  0x07;
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, reg50);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION2_REG, reg50);
 		/* it should help with G100 */
 		mga_outb(M_GRAPHICS_INDEX, 6);
 		mga_outb(M_GRAPHICS_DATA, (mga_inb(M_GRAPHICS_DATA) & 3) | 4);
 		mga_setr(M_EXTVGA_INDEX, 0x03, 0x81);
 		mga_setr(M_EXTVGA_INDEX, 0x04, 0x00);
-		mga_writeb(ACCESS_FBINFO(video.vbase), 0x0000, 0xAA);
-		mga_writeb(ACCESS_FBINFO(video.vbase), 0x0800, 0x55);
-		mga_writeb(ACCESS_FBINFO(video.vbase), 0x4000, 0x55);
+		mga_writeb(minfo->video.vbase, 0x0000, 0xAA);
+		mga_writeb(minfo->video.vbase, 0x0800, 0x55);
+		mga_writeb(minfo->video.vbase, 0x4000, 0x55);
 #if 0
-		if (mga_readb(ACCESS_FBINFO(video.vbase), 0x0000) != 0xAA) {
+		if (mga_readb(minfo->video.vbase, 0x0000) != 0xAA) {
 			hw->MXoptionReg &= ~0x1000;
 		}
 #endif
 		hw->MXoptionReg |= 0x00078020;
-	} else if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG200) {
-		pci_read_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, &reg50);
+	} else if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG200) {
+		pci_read_config_dword(minfo->pcidev, PCI_OPTION2_REG, &reg50);
 		reg50 &= ~0x3000;
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, reg50);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION2_REG, reg50);
 
-		if (ACCESS_FBINFO(devflags.memtype) == -1)
-			hw->MXoptionReg |= ACCESS_FBINFO(values).reg.opt & 0x1C00;
+		if (minfo->devflags.memtype == -1)
+			hw->MXoptionReg |= minfo->values.reg.opt & 0x1C00;
 		else
-			hw->MXoptionReg |= (ACCESS_FBINFO(devflags.memtype) & 7) << 10;
-		if (ACCESS_FBINFO(devflags.sgram))
+			hw->MXoptionReg |= (minfo->devflags.memtype & 7) << 10;
+		if (minfo->devflags.sgram)
 			hw->MXoptionReg |= 0x4000;
-		mga_outl(M_CTLWTST, ACCESS_FBINFO(values).reg.mctlwtst);
-		mga_outl(M_MEMRDBK, ACCESS_FBINFO(values).reg.memrdbk);
+		mga_outl(M_CTLWTST, minfo->values.reg.mctlwtst);
+		mga_outl(M_MEMRDBK, minfo->values.reg.memrdbk);
 		udelay(200);
 		mga_outl(M_MACCESS, 0x00000000);
 		mga_outl(M_MACCESS, 0x00008000);
 		udelay(100);
-		mga_outw(M_MEMRDBK, ACCESS_FBINFO(values).reg.memrdbk);
+		mga_outw(M_MEMRDBK, minfo->values.reg.memrdbk);
 		hw->MXoptionReg |= 0x00078020;
 	} else {
-		pci_read_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, &reg50);
+		pci_read_config_dword(minfo->pcidev, PCI_OPTION2_REG, &reg50);
 		reg50 &= ~0x00000100;
 		reg50 |=  0x00000000;
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION2_REG, reg50);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION2_REG, reg50);
 
-		if (ACCESS_FBINFO(devflags.memtype) == -1)
-			hw->MXoptionReg |= ACCESS_FBINFO(values).reg.opt & 0x1C00;
+		if (minfo->devflags.memtype == -1)
+			hw->MXoptionReg |= minfo->values.reg.opt & 0x1C00;
 		else
-			hw->MXoptionReg |= (ACCESS_FBINFO(devflags.memtype) & 7) << 10;
-		if (ACCESS_FBINFO(devflags.sgram))
+			hw->MXoptionReg |= (minfo->devflags.memtype & 7) << 10;
+		if (minfo->devflags.sgram)
 			hw->MXoptionReg |= 0x4000;
-		mga_outl(M_CTLWTST, ACCESS_FBINFO(values).reg.mctlwtst);
-		mga_outl(M_MEMRDBK, ACCESS_FBINFO(values).reg.memrdbk);
+		mga_outl(M_CTLWTST, minfo->values.reg.mctlwtst);
+		mga_outl(M_MEMRDBK, minfo->values.reg.memrdbk);
 		udelay(200);
 		mga_outl(M_MACCESS, 0x00000000);
 		mga_outl(M_MACCESS, 0x00008000);
 		udelay(100);
-		mga_outl(M_MEMRDBK, ACCESS_FBINFO(values).reg.memrdbk);
+		mga_outl(M_MEMRDBK, minfo->values.reg.memrdbk);
 		hw->MXoptionReg |= 0x00040020;
 	}
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 	return 0;
 }
 
 static void MGAG100_reset(WPMINFO2) {
 	u_int8_t b;
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
@@ -964,22 +964,22 @@ static void MGAG100_reset(WPMINFO2) {
 
 		find 1014/22 (IBM/82351); /* if found and bridging Matrox, do some strange stuff */
 		pci_read_config_byte(ibm, PCI_SECONDARY_BUS, &b);
-		if (b == ACCESS_FBINFO(pcidev)->bus->number) {
+		if (b == minfo->pcidev->bus->number) {
 			pci_write_config_byte(ibm, PCI_COMMAND+1, 0);	/* disable back-to-back & SERR */
 			pci_write_config_byte(ibm, 0x41, 0xF4);		/* ??? */
 			pci_write_config_byte(ibm, PCI_IO_BASE, 0xF0);	/* ??? */
 			pci_write_config_byte(ibm, PCI_IO_LIMIT, 0x00);	/* ??? */
 		}
 #endif
-		if (!ACCESS_FBINFO(devflags.noinit)) {
+		if (!minfo->devflags.noinit) {
 			if (x7AF4 & 8) {
 				hw->MXoptionReg |= 0x40;	/* FIXME... */
-				pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+				pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 			}
 			mga_setr(M_EXTVGA_INDEX, 0x06, 0x00);
 		}
 	}
-	if (ACCESS_FBINFO(devflags.g450dac)) {
+	if (minfo->devflags.g450dac) {
 		/* either leave MCLK as is... or they were set in preinit */
 		hw->DACclk[3] = inDAC1064(PMINFO DAC1064_XSYSPLLM);
 		hw->DACclk[4] = inDAC1064(PMINFO DAC1064_XSYSPLLN);
@@ -987,14 +987,14 @@ static void MGAG100_reset(WPMINFO2) {
 	} else {
 		DAC1064_setmclk(PMINFO DAC1064_OPT_RESERVED | DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV1 | DAC1064_OPT_SCLK_PLL, 133333);
 	}
-	if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG400) {
-		if (ACCESS_FBINFO(devflags.dfp_type) == -1) {
-			ACCESS_FBINFO(devflags.dfp_type) = inDAC1064(PMINFO 0x1F);
+	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400) {
+		if (minfo->devflags.dfp_type == -1) {
+			minfo->devflags.dfp_type = inDAC1064(PMINFO 0x1F);
 		}
 	}
-	if (ACCESS_FBINFO(devflags.noinit))
+	if (minfo->devflags.noinit)
 		return;
-	if (ACCESS_FBINFO(devflags.g450dac)) {
+	if (minfo->devflags.g450dac) {
 	} else {
 		MGAG100_setPixClock(PMINFO 4, 25175);
 		MGAG100_setPixClock(PMINFO 5, 28322);
@@ -1011,7 +1011,7 @@ static void MGAG100_reset(WPMINFO2) {
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
 static void MGA1064_restore(WPMINFO2) {
 	int i;
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	CRITFLAGS
 
@@ -1019,7 +1019,7 @@ static void MGA1064_restore(WPMINFO2) {
 
 	CRITBEGIN
 
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 	mga_outb(M_IEN, 0x00);
 	mga_outb(M_CACHEFLUSH, 0x00);
 
@@ -1027,7 +1027,7 @@ static void MGA1064_restore(WPMINFO2) {
 
 	DAC1064_restore_1(PMINFO2);
 	matroxfb_vgaHWrestore(PMINFO2);
-	ACCESS_FBINFO(crtc1.panpos) = -1;
+	minfo->crtc1.panpos = -1;
 	for (i = 0; i < 6; i++)
 		mga_setr(M_EXTVGA_INDEX, i, hw->CRTCEXT[i]);
 	DAC1064_restore_2(PMINFO2);
@@ -1037,7 +1037,7 @@ static void MGA1064_restore(WPMINFO2) {
 #ifdef CONFIG_FB_MATROX_G
 static void MGAG100_restore(WPMINFO2) {
 	int i;
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	CRITFLAGS
 
@@ -1045,16 +1045,16 @@ static void MGAG100_restore(WPMINFO2) {
 
 	CRITBEGIN
 
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 	CRITEND
 
 	DAC1064_restore_1(PMINFO2);
 	matroxfb_vgaHWrestore(PMINFO2);
 #ifdef CONFIG_FB_MATROX_32MB
-	if (ACCESS_FBINFO(devflags.support32MB))
+	if (minfo->devflags.support32MB)
 		mga_setr(M_EXTVGA_INDEX, 8, hw->CRTCEXT[8]);
 #endif
-	ACCESS_FBINFO(crtc1.panpos) = -1;
+	minfo->crtc1.panpos = -1;
 	for (i = 0; i < 6; i++)
 		mga_setr(M_EXTVGA_INDEX, i, hw->CRTCEXT[i]);
 	DAC1064_restore_2(PMINFO2);
diff --git a/drivers/video/matrox/matroxfb_Ti3026.c b/drivers/video/matrox/matroxfb_Ti3026.c
index 4e82511..bc9c274 100644
--- a/drivers/video/matrox/matroxfb_Ti3026.c
+++ b/drivers/video/matrox/matroxfb_Ti3026.c
@@ -295,11 +295,11 @@ static int Ti3026_calcclock(CPMINFO unsigned int freq, unsigned int fmax, int* i
 static int Ti3026_setpclk(WPMINFO int clk) {
 	unsigned int f_pll;
 	unsigned int pixfeed, pixin, pixpost;
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	f_pll = Ti3026_calcclock(PMINFO clk, ACCESS_FBINFO(max_pixel_clock), &pixin, &pixfeed, &pixpost);
+	f_pll = Ti3026_calcclock(PMINFO clk, minfo->max_pixel_clock, &pixin, &pixfeed, &pixpost);
 
 	hw->DACclk[0] = pixin | 0xC0;
 	hw->DACclk[1] = pixfeed;
@@ -309,9 +309,9 @@ static int Ti3026_setpclk(WPMINFO int clk) {
 		unsigned int loopfeed, loopin, looppost, loopdiv, z;
 		unsigned int Bpp;
 
-		Bpp = ACCESS_FBINFO(curr.final_bppShift);
+		Bpp = minfo->curr.final_bppShift;
 
-		if (ACCESS_FBINFO(fbcon).var.bits_per_pixel == 24) {
+		if (minfo->fbcon.var.bits_per_pixel == 24) {
 			loopfeed = 3;		/* set lm to any possible value */
 			loopin = 3 * 32 / Bpp;
 		} else {
@@ -330,18 +330,18 @@ static int Ti3026_setpclk(WPMINFO int clk) {
 			looppost = 3;
 			loopdiv = z/16;
 		}
-		if (ACCESS_FBINFO(fbcon).var.bits_per_pixel == 24) {
+		if (minfo->fbcon.var.bits_per_pixel == 24) {
 			hw->DACclk[3] = ((65 - loopin) & 0x3F) | 0xC0;
 			hw->DACclk[4] = (65 - loopfeed) | 0x80;
-			if (ACCESS_FBINFO(accel.ramdac_rev) > 0x20) {
-				if (isInterleave(MINFO))
+			if (minfo->accel.ramdac_rev > 0x20) {
+				if (isInterleave(minfo))
 					hw->DACreg[POS3026_XLATCHCTRL] = TVP3026B_XLATCHCTRL_8_3;
 				else {
 					hw->DACclk[4] &= ~0xC0;
 					hw->DACreg[POS3026_XLATCHCTRL] = TVP3026B_XLATCHCTRL_4_3;
 				}
 			} else {
-				if (isInterleave(MINFO))
+				if (isInterleave(minfo))
 					;	/* default... */
 				else {
 					hw->DACclk[4] ^= 0xC0;	/* change from 0x80 to 0x40 */
@@ -349,7 +349,7 @@ static int Ti3026_setpclk(WPMINFO int clk) {
 				}
 			}
 			hw->DACclk[5] = looppost | 0xF8;
-			if (ACCESS_FBINFO(devflags.mga_24bpp_fix))
+			if (minfo->devflags.mga_24bpp_fix)
 				hw->DACclk[5] ^= 0x40;
 		} else {
 			hw->DACclk[3] = ((65 - loopin) & 0x3F) | 0xC0;
@@ -362,13 +362,13 @@ static int Ti3026_setpclk(WPMINFO int clk) {
 }
 
 static int Ti3026_init(WPMINFO struct my_timming* m) {
-	u_int8_t muxctrl = isInterleave(MINFO) ? TVP3026_XMUXCTRL_MEMORY_64BIT : TVP3026_XMUXCTRL_MEMORY_32BIT;
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	u_int8_t muxctrl = isInterleave(minfo) ? TVP3026_XMUXCTRL_MEMORY_64BIT : TVP3026_XMUXCTRL_MEMORY_32BIT;
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
 	memcpy(hw->DACreg, MGADACbpp32, sizeof(hw->DACreg));
-	switch (ACCESS_FBINFO(fbcon).var.bits_per_pixel) {
+	switch (minfo->fbcon.var.bits_per_pixel) {
 		case 4:	hw->DACreg[POS3026_XLATCHCTRL] = TVP3026_XLATCHCTRL_16_1;	/* or _8_1, they are same */
 			hw->DACreg[POS3026_XTRUECOLORCTRL] = TVP3026_XTRUECOLORCTRL_PSEUDOCOLOR;
 			hw->DACreg[POS3026_XMUXCTRL] = muxctrl | TVP3026_XMUXCTRL_PIXEL_4BIT;
@@ -383,7 +383,7 @@ static int Ti3026_init(WPMINFO struct my_timming* m) {
 			break;
 		case 16:
 			/* XLATCHCTRL should be _4_1 / _2_1... Why is not? (_2_1 is used everytime) */
-			hw->DACreg[POS3026_XTRUECOLORCTRL] = (ACCESS_FBINFO(fbcon).var.green.length == 5)? (TVP3026_XTRUECOLORCTRL_DIRECTCOLOR | TVP3026_XTRUECOLORCTRL_ORGB_1555 ) : (TVP3026_XTRUECOLORCTRL_DIRECTCOLOR | TVP3026_XTRUECOLORCTRL_RGB_565);
+			hw->DACreg[POS3026_XTRUECOLORCTRL] = (minfo->fbcon.var.green.length == 5) ? (TVP3026_XTRUECOLORCTRL_DIRECTCOLOR | TVP3026_XTRUECOLORCTRL_ORGB_1555) : (TVP3026_XTRUECOLORCTRL_DIRECTCOLOR | TVP3026_XTRUECOLORCTRL_RGB_565);
 			hw->DACreg[POS3026_XMUXCTRL] = muxctrl | TVP3026_XMUXCTRL_PIXEL_16BIT;
 			hw->DACreg[POS3026_XCLKCTRL] = TVP3026_XCLKCTRL_SRC_PLL | TVP3026_XCLKCTRL_DIV2;
 			break;
@@ -412,9 +412,9 @@ static int Ti3026_init(WPMINFO struct my_timming* m) {
 		hw->DACreg[POS3026_XGENCTRL] |= TVP3026_XGENCTRL_SYNC_ON_GREEN;
 
 	/* set DELAY */
-	if (ACCESS_FBINFO(video.len) < 0x400000)
+	if (minfo->video.len < 0x400000)
 		hw->CRTCEXT[3] |= 0x08;
-	else if (ACCESS_FBINFO(video.len) > 0x400000)
+	else if (minfo->video.len > 0x400000)
 		hw->CRTCEXT[3] |= 0x10;
 
 	/* set HWCURSOR */
@@ -426,7 +426,7 @@ static int Ti3026_init(WPMINFO struct my_timming* m) {
 
 	/* set interleaving */
 	hw->MXoptionReg &= ~0x00001000;
-	if (isInterleave(MINFO)) hw->MXoptionReg |= 0x00001000;
+	if (isInterleave(minfo)) hw->MXoptionReg |= 0x00001000;
 
 	/* set DAC */
 	Ti3026_setpclk(PMINFO m->pixclock);
@@ -442,7 +442,7 @@ static void ti3026_setMCLK(WPMINFO int fout){
 
 	DBG(__func__)
 
-	f_pll = Ti3026_calcclock(PMINFO fout, ACCESS_FBINFO(max_pixel_clock), &mclk_n, &mclk_m, &mclk_p);
+	f_pll = Ti3026_calcclock(PMINFO fout, minfo->max_pixel_clock, &mclk_n, &mclk_m, &mclk_p);
 
 	/* save pclk */
 	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFC);
@@ -496,7 +496,7 @@ static void ti3026_setMCLK(WPMINFO int fout){
 		printk(KERN_ERR "matroxfb: Memory PLL not locked after 5 secs\n");
 
 	f_pll = f_pll * 333 / (10000 << mclk_p);
-	if (isMilleniumII(MINFO)) {
+	if (isMilleniumII(minfo)) {
 		rfhcnt = (f_pll - 128) / 256;
 		if (rfhcnt > 15)
 			rfhcnt = 15;
@@ -505,8 +505,8 @@ static void ti3026_setMCLK(WPMINFO int fout){
 		if (rfhcnt > 15)
 			rfhcnt = 0;
 	}
-	ACCESS_FBINFO(hw).MXoptionReg = (ACCESS_FBINFO(hw).MXoptionReg & ~0x000F0000) | (rfhcnt << 16);
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, ACCESS_FBINFO(hw).MXoptionReg);
+	minfo->hw.MXoptionReg = (minfo->hw.MXoptionReg & ~0x000F0000) | (rfhcnt << 16);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
 
 	/* output MCLK to MCLK pin */
 	outTi3026(PMINFO TVP3026_XMEMPLLCTRL, (mclk_ctl & 0xE7) | TVP3026_XMEMPLLCTRL_MCLK_MCLKPLL);
@@ -536,14 +536,14 @@ static void ti3026_ramdac_init(WPMINFO2) {
 
 	DBG(__func__)
 
-	ACCESS_FBINFO(features.pll.vco_freq_min) = 110000;
-	ACCESS_FBINFO(features.pll.ref_freq)	 = 114545;
-	ACCESS_FBINFO(features.pll.feed_div_min) = 2;
-	ACCESS_FBINFO(features.pll.feed_div_max) = 24;
-	ACCESS_FBINFO(features.pll.in_div_min)	 = 2;
-	ACCESS_FBINFO(features.pll.in_div_max)	 = 63;
-	ACCESS_FBINFO(features.pll.post_shift_max) = 3;
-	if (ACCESS_FBINFO(devflags.noinit))
+	minfo->features.pll.vco_freq_min = 110000;
+	minfo->features.pll.ref_freq	 = 114545;
+	minfo->features.pll.feed_div_min = 2;
+	minfo->features.pll.feed_div_max = 24;
+	minfo->features.pll.in_div_min	 = 2;
+	minfo->features.pll.in_div_max	 = 63;
+	minfo->features.pll.post_shift_max = 3;
+	if (minfo->devflags.noinit)
 		return;
 	ti3026_setMCLK(PMINFO 60000);
 }
@@ -551,7 +551,7 @@ static void ti3026_ramdac_init(WPMINFO2) {
 static void Ti3026_restore(WPMINFO2) {
 	int i;
 	unsigned char progdac[6];
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 	CRITFLAGS
 
 	DBG(__func__)
@@ -565,7 +565,7 @@ static void Ti3026_restore(WPMINFO2) {
 
 	CRITBEGIN
 
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 
 	CRITEND
 
@@ -573,7 +573,7 @@ static void Ti3026_restore(WPMINFO2) {
 
 	CRITBEGIN
 
-	ACCESS_FBINFO(crtc1.panpos) = -1;
+	minfo->crtc1.panpos = -1;
 	for (i = 0; i < 6; i++)
 		mga_setr(M_EXTVGA_INDEX, i, hw->CRTCEXT[i]);
 
@@ -678,35 +678,35 @@ static int Ti3026_preinit(WPMINFO2) {
 	static const int vxres_mill1[] = {             640, 768,  800,        960,
 					  1024, 1152, 1280,      1600,       1920,
 					  2048, 0};
-	struct matrox_hw_state* hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	ACCESS_FBINFO(millenium) = 1;
-	ACCESS_FBINFO(milleniumII) = (ACCESS_FBINFO(pcidev)->device != PCI_DEVICE_ID_MATROX_MIL);
-	ACCESS_FBINFO(capable.cfb4) = 1;
-	ACCESS_FBINFO(capable.text) = 1; /* isMilleniumII(MINFO); */
-	ACCESS_FBINFO(capable.vxres) = isMilleniumII(MINFO)?vxres_mill2:vxres_mill1;
+	minfo->millenium = 1;
+	minfo->milleniumII = (minfo->pcidev->device != PCI_DEVICE_ID_MATROX_MIL);
+	minfo->capable.cfb4 = 1;
+	minfo->capable.text = 1; /* isMilleniumII(minfo); */
+	minfo->capable.vxres = isMilleniumII(minfo) ? vxres_mill2 : vxres_mill1;
 
-	ACCESS_FBINFO(outputs[0]).data = MINFO;
-	ACCESS_FBINFO(outputs[0]).output = &ti3026_output;
-	ACCESS_FBINFO(outputs[0]).src = ACCESS_FBINFO(outputs[0]).default_src;
-	ACCESS_FBINFO(outputs[0]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
+	minfo->outputs[0].data = minfo;
+	minfo->outputs[0].output = &ti3026_output;
+	minfo->outputs[0].src = minfo->outputs[0].default_src;
+	minfo->outputs[0].mode = MATROXFB_OUTPUT_MODE_MONITOR;
 
-	if (ACCESS_FBINFO(devflags.noinit))
+	if (minfo->devflags.noinit)
 		return 0;
 	/* preserve VGA I/O, BIOS and PPC */
 	hw->MXoptionReg &= 0xC0000100;
 	hw->MXoptionReg |= 0x002C0000;
-	if (ACCESS_FBINFO(devflags.novga))
+	if (minfo->devflags.novga)
 		hw->MXoptionReg &= ~0x00000100;
-	if (ACCESS_FBINFO(devflags.nobios))
+	if (minfo->devflags.nobios)
 		hw->MXoptionReg &= ~0x40000000;
-	if (ACCESS_FBINFO(devflags.nopciretry))
+	if (minfo->devflags.nopciretry)
 		hw->MXoptionReg |=  0x20000000;
-	pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
+	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 
-	ACCESS_FBINFO(accel.ramdac_rev) = inTi3026(PMINFO TVP3026_XSILICONREV);
+	minfo->accel.ramdac_rev = inTi3026(PMINFO TVP3026_XSILICONREV);
 
 	outTi3026(PMINFO TVP3026_XCLKCTRL, TVP3026_XCLKCTRL_SRC_CLK0VGA | TVP3026_XCLKCTRL_CLKSTOPPED);
 	outTi3026(PMINFO TVP3026_XTRUECOLORCTRL, TVP3026_XTRUECOLORCTRL_PSEUDOCOLOR);
diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c
index 9c3aeee..ed42bf6 100644
--- a/drivers/video/matrox/matroxfb_accel.c
+++ b/drivers/video/matrox/matroxfb_accel.c
@@ -81,7 +81,7 @@
 #include "matroxfb_Ti3026.h"
 #include "matroxfb_misc.h"
 
-#define curr_ydstorg(x)	ACCESS_FBINFO2(x, curr.ydstorg.pixels)
+#define curr_ydstorg(x)	((x)->curr.ydstorg.pixels)
 
 #define mga_ydstlen(y,l) mga_outl(M_YDSTLEN | M_EXEC, ((y) << 16) | (l))
 
@@ -115,59 +115,59 @@ void matrox_cfbX_init(WPMINFO2) {
 
 	DBG(__func__)
 
-	mpitch = ACCESS_FBINFO(fbcon).var.xres_virtual;
+	mpitch = minfo->fbcon.var.xres_virtual;
 
-	ACCESS_FBINFO(fbops).fb_copyarea = cfb_copyarea;
-	ACCESS_FBINFO(fbops).fb_fillrect = cfb_fillrect;
-	ACCESS_FBINFO(fbops).fb_imageblit = cfb_imageblit;
-	ACCESS_FBINFO(fbops).fb_cursor = NULL;
+	minfo->fbops.fb_copyarea = cfb_copyarea;
+	minfo->fbops.fb_fillrect = cfb_fillrect;
+	minfo->fbops.fb_imageblit = cfb_imageblit;
+	minfo->fbops.fb_cursor = NULL;
 
-	accel = (ACCESS_FBINFO(fbcon).var.accel_flags & FB_ACCELF_TEXT) == FB_ACCELF_TEXT;
+	accel = (minfo->fbcon.var.accel_flags & FB_ACCELF_TEXT) == FB_ACCELF_TEXT;
 
-	switch (ACCESS_FBINFO(fbcon).var.bits_per_pixel) {
+	switch (minfo->fbcon.var.bits_per_pixel) {
 		case 4:		maccess = 0x00000000;	/* accelerate as 8bpp video */
 				mpitch = (mpitch >> 1) | 0x8000; /* disable linearization */
 				mopmode = M_OPMODE_4BPP;
-				matrox_cfb4_pal(ACCESS_FBINFO(cmap));
+				matrox_cfb4_pal(minfo->cmap);
 				if (accel && !(mpitch & 1)) {
-					ACCESS_FBINFO(fbops).fb_copyarea = matroxfb_cfb4_copyarea;
-					ACCESS_FBINFO(fbops).fb_fillrect = matroxfb_cfb4_fillrect;
+					minfo->fbops.fb_copyarea = matroxfb_cfb4_copyarea;
+					minfo->fbops.fb_fillrect = matroxfb_cfb4_fillrect;
 				}
 				break;
 		case 8:		maccess = 0x00000000;
 				mopmode = M_OPMODE_8BPP;
-				matrox_cfb8_pal(ACCESS_FBINFO(cmap));
+				matrox_cfb8_pal(minfo->cmap);
 				if (accel) {
-					ACCESS_FBINFO(fbops).fb_copyarea = matroxfb_copyarea;
-					ACCESS_FBINFO(fbops).fb_fillrect = matroxfb_fillrect;
-					ACCESS_FBINFO(fbops).fb_imageblit = matroxfb_imageblit;
+					minfo->fbops.fb_copyarea = matroxfb_copyarea;
+					minfo->fbops.fb_fillrect = matroxfb_fillrect;
+					minfo->fbops.fb_imageblit = matroxfb_imageblit;
 				}
 				break;
-		case 16:	if (ACCESS_FBINFO(fbcon).var.green.length == 5)
+		case 16:	if (minfo->fbcon.var.green.length == 5)
 					maccess = 0xC0000001;
 				else
 					maccess = 0x40000001;
 				mopmode = M_OPMODE_16BPP;
 				if (accel) {
-					ACCESS_FBINFO(fbops).fb_copyarea = matroxfb_copyarea;
-					ACCESS_FBINFO(fbops).fb_fillrect = matroxfb_fillrect;
-					ACCESS_FBINFO(fbops).fb_imageblit = matroxfb_imageblit;
+					minfo->fbops.fb_copyarea = matroxfb_copyarea;
+					minfo->fbops.fb_fillrect = matroxfb_fillrect;
+					minfo->fbops.fb_imageblit = matroxfb_imageblit;
 				}
 				break;
 		case 24:	maccess = 0x00000003;
 				mopmode = M_OPMODE_24BPP;
 				if (accel) {
-					ACCESS_FBINFO(fbops).fb_copyarea = matroxfb_copyarea;
-					ACCESS_FBINFO(fbops).fb_fillrect = matroxfb_fillrect;
-					ACCESS_FBINFO(fbops).fb_imageblit = matroxfb_imageblit;
+					minfo->fbops.fb_copyarea = matroxfb_copyarea;
+					minfo->fbops.fb_fillrect = matroxfb_fillrect;
+					minfo->fbops.fb_imageblit = matroxfb_imageblit;
 				}
 				break;
 		case 32:	maccess = 0x00000002;
 				mopmode = M_OPMODE_32BPP;
 				if (accel) {
-					ACCESS_FBINFO(fbops).fb_copyarea = matroxfb_copyarea;
-					ACCESS_FBINFO(fbops).fb_fillrect = matroxfb_fillrect;
-					ACCESS_FBINFO(fbops).fb_imageblit = matroxfb_imageblit;
+					minfo->fbops.fb_copyarea = matroxfb_copyarea;
+					minfo->fbops.fb_fillrect = matroxfb_fillrect;
+					minfo->fbops.fb_imageblit = matroxfb_imageblit;
 				}
 				break;
 		default:	maccess = 0x00000000;
@@ -176,10 +176,10 @@ void matrox_cfbX_init(WPMINFO2) {
 	}
 	mga_fifo(8);
 	mga_outl(M_PITCH, mpitch);
-	mga_outl(M_YDSTORG, curr_ydstorg(MINFO));
-	if (ACCESS_FBINFO(capable.plnwt))
+	mga_outl(M_YDSTORG, curr_ydstorg(minfo));
+	if (minfo->capable.plnwt)
 		mga_outl(M_PLNWT, -1);
-	if (ACCESS_FBINFO(capable.srcorg)) {
+	if (minfo->capable.srcorg) {
 		mga_outl(M_SRCORG, 0);
 		mga_outl(M_DSTORG, 0);
 	}
@@ -188,9 +188,9 @@ void matrox_cfbX_init(WPMINFO2) {
 	mga_outl(M_YTOP, 0);
 	mga_outl(M_YBOT, 0x01FFFFFF);
 	mga_outl(M_MACCESS, maccess);
-	ACCESS_FBINFO(accel.m_dwg_rect) = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO;
-	if (isMilleniumII(MINFO)) ACCESS_FBINFO(accel.m_dwg_rect) |= M_DWG_TRANSC;
-	ACCESS_FBINFO(accel.m_opmode) = mopmode;
+	minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO;
+	if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC;
+	minfo->accel.m_opmode = mopmode;
 }
 
 EXPORT_SYMBOL(matrox_cfbX_init);
@@ -209,7 +209,7 @@ static void matrox_accel_bmove(WPMINFO int vxres, int sy, int sx, int dy, int dx
 			 M_DWG_BFCOL | M_DWG_REPLACE);
 		mga_outl(M_AR5, vxres);
 		width--;
-		start = sy*vxres+sx+curr_ydstorg(MINFO);
+		start = sy*vxres+sx+curr_ydstorg(minfo);
 		end = start+width;
 	} else {
 		mga_fifo(3);
@@ -217,7 +217,7 @@ static void matrox_accel_bmove(WPMINFO int vxres, int sy, int sx, int dy, int dx
 		mga_outl(M_SGN, 5);
 		mga_outl(M_AR5, -vxres);
 		width--;
-		end = (sy+height-1)*vxres+sx+curr_ydstorg(MINFO);
+		end = (sy+height-1)*vxres+sx+curr_ydstorg(minfo);
 		start = end+width;
 		dy += height-1;
 	}
@@ -245,7 +245,7 @@ static void matrox_accel_bmove_lin(WPMINFO int vxres, int sy, int sx, int dy, in
 			M_DWG_BFCOL | M_DWG_REPLACE);
 		mga_outl(M_AR5, vxres);
 		width--;
-		start = sy*vxres+sx+curr_ydstorg(MINFO);
+		start = sy*vxres+sx+curr_ydstorg(minfo);
 		end = start+width;
 	} else {
 		mga_fifo(3);
@@ -253,7 +253,7 @@ static void matrox_accel_bmove_lin(WPMINFO int vxres, int sy, int sx, int dy, in
 		mga_outl(M_SGN, 5);
 		mga_outl(M_AR5, -vxres);
 		width--;
-		end = (sy+height-1)*vxres+sx+curr_ydstorg(MINFO);
+		end = (sy+height-1)*vxres+sx+curr_ydstorg(minfo);
 		start = end+width;
 		dy += height-1;
 	}
@@ -274,13 +274,13 @@ static void matroxfb_cfb4_copyarea(struct fb_info* info, const struct fb_copyare
 	if ((area->sx | area->dx | area->width) & 1)
 		cfb_copyarea(info, area);
 	else
-		matrox_accel_bmove_lin(PMINFO ACCESS_FBINFO(fbcon.var.xres_virtual) >> 1, area->sy, area->sx >> 1, area->dy, area->dx >> 1, area->height, area->width >> 1);
+		matrox_accel_bmove_lin(PMINFO minfo->fbcon.var.xres_virtual >> 1, area->sy, area->sx >> 1, area->dy, area->dx >> 1, area->height, area->width >> 1);
 }
 
 static void matroxfb_copyarea(struct fb_info* info, const struct fb_copyarea* area) {
 	MINFO_FROM_INFO(info);
 
-	matrox_accel_bmove(PMINFO ACCESS_FBINFO(fbcon.var.xres_virtual), area->sy, area->sx, area->dy, area->dx, area->height, area->width);
+	matrox_accel_bmove(PMINFO minfo->fbcon.var.xres_virtual, area->sy, area->sx, area->dy, area->dx, area->height, area->width);
 }
 
 static void matroxfb_accel_clear(WPMINFO u_int32_t color, int sy, int sx, int height,
@@ -292,7 +292,7 @@ static void matroxfb_accel_clear(WPMINFO u_int32_t color, int sy, int sx, int he
 	CRITBEGIN
 
 	mga_fifo(5);
-	mga_outl(M_DWGCTL, ACCESS_FBINFO(accel.m_dwg_rect) | M_DWG_REPLACE);
+	mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE);
 	mga_outl(M_FCOL, color);
 	mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
 	mga_ydstlen(sy, height);
@@ -333,16 +333,16 @@ static void matroxfb_cfb4_clear(WPMINFO u_int32_t bgx, int sy, int sx, int heigh
 	sx >>= 1;
 	if (width) {
 		mga_fifo(5);
-		mga_outl(M_DWGCTL, ACCESS_FBINFO(accel.m_dwg_rect) | M_DWG_REPLACE2);
+		mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2);
 		mga_outl(M_FCOL, bgx);
 		mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
-		mga_outl(M_YDST, sy * ACCESS_FBINFO(fbcon).var.xres_virtual >> 6);
+		mga_outl(M_YDST, sy * minfo->fbcon.var.xres_virtual >> 6);
 		mga_outl(M_LEN | M_EXEC, height);
 		WaitTillIdle();
 	}
 	if (whattodo) {
-		u_int32_t step = ACCESS_FBINFO(fbcon).var.xres_virtual >> 1;
-		vaddr_t vbase = ACCESS_FBINFO(video.vbase);
+		u_int32_t step = minfo->fbcon.var.xres_virtual >> 1;
+		vaddr_t vbase = minfo->video.vbase;
 		if (whattodo & 1) {
 			unsigned int uaddr = sy * step + sx - 1;
 			u_int32_t loop;
@@ -412,7 +412,7 @@ static void matroxfb_1bpp_imageblit(WPMINFO u_int32_t fgx, u_int32_t bgx,
 	mga_outl(M_FCOL, fgx);
 	mga_outl(M_BCOL, bgx);
 	fxbndry = ((xx + width - 1) << 16) | xx;
-	mmio = ACCESS_FBINFO(mmio.vbase);
+	mmio = minfo->mmio.vbase;
 
 	mga_fifo(6);
 	mga_writel(mmio, M_FXBNDRY, fxbndry);
diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c
index 6ede98d..867a4d9 100644
--- a/drivers/video/matrox/matroxfb_base.c
+++ b/drivers/video/matrox/matroxfb_base.c
@@ -155,20 +155,20 @@ static struct fb_var_screeninfo vesafb_defined = {
 
 /* --------------------------------------------------------------------- */
 static void update_crtc2(WPMINFO unsigned int pos) {
-	struct matroxfb_dh_fb_info* info = ACCESS_FBINFO(crtc2.info);
+	struct matroxfb_dh_fb_info *info = minfo->crtc2.info;
 
 	/* Make sure that displays are compatible */
-	if (info && (info->fbcon.var.bits_per_pixel == ACCESS_FBINFO(fbcon).var.bits_per_pixel)
-		 && (info->fbcon.var.xres_virtual == ACCESS_FBINFO(fbcon).var.xres_virtual)
-		 && (info->fbcon.var.green.length == ACCESS_FBINFO(fbcon).var.green.length)
+	if (info && (info->fbcon.var.bits_per_pixel == minfo->fbcon.var.bits_per_pixel)
+		 && (info->fbcon.var.xres_virtual == minfo->fbcon.var.xres_virtual)
+		 && (info->fbcon.var.green.length == minfo->fbcon.var.green.length)
 		 ) {
-		switch (ACCESS_FBINFO(fbcon).var.bits_per_pixel) {
+		switch (minfo->fbcon.var.bits_per_pixel) {
 			case 16:
 			case 32:
 				pos = pos * 8;
 				if (info->interlaced) {
 					mga_outl(0x3C2C, pos);
-					mga_outl(0x3C28, pos + ACCESS_FBINFO(fbcon).var.xres_virtual * ACCESS_FBINFO(fbcon).var.bits_per_pixel / 8);
+					mga_outl(0x3C28, pos + minfo->fbcon.var.xres_virtual * minfo->fbcon.var.bits_per_pixel / 8);
 				} else {
 					mga_outl(0x3C28, pos);
 				}
@@ -178,16 +178,16 @@ static void update_crtc2(WPMINFO unsigned int pos) {
 }
 
 static void matroxfb_crtc1_panpos(WPMINFO2) {
-	if (ACCESS_FBINFO(crtc1.panpos) >= 0) {
+	if (minfo->crtc1.panpos >= 0) {
 		unsigned long flags;
 		int panpos;
 
 		matroxfb_DAC_lock_irqsave(flags);
-		panpos = ACCESS_FBINFO(crtc1.panpos);
+		panpos = minfo->crtc1.panpos;
 		if (panpos >= 0) {
 			unsigned int extvga_reg;
 
-			ACCESS_FBINFO(crtc1.panpos) = -1; /* No update pending anymore */
+			minfo->crtc1.panpos = -1; /* No update pending anymore */
 			extvga_reg = mga_inb(M_EXTVGA_INDEX);
 			mga_setr(M_EXTVGA_INDEX, 0x00, panpos);
 			if (extvga_reg != 0x00) {
@@ -209,15 +209,15 @@ static irqreturn_t matrox_irq(int irq, void *dev_id)
 
 	if (status & 0x20) {
 		mga_outl(M_ICLEAR, 0x20);
-		ACCESS_FBINFO(crtc1.vsync.cnt)++;
+		minfo->crtc1.vsync.cnt++;
 		matroxfb_crtc1_panpos(PMINFO2);
-		wake_up_interruptible(&ACCESS_FBINFO(crtc1.vsync.wait));
+		wake_up_interruptible(&minfo->crtc1.vsync.wait);
 		handled = 1;
 	}
 	if (status & 0x200) {
 		mga_outl(M_ICLEAR, 0x200);
-		ACCESS_FBINFO(crtc2.vsync.cnt)++;
-		wake_up_interruptible(&ACCESS_FBINFO(crtc2.vsync.wait));
+		minfo->crtc2.vsync.cnt++;
+		wake_up_interruptible(&minfo->crtc2.vsync.wait);
 		handled = 1;
 	}
 	return IRQ_RETVAL(handled);
@@ -226,15 +226,15 @@ static irqreturn_t matrox_irq(int irq, void *dev_id)
 int matroxfb_enable_irq(WPMINFO int reenable) {
 	u_int32_t bm;
 
-	if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG400)
+	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400)
 		bm = 0x220;
 	else
 		bm = 0x020;
 
-	if (!test_and_set_bit(0, &ACCESS_FBINFO(irq_flags))) {
-		if (request_irq(ACCESS_FBINFO(pcidev)->irq, matrox_irq,
-				IRQF_SHARED, "matroxfb", MINFO)) {
-			clear_bit(0, &ACCESS_FBINFO(irq_flags));
+	if (!test_and_set_bit(0, &minfo->irq_flags)) {
+		if (request_irq(minfo->pcidev->irq, matrox_irq,
+				IRQF_SHARED, "matroxfb", minfo)) {
+			clear_bit(0, &minfo->irq_flags);
 			return -EINVAL;
 		}
 		/* Clear any pending field interrupts */
@@ -253,14 +253,14 @@ int matroxfb_enable_irq(WPMINFO int reenable) {
 }
 
 static void matroxfb_disable_irq(WPMINFO2) {
-	if (test_and_clear_bit(0, &ACCESS_FBINFO(irq_flags))) {
+	if (test_and_clear_bit(0, &minfo->irq_flags)) {
 		/* Flush pending pan-at-vbl request... */
 		matroxfb_crtc1_panpos(PMINFO2);
-		if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG400)
+		if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400)
 			mga_outl(M_IEN, mga_inl(M_IEN) & ~0x220);
 		else
 			mga_outl(M_IEN, mga_inl(M_IEN) & ~0x20);
-		free_irq(ACCESS_FBINFO(pcidev)->irq, MINFO);
+		free_irq(minfo->pcidev->irq, minfo);
 	}
 }
 
@@ -271,13 +271,13 @@ int matroxfb_wait_for_sync(WPMINFO u_int32_t crtc) {
 
 	switch (crtc) {
 		case 0:
-			vs = &ACCESS_FBINFO(crtc1.vsync);
+			vs = &minfo->crtc1.vsync;
 			break;
 		case 1:
-			if (ACCESS_FBINFO(devflags.accelerator) != FB_ACCEL_MATROX_MGAG400) {
+			if (minfo->devflags.accelerator != FB_ACCEL_MATROX_MGAG400) {
 				return -ENODEV;
 			}
-			vs = &ACCESS_FBINFO(crtc2.vsync);
+			vs = &minfo->crtc2.vsync;
 			break;
 		default:
 			return -ENODEV;
@@ -314,18 +314,18 @@ static void matrox_pan_var(WPMINFO struct fb_var_screeninfo *var) {
 
 	DBG(__func__)
 
-	if (ACCESS_FBINFO(dead))
+	if (minfo->dead)
 		return;
 
-	ACCESS_FBINFO(fbcon).var.xoffset = var->xoffset;
-	ACCESS_FBINFO(fbcon).var.yoffset = var->yoffset;
-	pos = (ACCESS_FBINFO(fbcon).var.yoffset * ACCESS_FBINFO(fbcon).var.xres_virtual + ACCESS_FBINFO(fbcon).var.xoffset) * ACCESS_FBINFO(curr.final_bppShift) / 32;
-	pos += ACCESS_FBINFO(curr.ydstorg.chunks);
-	p0 = ACCESS_FBINFO(hw).CRTC[0x0D] = pos & 0xFF;
-	p1 = ACCESS_FBINFO(hw).CRTC[0x0C] = (pos & 0xFF00) >> 8;
-	p2 = ACCESS_FBINFO(hw).CRTCEXT[0] = (ACCESS_FBINFO(hw).CRTCEXT[0] & 0xB0) | ((pos >> 16) & 0x0F) | ((pos >> 14) & 0x40);
+	minfo->fbcon.var.xoffset = var->xoffset;
+	minfo->fbcon.var.yoffset = var->yoffset;
+	pos = (minfo->fbcon.var.yoffset * minfo->fbcon.var.xres_virtual + minfo->fbcon.var.xoffset) * minfo->curr.final_bppShift / 32;
+	pos += minfo->curr.ydstorg.chunks;
+	p0 = minfo->hw.CRTC[0x0D] = pos & 0xFF;
+	p1 = minfo->hw.CRTC[0x0C] = (pos & 0xFF00) >> 8;
+	p2 = minfo->hw.CRTCEXT[0] = (minfo->hw.CRTCEXT[0] & 0xB0) | ((pos >> 16) & 0x0F) | ((pos >> 14) & 0x40);
 #ifdef CONFIG_FB_MATROX_32MB
-	p3 = ACCESS_FBINFO(hw).CRTCEXT[8] = pos >> 21;
+	p3 = minfo->hw.CRTCEXT[8] = pos >> 21;
 #endif
 
 	/* FB_ACTIVATE_VBL and we can acquire interrupts? Honor FB_ACTIVATE_VBL then... */
@@ -337,14 +337,14 @@ static void matrox_pan_var(WPMINFO struct fb_var_screeninfo *var) {
 	mga_setr(M_CRTC_INDEX, 0x0D, p0);
 	mga_setr(M_CRTC_INDEX, 0x0C, p1);
 #ifdef CONFIG_FB_MATROX_32MB
-	if (ACCESS_FBINFO(devflags.support32MB))
+	if (minfo->devflags.support32MB)
 		mga_setr(M_EXTVGA_INDEX, 0x08, p3);
 #endif
 	if (vbl) {
-		ACCESS_FBINFO(crtc1.panpos) = p2;
+		minfo->crtc1.panpos = p2;
 	} else {
 		/* Abort any pending change */
-		ACCESS_FBINFO(crtc1.panpos) = -1;
+		minfo->crtc1.panpos = -1;
 		mga_setr(M_EXTVGA_INDEX, 0x00, p2);
 	}
 	matroxfb_DAC_unlock_irqrestore(flags);
@@ -363,22 +363,22 @@ static void matroxfb_remove(WPMINFO int dummy) {
 	 * write data without causing too much damage...
 	 */
 
-	ACCESS_FBINFO(dead) = 1;
-	if (ACCESS_FBINFO(usecount)) {
+	minfo->dead = 1;
+	if (minfo->usecount) {
 		/* destroy it later */
 		return;
 	}
-	matroxfb_unregister_device(MINFO);
-	unregister_framebuffer(&ACCESS_FBINFO(fbcon));
+	matroxfb_unregister_device(minfo);
+	unregister_framebuffer(&minfo->fbcon);
 	matroxfb_g450_shutdown(PMINFO2);
 #ifdef CONFIG_MTRR
-	if (ACCESS_FBINFO(mtrr.vram_valid))
-		mtrr_del(ACCESS_FBINFO(mtrr.vram), ACCESS_FBINFO(video.base), ACCESS_FBINFO(video.len));
+	if (minfo->mtrr.vram_valid)
+		mtrr_del(minfo->mtrr.vram, minfo->video.base, minfo->video.len);
 #endif
-	mga_iounmap(ACCESS_FBINFO(mmio.vbase));
-	mga_iounmap(ACCESS_FBINFO(video.vbase));
-	release_mem_region(ACCESS_FBINFO(video.base), ACCESS_FBINFO(video.len_maximum));
-	release_mem_region(ACCESS_FBINFO(mmio.base), 16384);
+	mga_iounmap(minfo->mmio.vbase);
+	mga_iounmap(minfo->video.vbase);
+	release_mem_region(minfo->video.base, minfo->video.len_maximum);
+	release_mem_region(minfo->mmio.base, 16384);
 	kfree(minfo);
 }
 
@@ -392,12 +392,12 @@ static int matroxfb_open(struct fb_info *info, int user)
 
 	DBG_LOOP(__func__)
 
-	if (ACCESS_FBINFO(dead)) {
+	if (minfo->dead) {
 		return -ENXIO;
 	}
-	ACCESS_FBINFO(usecount)++;
+	minfo->usecount++;
 	if (user) {
-		ACCESS_FBINFO(userusecount)++;
+		minfo->userusecount++;
 	}
 	return(0);
 }
@@ -409,11 +409,11 @@ static int matroxfb_release(struct fb_info *info, int user)
 	DBG_LOOP(__func__)
 
 	if (user) {
-		if (0 == --ACCESS_FBINFO(userusecount)) {
+		if (0 == --minfo->userusecount) {
 			matroxfb_disable_irq(PMINFO2);
 		}
 	}
-	if (!(--ACCESS_FBINFO(usecount)) && ACCESS_FBINFO(dead)) {
+	if (!(--minfo->usecount) && minfo->dead) {
 		matroxfb_remove(PMINFO 0);
 	}
 	return(0);
@@ -438,9 +438,9 @@ static int matroxfb_get_final_bppShift(CPMINFO int bpp) {
 	if (!bppshft2) {
 		return 8;
 	}
-	if (isInterleave(MINFO))
+	if (isInterleave(minfo))
 		bppshft2 >>= 1;
-	if (ACCESS_FBINFO(devflags.video64bits))
+	if (minfo->devflags.video64bits)
 		bppshft2 >>= 1;
 	return bppshft2;
 }
@@ -463,11 +463,11 @@ static int matroxfb_test_and_set_rounding(CPMINFO int xres, int bpp) {
 				break;
 		default:	rounding = 16;
 				/* on G400, 16 really does not work */
-				if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG400)
+				if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400)
 					rounding = 32;
 				break;
 	}
-	if (isInterleave(MINFO)) {
+	if (isInterleave(minfo)) {
 		rounding *= 2;
 	}
 	over = xres % rounding;
@@ -484,9 +484,9 @@ static int matroxfb_pitch_adjust(CPMINFO int xres, int bpp) {
 
 	if (!bpp) return xres;
 
-	width = ACCESS_FBINFO(capable.vxres);
+	width = minfo->capable.vxres;
 
-	if (ACCESS_FBINFO(devflags.precise_width)) {
+	if (minfo->devflags.precise_width) {
 		while (*width) {
 			if ((*width >= xres) && (matroxfb_test_and_set_rounding(PMINFO *width, bpp) == *width)) {
 				break;
@@ -549,7 +549,7 @@ static int matroxfb_decode_var(CPMINFO struct fb_var_screeninfo *var, int *visua
 	DBG(__func__)
 
 	switch (bpp) {
-		case 4:	 if (!ACCESS_FBINFO(capable.cfb4)) return -EINVAL;
+		case 4:	 if (!minfo->capable.cfb4) return -EINVAL;
 			 break;
 		case 8:	 break;
 		case 16: break;
@@ -558,7 +558,7 @@ static int matroxfb_decode_var(CPMINFO struct fb_var_screeninfo *var, int *visua
 		default: return -EINVAL;
 	}
 	*ydstorg = 0;
-	vramlen = ACCESS_FBINFO(video.len_usable);
+	vramlen = minfo->video.len_usable;
 	if (var->yres_virtual < var->yres)
 		var->yres_virtual = var->yres;
 	if (var->xres_virtual < var->xres)
@@ -573,7 +573,7 @@ static int matroxfb_decode_var(CPMINFO struct fb_var_screeninfo *var, int *visua
 	/* There is hardware bug that no line can cross 4MB boundary */
 	/* give up for CFB24, it is impossible to easy workaround it */
 	/* for other try to do something */
-	if (!ACCESS_FBINFO(capable.cross4MB) && (memlen > 0x400000)) {
+	if (!minfo->capable.cross4MB && (memlen > 0x400000)) {
 		if (bpp == 24) {
 			/* sorry */
 		} else {
@@ -653,20 +653,20 @@ static int matroxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	 *  != 0 for invalid regno.
 	 */
 
-	if (regno >= ACCESS_FBINFO(curr.cmap_len))
+	if (regno >= minfo->curr.cmap_len)
 		return 1;
 
-	if (ACCESS_FBINFO(fbcon).var.grayscale) {
+	if (minfo->fbcon.var.grayscale) {
 		/* gray = 0.30*R + 0.59*G + 0.11*B */
 		red = green = blue = (red * 77 + green * 151 + blue * 28) >> 8;
 	}
 
-	red = CNVT_TOHW(red, ACCESS_FBINFO(fbcon).var.red.length);
-	green = CNVT_TOHW(green, ACCESS_FBINFO(fbcon).var.green.length);
-	blue = CNVT_TOHW(blue, ACCESS_FBINFO(fbcon).var.blue.length);
-	transp = CNVT_TOHW(transp, ACCESS_FBINFO(fbcon).var.transp.length);
+	red = CNVT_TOHW(red, minfo->fbcon.var.red.length);
+	green = CNVT_TOHW(green, minfo->fbcon.var.green.length);
+	blue = CNVT_TOHW(blue, minfo->fbcon.var.blue.length);
+	transp = CNVT_TOHW(transp, minfo->fbcon.var.transp.length);
 
-	switch (ACCESS_FBINFO(fbcon).var.bits_per_pixel) {
+	switch (minfo->fbcon.var.bits_per_pixel) {
 	case 4:
 	case 8:
 		mga_outb(M_DAC_REG, regno);
@@ -679,22 +679,22 @@ static int matroxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			break;
 		{
 			u_int16_t col =
-				(red << ACCESS_FBINFO(fbcon).var.red.offset)     |
-				(green << ACCESS_FBINFO(fbcon).var.green.offset) |
-				(blue << ACCESS_FBINFO(fbcon).var.blue.offset)   |
-				(transp << ACCESS_FBINFO(fbcon).var.transp.offset); /* for 1:5:5:5 */
-			ACCESS_FBINFO(cmap[regno]) = col | (col << 16);
+				(red << minfo->fbcon.var.red.offset)     |
+				(green << minfo->fbcon.var.green.offset) |
+				(blue << minfo->fbcon.var.blue.offset)   |
+				(transp << minfo->fbcon.var.transp.offset); /* for 1:5:5:5 */
+			minfo->cmap[regno] = col | (col << 16);
 		}
 		break;
 	case 24:
 	case 32:
 		if (regno >= 16)
 			break;
-		ACCESS_FBINFO(cmap[regno]) =
-			(red   << ACCESS_FBINFO(fbcon).var.red.offset)   |
-			(green << ACCESS_FBINFO(fbcon).var.green.offset) |
-			(blue  << ACCESS_FBINFO(fbcon).var.blue.offset)  |
-			(transp << ACCESS_FBINFO(fbcon).var.transp.offset);	/* 8:8:8:8 */
+		minfo->cmap[regno] =
+			(red   << minfo->fbcon.var.red.offset)   |
+			(green << minfo->fbcon.var.green.offset) |
+			(blue  << minfo->fbcon.var.blue.offset)  |
+			(transp << minfo->fbcon.var.transp.offset);	/* 8:8:8:8 */
 		break;
 	}
 	return 0;
@@ -702,7 +702,7 @@ static int matroxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 
 static void matroxfb_init_fix(WPMINFO2)
 {
-	struct fb_fix_screeninfo *fix = &ACCESS_FBINFO(fbcon).fix;
+	struct fb_fix_screeninfo *fix = &minfo->fbcon.fix;
 	DBG(__func__)
 
 	strcpy(fix->id,"MATROX");
@@ -710,20 +710,20 @@ static void matroxfb_init_fix(WPMINFO2)
 	fix->xpanstep = 8;	/* 8 for 8bpp, 4 for 16bpp, 2 for 32bpp */
 	fix->ypanstep = 1;
 	fix->ywrapstep = 0;
-	fix->mmio_start = ACCESS_FBINFO(mmio.base);
-	fix->mmio_len = ACCESS_FBINFO(mmio.len);
-	fix->accel = ACCESS_FBINFO(devflags.accelerator);
+	fix->mmio_start = minfo->mmio.base;
+	fix->mmio_len = minfo->mmio.len;
+	fix->accel = minfo->devflags.accelerator;
 }
 
 static void matroxfb_update_fix(WPMINFO2)
 {
-	struct fb_fix_screeninfo *fix = &ACCESS_FBINFO(fbcon).fix;
+	struct fb_fix_screeninfo *fix = &minfo->fbcon.fix;
 	DBG(__func__)
 
-	mutex_lock(&ACCESS_FBINFO(fbcon).mm_lock);
-	fix->smem_start = ACCESS_FBINFO(video.base) + ACCESS_FBINFO(curr.ydstorg.bytes);
-	fix->smem_len = ACCESS_FBINFO(video.len_usable) - ACCESS_FBINFO(curr.ydstorg.bytes);
-	mutex_unlock(&ACCESS_FBINFO(fbcon).mm_lock);
+	mutex_lock(&minfo->fbcon.mm_lock);
+	fix->smem_start = minfo->video.base + minfo->curr.ydstorg.bytes;
+	fix->smem_len = minfo->video.len_usable - minfo->curr.ydstorg.bytes;
+	mutex_unlock(&minfo->fbcon.mm_lock);
 }
 
 static int matroxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
@@ -734,7 +734,7 @@ static int matroxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *inf
 	unsigned int ydstorg;
 	MINFO_FROM_INFO(info);
 
-	if (ACCESS_FBINFO(dead)) {
+	if (minfo->dead) {
 		return -ENXIO;
 	}
 	if ((err = matroxfb_decode_var(PMINFO var, &visual, &cmap_len, &ydstorg)) != 0)
@@ -753,31 +753,31 @@ static int matroxfb_set_par(struct fb_info *info)
 
 	DBG(__func__)
 
-	if (ACCESS_FBINFO(dead)) {
+	if (minfo->dead) {
 		return -ENXIO;
 	}
 
 	var = &info->var;
 	if ((err = matroxfb_decode_var(PMINFO var, &visual, &cmap_len, &ydstorg)) != 0)
 		return err;
-	ACCESS_FBINFO(fbcon.screen_base) = vaddr_va(ACCESS_FBINFO(video.vbase)) + ydstorg;
+	minfo->fbcon.screen_base = vaddr_va(minfo->video.vbase) + ydstorg;
 	matroxfb_update_fix(PMINFO2);
-	ACCESS_FBINFO(fbcon).fix.visual = visual;
-	ACCESS_FBINFO(fbcon).fix.type = FB_TYPE_PACKED_PIXELS;
-	ACCESS_FBINFO(fbcon).fix.type_aux = 0;
-	ACCESS_FBINFO(fbcon).fix.line_length = (var->xres_virtual * var->bits_per_pixel) >> 3;
+	minfo->fbcon.fix.visual = visual;
+	minfo->fbcon.fix.type = FB_TYPE_PACKED_PIXELS;
+	minfo->fbcon.fix.type_aux = 0;
+	minfo->fbcon.fix.line_length = (var->xres_virtual * var->bits_per_pixel) >> 3;
 	{
 		unsigned int pos;
 
-		ACCESS_FBINFO(curr.cmap_len) = cmap_len;
-		ydstorg += ACCESS_FBINFO(devflags.ydstorg);
-		ACCESS_FBINFO(curr.ydstorg.bytes) = ydstorg;
-		ACCESS_FBINFO(curr.ydstorg.chunks) = ydstorg >> (isInterleave(MINFO)?3:2);
+		minfo->curr.cmap_len = cmap_len;
+		ydstorg += minfo->devflags.ydstorg;
+		minfo->curr.ydstorg.bytes = ydstorg;
+		minfo->curr.ydstorg.chunks = ydstorg >> (isInterleave(minfo) ? 3 : 2);
 		if (var->bits_per_pixel == 4)
-			ACCESS_FBINFO(curr.ydstorg.pixels) = ydstorg;
+			minfo->curr.ydstorg.pixels = ydstorg;
 		else
-			ACCESS_FBINFO(curr.ydstorg.pixels) = (ydstorg * 8) / var->bits_per_pixel;
-		ACCESS_FBINFO(curr.final_bppShift) = matroxfb_get_final_bppShift(PMINFO var->bits_per_pixel);
+			minfo->curr.ydstorg.pixels = (ydstorg * 8) / var->bits_per_pixel;
+		minfo->curr.final_bppShift = matroxfb_get_final_bppShift(PMINFO var->bits_per_pixel);
 		{	struct my_timming mt;
 			struct matrox_hw_state* hw;
 			int out;
@@ -793,46 +793,46 @@ static int matroxfb_set_par(struct fb_info *info)
 				default:	mt.delay = 31 + 8; break;
 			}
 
-			hw = &ACCESS_FBINFO(hw);
+			hw = &minfo->hw;
 
-			down_read(&ACCESS_FBINFO(altout).lock);
+			down_read(&minfo->altout.lock);
 			for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-				if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC1 &&
-				    ACCESS_FBINFO(outputs[out]).output->compute) {
-					ACCESS_FBINFO(outputs[out]).output->compute(ACCESS_FBINFO(outputs[out]).data, &mt);
+				if (minfo->outputs[out].src == MATROXFB_SRC_CRTC1 &&
+				    minfo->outputs[out].output->compute) {
+					minfo->outputs[out].output->compute(minfo->outputs[out].data, &mt);
 				}
 			}
-			up_read(&ACCESS_FBINFO(altout).lock);
-			ACCESS_FBINFO(crtc1).pixclock = mt.pixclock;
-			ACCESS_FBINFO(crtc1).mnp = mt.mnp;
-			ACCESS_FBINFO(hw_switch->init(PMINFO &mt));
-			pos = (var->yoffset * var->xres_virtual + var->xoffset) * ACCESS_FBINFO(curr.final_bppShift) / 32;
-			pos += ACCESS_FBINFO(curr.ydstorg.chunks);
+			up_read(&minfo->altout.lock);
+			minfo->crtc1.pixclock = mt.pixclock;
+			minfo->crtc1.mnp = mt.mnp;
+			minfo->hw_switch->init(PMINFO &mt);
+			pos = (var->yoffset * var->xres_virtual + var->xoffset) * minfo->curr.final_bppShift / 32;
+			pos += minfo->curr.ydstorg.chunks;
 
 			hw->CRTC[0x0D] = pos & 0xFF;
 			hw->CRTC[0x0C] = (pos & 0xFF00) >> 8;
 			hw->CRTCEXT[0] = (hw->CRTCEXT[0] & 0xF0) | ((pos >> 16) & 0x0F) | ((pos >> 14) & 0x40);
 			hw->CRTCEXT[8] = pos >> 21;
-			ACCESS_FBINFO(hw_switch->restore(PMINFO2));
+			minfo->hw_switch->restore(PMINFO2);
 			update_crtc2(PMINFO pos);
-			down_read(&ACCESS_FBINFO(altout).lock);
+			down_read(&minfo->altout.lock);
 			for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-				if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC1 &&
-				    ACCESS_FBINFO(outputs[out]).output->program) {
-					ACCESS_FBINFO(outputs[out]).output->program(ACCESS_FBINFO(outputs[out]).data);
+				if (minfo->outputs[out].src == MATROXFB_SRC_CRTC1 &&
+				    minfo->outputs[out].output->program) {
+					minfo->outputs[out].output->program(minfo->outputs[out].data);
 				}
 			}
 			for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-				if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC1 &&
-				    ACCESS_FBINFO(outputs[out]).output->start) {
-					ACCESS_FBINFO(outputs[out]).output->start(ACCESS_FBINFO(outputs[out]).data);
+				if (minfo->outputs[out].src == MATROXFB_SRC_CRTC1 &&
+				    minfo->outputs[out].output->start) {
+					minfo->outputs[out].output->start(minfo->outputs[out].data);
 				}
 			}
-			up_read(&ACCESS_FBINFO(altout).lock);
+			up_read(&minfo->altout.lock);
 			matrox_cfbX_init(PMINFO2);
 		}
 	}
-	ACCESS_FBINFO(initialized) = 1;
+	minfo->initialized = 1;
 	return 0;
 }
 
@@ -853,13 +853,13 @@ static int matroxfb_get_vblank(WPMINFO struct fb_vblank *vblank)
 		vblank->flags |= FB_VBLANK_HBLANKING;
 	if (sts1 & 8)
 		vblank->flags |= FB_VBLANK_VSYNCING;
-	if (vblank->vcount >= ACCESS_FBINFO(fbcon).var.yres)
+	if (vblank->vcount >= minfo->fbcon.var.yres)
 		vblank->flags |= FB_VBLANK_VBLANKING;
-	if (test_bit(0, &ACCESS_FBINFO(irq_flags))) {
+	if (test_bit(0, &minfo->irq_flags)) {
 		vblank->flags |= FB_VBLANK_HAVE_COUNT;
 		/* Only one writer, aligned int value...
 		   it should work without lock and without atomic_t */
-		vblank->count = ACCESS_FBINFO(crtc1).vsync.cnt;
+		vblank->count = minfo->crtc1.vsync.cnt;
 	}
 	return 0;
 }
@@ -876,7 +876,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 
 	DBG(__func__)
 
-	if (ACCESS_FBINFO(dead)) {
+	if (minfo->dead) {
 		return -ENXIO;
 	}
 
@@ -912,8 +912,8 @@ static int matroxfb_ioctl(struct fb_info *info,
 					return -EFAULT;
 				if (mom.output >= MATROXFB_MAX_OUTPUTS)
 					return -ENXIO;
-				down_read(&ACCESS_FBINFO(altout.lock));
-				oproc = ACCESS_FBINFO(outputs[mom.output]).output;
+				down_read(&minfo->altout.lock);
+				oproc = minfo->outputs[mom.output].output;
 				if (!oproc) {
 					val = -ENXIO;
 				} else if (!oproc->verifymode) {
@@ -923,18 +923,18 @@ static int matroxfb_ioctl(struct fb_info *info,
 						val = -EINVAL;
 					}
 				} else {
-					val = oproc->verifymode(ACCESS_FBINFO(outputs[mom.output]).data, mom.mode);
+					val = oproc->verifymode(minfo->outputs[mom.output].data, mom.mode);
 				}
 				if (!val) {
-					if (ACCESS_FBINFO(outputs[mom.output]).mode != mom.mode) {
-						ACCESS_FBINFO(outputs[mom.output]).mode = mom.mode;
+					if (minfo->outputs[mom.output].mode != mom.mode) {
+						minfo->outputs[mom.output].mode = mom.mode;
 						val = 1;
 					}
 				}
-				up_read(&ACCESS_FBINFO(altout.lock));
+				up_read(&minfo->altout.lock);
 				if (val != 1)
 					return val;
-				switch (ACCESS_FBINFO(outputs[mom.output]).src) {
+				switch (minfo->outputs[mom.output].src) {
 					case MATROXFB_SRC_CRTC1:
 						matroxfb_set_par(info);
 						break;
@@ -942,11 +942,11 @@ static int matroxfb_ioctl(struct fb_info *info,
 						{
 							struct matroxfb_dh_fb_info* crtc2;
 
-							down_read(&ACCESS_FBINFO(crtc2.lock));
-							crtc2 = ACCESS_FBINFO(crtc2.info);
+							down_read(&minfo->crtc2.lock);
+							crtc2 = minfo->crtc2.info;
 							if (crtc2)
 								crtc2->fbcon.fbops->fb_set_par(&crtc2->fbcon);
-							up_read(&ACCESS_FBINFO(crtc2.lock));
+							up_read(&minfo->crtc2.lock);
 						}
 						break;
 				}
@@ -962,15 +962,15 @@ static int matroxfb_ioctl(struct fb_info *info,
 					return -EFAULT;
 				if (mom.output >= MATROXFB_MAX_OUTPUTS)
 					return -ENXIO;
-				down_read(&ACCESS_FBINFO(altout.lock));
-				oproc = ACCESS_FBINFO(outputs[mom.output]).output;
+				down_read(&minfo->altout.lock);
+				oproc = minfo->outputs[mom.output].output;
 				if (!oproc) {
 					val = -ENXIO;
 				} else {
-					mom.mode = ACCESS_FBINFO(outputs[mom.output]).mode;
+					mom.mode = minfo->outputs[mom.output].mode;
 					val = 0;
 				}
-				up_read(&ACCESS_FBINFO(altout.lock));
+				up_read(&minfo->altout.lock);
 				if (val)
 					return val;
 				if (copy_to_user(argp, &mom, sizeof(mom)))
@@ -989,9 +989,9 @@ static int matroxfb_ioctl(struct fb_info *info,
 					if (tmp & (1 << i)) {
 						if (i >= MATROXFB_MAX_OUTPUTS)
 							return -ENXIO;
-						if (!ACCESS_FBINFO(outputs[i]).output)
+						if (!minfo->outputs[i].output)
 							return -ENXIO;
-						switch (ACCESS_FBINFO(outputs[i]).src) {
+						switch (minfo->outputs[i].src) {
 							case MATROXFB_SRC_NONE:
 							case MATROXFB_SRC_CRTC1:
 								break;
@@ -1000,12 +1000,12 @@ static int matroxfb_ioctl(struct fb_info *info,
 						}
 					}
 				}
-				if (ACCESS_FBINFO(devflags.panellink)) {
+				if (minfo->devflags.panellink) {
 					if (tmp & MATROXFB_OUTPUT_CONN_DFP) {
 						if (tmp & MATROXFB_OUTPUT_CONN_SECONDARY)
 							return -EINVAL;
 						for (i = 0; i < MATROXFB_MAX_OUTPUTS; i++) {
-							if (ACCESS_FBINFO(outputs[i]).src == MATROXFB_SRC_CRTC2) {
+							if (minfo->outputs[i].src == MATROXFB_SRC_CRTC2) {
 								return -EBUSY;
 							}
 						}
@@ -1014,13 +1014,13 @@ static int matroxfb_ioctl(struct fb_info *info,
 				changes = 0;
 				for (i = 0; i < MATROXFB_MAX_OUTPUTS; i++) {
 					if (tmp & (1 << i)) {
-						if (ACCESS_FBINFO(outputs[i]).src != MATROXFB_SRC_CRTC1) {
+						if (minfo->outputs[i].src != MATROXFB_SRC_CRTC1) {
 							changes = 1;
-							ACCESS_FBINFO(outputs[i]).src = MATROXFB_SRC_CRTC1;
+							minfo->outputs[i].src = MATROXFB_SRC_CRTC1;
 						}
-					} else if (ACCESS_FBINFO(outputs[i]).src == MATROXFB_SRC_CRTC1) {
+					} else if (minfo->outputs[i].src == MATROXFB_SRC_CRTC1) {
 						changes = 1;
-						ACCESS_FBINFO(outputs[i]).src = MATROXFB_SRC_NONE;
+						minfo->outputs[i].src = MATROXFB_SRC_NONE;
 					}
 				}
 				if (!changes)
@@ -1034,7 +1034,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 				int i;
 
 				for (i = 0; i < MATROXFB_MAX_OUTPUTS; i++) {
-					if (ACCESS_FBINFO(outputs[i]).src == MATROXFB_SRC_CRTC1) {
+					if (minfo->outputs[i].src == MATROXFB_SRC_CRTC1) {
 						conn |= 1 << i;
 					}
 				}
@@ -1048,8 +1048,8 @@ static int matroxfb_ioctl(struct fb_info *info,
 				int i;
 
 				for (i = 0; i < MATROXFB_MAX_OUTPUTS; i++) {
-					if (ACCESS_FBINFO(outputs[i]).output) {
-						switch (ACCESS_FBINFO(outputs[i]).src) {
+					if (minfo->outputs[i].output) {
+						switch (minfo->outputs[i].src) {
 							case MATROXFB_SRC_NONE:
 							case MATROXFB_SRC_CRTC1:
 								conn |= 1 << i;
@@ -1057,7 +1057,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 						}
 					}
 				}
-				if (ACCESS_FBINFO(devflags.panellink)) {
+				if (minfo->devflags.panellink) {
 					if (conn & MATROXFB_OUTPUT_CONN_DFP)
 						conn &= ~MATROXFB_OUTPUT_CONN_SECONDARY;
 					if (conn & MATROXFB_OUTPUT_CONN_SECONDARY)
@@ -1073,7 +1073,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 				int i;
 
 				for (i = 0; i < MATROXFB_MAX_OUTPUTS; i++) {
-					if (ACCESS_FBINFO(outputs[i]).output) {
+					if (minfo->outputs[i].output) {
 						conn |= 1 << i;
 					}
 				}
@@ -1088,7 +1088,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 				memset(&r, 0, sizeof(r));
 				strcpy(r.driver, "matroxfb");
 				strcpy(r.card, "Matrox");
-				sprintf(r.bus_info, "PCI:%s", pci_name(ACCESS_FBINFO(pcidev)));
+				sprintf(r.bus_info, "PCI:%s", pci_name(minfo->pcidev));
 				r.version = KERNEL_VERSION(1,0,0);
 				r.capabilities = V4L2_CAP_VIDEO_OUTPUT;
 				if (copy_to_user(argp, &r, sizeof(r)))
@@ -1104,15 +1104,15 @@ static int matroxfb_ioctl(struct fb_info *info,
 				if (copy_from_user(&qctrl, argp, sizeof(qctrl)))
 					return -EFAULT;
 
-				down_read(&ACCESS_FBINFO(altout).lock);
-				if (!ACCESS_FBINFO(outputs[1]).output) {
+				down_read(&minfo->altout.lock);
+				if (!minfo->outputs[1].output) {
 					err = -ENXIO;
-				} else if (ACCESS_FBINFO(outputs[1]).output->getqueryctrl) {
-					err = ACCESS_FBINFO(outputs[1]).output->getqueryctrl(ACCESS_FBINFO(outputs[1]).data, &qctrl);
+				} else if (minfo->outputs[1].output->getqueryctrl) {
+					err = minfo->outputs[1].output->getqueryctrl(minfo->outputs[1].data, &qctrl);
 				} else {
 					err = -EINVAL;
 				}
-				up_read(&ACCESS_FBINFO(altout).lock);
+				up_read(&minfo->altout.lock);
 				if (err >= 0 &&
 				    copy_to_user(argp, &qctrl, sizeof(qctrl)))
 					return -EFAULT;
@@ -1126,15 +1126,15 @@ static int matroxfb_ioctl(struct fb_info *info,
 				if (copy_from_user(&ctrl, argp, sizeof(ctrl)))
 					return -EFAULT;
 
-				down_read(&ACCESS_FBINFO(altout).lock);
-				if (!ACCESS_FBINFO(outputs[1]).output) {
+				down_read(&minfo->altout.lock);
+				if (!minfo->outputs[1].output) {
 					err = -ENXIO;
-				} else if (ACCESS_FBINFO(outputs[1]).output->getctrl) {
-					err = ACCESS_FBINFO(outputs[1]).output->getctrl(ACCESS_FBINFO(outputs[1]).data, &ctrl);
+				} else if (minfo->outputs[1].output->getctrl) {
+					err = minfo->outputs[1].output->getctrl(minfo->outputs[1].data, &ctrl);
 				} else {
 					err = -EINVAL;
 				}
-				up_read(&ACCESS_FBINFO(altout).lock);
+				up_read(&minfo->altout.lock);
 				if (err >= 0 &&
 				    copy_to_user(argp, &ctrl, sizeof(ctrl)))
 					return -EFAULT;
@@ -1149,15 +1149,15 @@ static int matroxfb_ioctl(struct fb_info *info,
 				if (copy_from_user(&ctrl, argp, sizeof(ctrl)))
 					return -EFAULT;
 
-				down_read(&ACCESS_FBINFO(altout).lock);
-				if (!ACCESS_FBINFO(outputs[1]).output) {
+				down_read(&minfo->altout.lock);
+				if (!minfo->outputs[1].output) {
 					err = -ENXIO;
-				} else if (ACCESS_FBINFO(outputs[1]).output->setctrl) {
-					err = ACCESS_FBINFO(outputs[1]).output->setctrl(ACCESS_FBINFO(outputs[1]).data, &ctrl);
+				} else if (minfo->outputs[1].output->setctrl) {
+					err = minfo->outputs[1].output->setctrl(minfo->outputs[1].data, &ctrl);
 				} else {
 					err = -EINVAL;
 				}
-				up_read(&ACCESS_FBINFO(altout).lock);
+				up_read(&minfo->altout.lock);
 				return err;
 			}
 	}
@@ -1175,7 +1175,7 @@ static int matroxfb_blank(int blank, struct fb_info *info)
 
 	DBG(__func__)
 
-	if (ACCESS_FBINFO(dead))
+	if (minfo->dead)
 		return 1;
 
 	switch (blank) {
@@ -1287,7 +1287,7 @@ static int matroxfb_getmemory(WPMINFO unsigned int maxSize, unsigned int *realSi
 
 	DBG(__func__)
 
-	vm = ACCESS_FBINFO(video.vbase);
+	vm = minfo->video.vbase;
 	maxSize &= ~0x1FFFFF;	/* must be X*2MB (really it must be 2 or X*4MB) */
 	/* at least 2MB */
 	if (maxSize < 0x0200000) return 0;
@@ -1319,7 +1319,7 @@ static int matroxfb_getmemory(WPMINFO unsigned int maxSize, unsigned int *realSi
 
 	*realSize = offs - 0x100000;
 #ifdef CONFIG_FB_MATROX_MILLENIUM
-	ACCESS_FBINFO(interleave) = !(!isMillenium(MINFO) || ((offs - 0x100000) & 0x3FFFFF));
+	minfo->interleave = !(!isMillenium(minfo) || ((offs - 0x100000) & 0x3FFFFF));
 #endif
 	return 1;
 }
@@ -1558,12 +1558,12 @@ static void setDefaultOutputs(WPMINFO2) {
 	unsigned int i;
 	const char* ptr;
 
-	ACCESS_FBINFO(outputs[0]).default_src = MATROXFB_SRC_CRTC1;
-	if (ACCESS_FBINFO(devflags.g450dac)) {
-		ACCESS_FBINFO(outputs[1]).default_src = MATROXFB_SRC_CRTC1;
-		ACCESS_FBINFO(outputs[2]).default_src = MATROXFB_SRC_CRTC1;
+	minfo->outputs[0].default_src = MATROXFB_SRC_CRTC1;
+	if (minfo->devflags.g450dac) {
+		minfo->outputs[1].default_src = MATROXFB_SRC_CRTC1;
+		minfo->outputs[2].default_src = MATROXFB_SRC_CRTC1;
 	} else if (dfp) {
-		ACCESS_FBINFO(outputs[2]).default_src = MATROXFB_SRC_CRTC1;
+		minfo->outputs[2].default_src = MATROXFB_SRC_CRTC1;
 	}
 	ptr = outputs;
 	for (i = 0; i < MATROXFB_MAX_OUTPUTS; i++) {
@@ -1573,11 +1573,11 @@ static void setDefaultOutputs(WPMINFO2) {
 			break;
 		}
 		if (c == '0') {
-			ACCESS_FBINFO(outputs[i]).default_src = MATROXFB_SRC_NONE;
+			minfo->outputs[i].default_src = MATROXFB_SRC_NONE;
 		} else if (c == '1') {
-			ACCESS_FBINFO(outputs[i]).default_src = MATROXFB_SRC_CRTC1;
-		} else if (c == '2' && ACCESS_FBINFO(devflags.crtc2)) {
-			ACCESS_FBINFO(outputs[i]).default_src = MATROXFB_SRC_CRTC2;
+			minfo->outputs[i].default_src = MATROXFB_SRC_CRTC1;
+		} else if (c == '2' && minfo->devflags.crtc2) {
+			minfo->outputs[i].default_src = MATROXFB_SRC_CRTC2;
 		} else {
 			printk(KERN_ERR "matroxfb: Unknown outputs setting\n");
 			break;
@@ -1603,58 +1603,58 @@ static int initMatrox2(WPMINFO struct board* b){
 	/* set default values... */
 	vesafb_defined.accel_flags = FB_ACCELF_TEXT;
 
-	ACCESS_FBINFO(hw_switch) = b->base->lowlevel;
-	ACCESS_FBINFO(devflags.accelerator) = b->base->accelID;
-	ACCESS_FBINFO(max_pixel_clock) = b->maxclk;
+	minfo->hw_switch = b->base->lowlevel;
+	minfo->devflags.accelerator = b->base->accelID;
+	minfo->max_pixel_clock = b->maxclk;
 
 	printk(KERN_INFO "matroxfb: Matrox %s detected\n", b->name);
-	ACCESS_FBINFO(capable.plnwt) = 1;
-	ACCESS_FBINFO(chip) = b->chip;
-	ACCESS_FBINFO(capable.srcorg) = b->flags & DEVF_SRCORG;
-	ACCESS_FBINFO(devflags.video64bits) = b->flags & DEVF_VIDEO64BIT;
+	minfo->capable.plnwt = 1;
+	minfo->chip = b->chip;
+	minfo->capable.srcorg = b->flags & DEVF_SRCORG;
+	minfo->devflags.video64bits = b->flags & DEVF_VIDEO64BIT;
 	if (b->flags & DEVF_TEXT4B) {
-		ACCESS_FBINFO(devflags.vgastep) = 4;
-		ACCESS_FBINFO(devflags.textmode) = 4;
-		ACCESS_FBINFO(devflags.text_type_aux) = FB_AUX_TEXT_MGA_STEP16;
+		minfo->devflags.vgastep = 4;
+		minfo->devflags.textmode = 4;
+		minfo->devflags.text_type_aux = FB_AUX_TEXT_MGA_STEP16;
 	} else if (b->flags & DEVF_TEXT16B) {
-		ACCESS_FBINFO(devflags.vgastep) = 16;
-		ACCESS_FBINFO(devflags.textmode) = 1;
-		ACCESS_FBINFO(devflags.text_type_aux) = FB_AUX_TEXT_MGA_STEP16;
+		minfo->devflags.vgastep = 16;
+		minfo->devflags.textmode = 1;
+		minfo->devflags.text_type_aux = FB_AUX_TEXT_MGA_STEP16;
 	} else {
-		ACCESS_FBINFO(devflags.vgastep) = 8;
-		ACCESS_FBINFO(devflags.textmode) = 1;
-		ACCESS_FBINFO(devflags.text_type_aux) = FB_AUX_TEXT_MGA_STEP8;
+		minfo->devflags.vgastep = 8;
+		minfo->devflags.textmode = 1;
+		minfo->devflags.text_type_aux = FB_AUX_TEXT_MGA_STEP8;
 	}
 #ifdef CONFIG_FB_MATROX_32MB
-	ACCESS_FBINFO(devflags.support32MB) = (b->flags & DEVF_SUPPORT32MB) != 0;
+	minfo->devflags.support32MB = (b->flags & DEVF_SUPPORT32MB) != 0;
 #endif
-	ACCESS_FBINFO(devflags.precise_width) = !(b->flags & DEVF_ANY_VXRES);
-	ACCESS_FBINFO(devflags.crtc2) = (b->flags & DEVF_CRTC2) != 0;
-	ACCESS_FBINFO(devflags.maven_capable) = (b->flags & DEVF_MAVEN_CAPABLE) != 0;
-	ACCESS_FBINFO(devflags.dualhead) = (b->flags & DEVF_DUALHEAD) != 0;
-	ACCESS_FBINFO(devflags.dfp_type) = dfp_type;
-	ACCESS_FBINFO(devflags.g450dac) = (b->flags & DEVF_G450DAC) != 0;
-	ACCESS_FBINFO(devflags.textstep) = ACCESS_FBINFO(devflags.vgastep) * ACCESS_FBINFO(devflags.textmode);
-	ACCESS_FBINFO(devflags.textvram) = 65536 / ACCESS_FBINFO(devflags.textmode);
+	minfo->devflags.precise_width = !(b->flags & DEVF_ANY_VXRES);
+	minfo->devflags.crtc2 = (b->flags & DEVF_CRTC2) != 0;
+	minfo->devflags.maven_capable = (b->flags & DEVF_MAVEN_CAPABLE) != 0;
+	minfo->devflags.dualhead = (b->flags & DEVF_DUALHEAD) != 0;
+	minfo->devflags.dfp_type = dfp_type;
+	minfo->devflags.g450dac = (b->flags & DEVF_G450DAC) != 0;
+	minfo->devflags.textstep = minfo->devflags.vgastep * minfo->devflags.textmode;
+	minfo->devflags.textvram = 65536 / minfo->devflags.textmode;
 	setDefaultOutputs(PMINFO2);
 	if (b->flags & DEVF_PANELLINK_CAPABLE) {
-		ACCESS_FBINFO(outputs[2]).data = MINFO;
-		ACCESS_FBINFO(outputs[2]).output = &panellink_output;
-		ACCESS_FBINFO(outputs[2]).src = ACCESS_FBINFO(outputs[2]).default_src;
-		ACCESS_FBINFO(outputs[2]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
-		ACCESS_FBINFO(devflags.panellink) = 1;
+		minfo->outputs[2].data = minfo;
+		minfo->outputs[2].output = &panellink_output;
+		minfo->outputs[2].src = minfo->outputs[2].default_src;
+		minfo->outputs[2].mode = MATROXFB_OUTPUT_MODE_MONITOR;
+		minfo->devflags.panellink = 1;
 	}
 
-	if (ACCESS_FBINFO(capable.cross4MB) < 0)
-		ACCESS_FBINFO(capable.cross4MB) = b->flags & DEVF_CROSS4MB;
+	if (minfo->capable.cross4MB < 0)
+		minfo->capable.cross4MB = b->flags & DEVF_CROSS4MB;
 	if (b->flags & DEVF_SWAPS) {
-		ctrlptr_phys = pci_resource_start(ACCESS_FBINFO(pcidev), 1);
-		video_base_phys = pci_resource_start(ACCESS_FBINFO(pcidev), 0);
-		ACCESS_FBINFO(devflags.fbResource) = PCI_BASE_ADDRESS_0;
+		ctrlptr_phys = pci_resource_start(minfo->pcidev, 1);
+		video_base_phys = pci_resource_start(minfo->pcidev, 0);
+		minfo->devflags.fbResource = PCI_BASE_ADDRESS_0;
 	} else {
-		ctrlptr_phys = pci_resource_start(ACCESS_FBINFO(pcidev), 0);
-		video_base_phys = pci_resource_start(ACCESS_FBINFO(pcidev), 1);
-		ACCESS_FBINFO(devflags.fbResource) = PCI_BASE_ADDRESS_1;
+		ctrlptr_phys = pci_resource_start(minfo->pcidev, 0);
+		video_base_phys = pci_resource_start(minfo->pcidev, 1);
+		minfo->devflags.fbResource = PCI_BASE_ADDRESS_1;
 	}
 	err = -EINVAL;
 	if (!ctrlptr_phys) {
@@ -1672,7 +1672,7 @@ static int initMatrox2(WPMINFO struct board* b){
 	if (!request_mem_region(video_base_phys, memsize, "matroxfb FB")) {
 		goto failCtrlMR;
 	}
-	ACCESS_FBINFO(video.len_maximum) = memsize;
+	minfo->video.len_maximum = memsize;
 	/* convert mem (autodetect k, M) */
 	if (mem < 1024) mem *= 1024;
 	if (mem < 0x00100000) mem *= 1024;
@@ -1680,14 +1680,14 @@ static int initMatrox2(WPMINFO struct board* b){
 	if (mem && (mem < memsize))
 		memsize = mem;
 	err = -ENOMEM;
-	if (mga_ioremap(ctrlptr_phys, 16384, MGA_IOREMAP_MMIO, &ACCESS_FBINFO(mmio.vbase))) {
+	if (mga_ioremap(ctrlptr_phys, 16384, MGA_IOREMAP_MMIO, &minfo->mmio.vbase)) {
 		printk(KERN_ERR "matroxfb: cannot ioremap(%lX, 16384), matroxfb disabled\n", ctrlptr_phys);
 		goto failVideoMR;
 	}
-	ACCESS_FBINFO(mmio.base) = ctrlptr_phys;
-	ACCESS_FBINFO(mmio.len) = 16384;
-	ACCESS_FBINFO(video.base) = video_base_phys;
-	if (mga_ioremap(video_base_phys, memsize, MGA_IOREMAP_FB, &ACCESS_FBINFO(video.vbase))) {
+	minfo->mmio.base = ctrlptr_phys;
+	minfo->mmio.len = 16384;
+	minfo->video.base = video_base_phys;
+	if (mga_ioremap(video_base_phys, memsize, MGA_IOREMAP_FB, &minfo->video.vbase)) {
 		printk(KERN_ERR "matroxfb: cannot ioremap(%lX, %d), matroxfb disabled\n",
 			video_base_phys, memsize);
 		goto failCtrlIO;
@@ -1696,63 +1696,63 @@ static int initMatrox2(WPMINFO struct board* b){
 		u_int32_t cmd;
 		u_int32_t mga_option;
 
-		pci_read_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, &mga_option);
-		pci_read_config_dword(ACCESS_FBINFO(pcidev), PCI_COMMAND, &cmd);
+		pci_read_config_dword(minfo->pcidev, PCI_OPTION_REG, &mga_option);
+		pci_read_config_dword(minfo->pcidev, PCI_COMMAND, &cmd);
 		mga_option &= 0x7FFFFFFF; /* clear BIG_ENDIAN */
 		mga_option |= MX_OPTION_BSWAP;
 		/* disable palette snooping */
 		cmd &= ~PCI_COMMAND_VGA_PALETTE;
 		if (pci_dev_present(intel_82437)) {
-			if (!(mga_option & 0x20000000) && !ACCESS_FBINFO(devflags.nopciretry)) {
+			if (!(mga_option & 0x20000000) && !minfo->devflags.nopciretry) {
 				printk(KERN_WARNING "matroxfb: Disabling PCI retries due to i82437 present\n");
 			}
 			mga_option |= 0x20000000;
-			ACCESS_FBINFO(devflags.nopciretry) = 1;
+			minfo->devflags.nopciretry = 1;
 		}
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_COMMAND, cmd);
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, mga_option);
-		ACCESS_FBINFO(hw).MXoptionReg = mga_option;
+		pci_write_config_dword(minfo->pcidev, PCI_COMMAND, cmd);
+		pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, mga_option);
+		minfo->hw.MXoptionReg = mga_option;
 
 		/* select non-DMA memory for PCI_MGA_DATA, otherwise dump of PCI cfg space can lock PCI bus */
 		/* maybe preinit() candidate, but it is same... for all devices... at this time... */
-		pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_MGA_INDEX, 0x00003C00);
+		pci_write_config_dword(minfo->pcidev, PCI_MGA_INDEX, 0x00003C00);
 	}
 
 	err = -ENXIO;
 	matroxfb_read_pins(PMINFO2);
-	if (ACCESS_FBINFO(hw_switch)->preinit(PMINFO2)) {
+	if (minfo->hw_switch->preinit(PMINFO2)) {
 		goto failVideoIO;
 	}
 
 	err = -ENOMEM;
-	if (!matroxfb_getmemory(PMINFO memsize, &ACCESS_FBINFO(video.len)) || !ACCESS_FBINFO(video.len)) {
+	if (!matroxfb_getmemory(PMINFO memsize, &minfo->video.len) || !minfo->video.len) {
 		printk(KERN_ERR "matroxfb: cannot determine memory size\n");
 		goto failVideoIO;
 	}
-	ACCESS_FBINFO(devflags.ydstorg) = 0;
+	minfo->devflags.ydstorg = 0;
 
-	ACCESS_FBINFO(video.base) = video_base_phys;
-	ACCESS_FBINFO(video.len_usable) = ACCESS_FBINFO(video.len);
-	if (ACCESS_FBINFO(video.len_usable) > b->base->maxdisplayable)
-		ACCESS_FBINFO(video.len_usable) = b->base->maxdisplayable;
+	minfo->video.base = video_base_phys;
+	minfo->video.len_usable = minfo->video.len;
+	if (minfo->video.len_usable > b->base->maxdisplayable)
+		minfo->video.len_usable = b->base->maxdisplayable;
 #ifdef CONFIG_MTRR
 	if (mtrr) {
-		ACCESS_FBINFO(mtrr.vram) = mtrr_add(video_base_phys, ACCESS_FBINFO(video.len), MTRR_TYPE_WRCOMB, 1);
-		ACCESS_FBINFO(mtrr.vram_valid) = 1;
+		minfo->mtrr.vram = mtrr_add(video_base_phys, minfo->video.len, MTRR_TYPE_WRCOMB, 1);
+		minfo->mtrr.vram_valid = 1;
 		printk(KERN_INFO "matroxfb: MTRR's turned on\n");
 	}
 #endif	/* CONFIG_MTRR */
 
-	if (!ACCESS_FBINFO(devflags.novga))
+	if (!minfo->devflags.novga)
 		request_region(0x3C0, 32, "matrox");
 	matroxfb_g450_connect(PMINFO2);
-	ACCESS_FBINFO(hw_switch->reset(PMINFO2));
+	minfo->hw_switch->reset(PMINFO2);
 
-	ACCESS_FBINFO(fbcon.monspecs.hfmin) = 0;
-	ACCESS_FBINFO(fbcon.monspecs.hfmax) = fh;
-	ACCESS_FBINFO(fbcon.monspecs.vfmin) = 0;
-	ACCESS_FBINFO(fbcon.monspecs.vfmax) = fv;
-	ACCESS_FBINFO(fbcon.monspecs.dpms) = 0;	/* TBD */
+	minfo->fbcon.monspecs.hfmin = 0;
+	minfo->fbcon.monspecs.hfmax = fh;
+	minfo->fbcon.monspecs.vfmin = 0;
+	minfo->fbcon.monspecs.vfmax = fv;
+	minfo->fbcon.monspecs.dpms = 0;	/* TBD */
 
 	/* static settings */
 	vesafb_defined.red = colors[depth-1].red;
@@ -1764,24 +1764,24 @@ static int initMatrox2(WPMINFO struct board* b){
 	if (noaccel)
 		vesafb_defined.accel_flags &= ~FB_ACCELF_TEXT;
 
-	ACCESS_FBINFO(fbops) = matroxfb_ops;
-	ACCESS_FBINFO(fbcon.fbops) = &ACCESS_FBINFO(fbops);
-	ACCESS_FBINFO(fbcon.pseudo_palette) = ACCESS_FBINFO(cmap);
+	minfo->fbops = matroxfb_ops;
+	minfo->fbcon.fbops = &minfo->fbops;
+	minfo->fbcon.pseudo_palette = minfo->cmap;
 	/* after __init time we are like module... no logo */
-	ACCESS_FBINFO(fbcon.flags) = hotplug ? FBINFO_FLAG_MODULE : FBINFO_FLAG_DEFAULT;
-	ACCESS_FBINFO(fbcon.flags) |= FBINFO_PARTIAL_PAN_OK | 	 /* Prefer panning for scroll under MC viewer/edit */
+	minfo->fbcon.flags = hotplug ? FBINFO_FLAG_MODULE : FBINFO_FLAG_DEFAULT;
+	minfo->fbcon.flags |= FBINFO_PARTIAL_PAN_OK | 	 /* Prefer panning for scroll under MC viewer/edit */
 				      FBINFO_HWACCEL_COPYAREA |  /* We have hw-assisted bmove */
 				      FBINFO_HWACCEL_FILLRECT |  /* And fillrect */
 				      FBINFO_HWACCEL_IMAGEBLIT | /* And imageblit */
 				      FBINFO_HWACCEL_XPAN |      /* And we support both horizontal */
 				      FBINFO_HWACCEL_YPAN;       /* And vertical panning */
-	ACCESS_FBINFO(video.len_usable) &= PAGE_MASK;
-	fb_alloc_cmap(&ACCESS_FBINFO(fbcon.cmap), 256, 1);
+	minfo->video.len_usable &= PAGE_MASK;
+	fb_alloc_cmap(&minfo->fbcon.cmap, 256, 1);
 
 #ifndef MODULE
 	/* mode database is marked __init!!! */
 	if (!hotplug) {
-		fb_find_mode(&vesafb_defined, &ACCESS_FBINFO(fbcon), videomode[0]?videomode:NULL,
+		fb_find_mode(&vesafb_defined, &minfo->fbcon, videomode[0] ? videomode : NULL,
 			NULL, 0, &defaultmode, vesafb_defined.bits_per_pixel);
 	}
 #endif /* !MODULE */
@@ -1871,51 +1871,51 @@ static int initMatrox2(WPMINFO struct board* b){
 							to yres_virtual * xres_virtual < 2^32 */
 	}
 	matroxfb_init_fix(PMINFO2);
-	ACCESS_FBINFO(fbcon.screen_base) = vaddr_va(ACCESS_FBINFO(video.vbase));
+	minfo->fbcon.screen_base = vaddr_va(minfo->video.vbase);
 	/* Normalize values (namely yres_virtual) */
-	matroxfb_check_var(&vesafb_defined, &ACCESS_FBINFO(fbcon));
+	matroxfb_check_var(&vesafb_defined, &minfo->fbcon);
 	/* And put it into "current" var. Do NOT program hardware yet, or we'll not take over
 	 * vgacon correctly. fbcon_startup will call fb_set_par for us, WITHOUT check_var,
 	 * and unfortunately it will do it BEFORE vgacon contents is saved, so it won't work
 	 * anyway. But we at least tried... */
-	ACCESS_FBINFO(fbcon.var) = vesafb_defined;
+	minfo->fbcon.var = vesafb_defined;
 	err = -EINVAL;
 
 	printk(KERN_INFO "matroxfb: %dx%dx%dbpp (virtual: %dx%d)\n",
 		vesafb_defined.xres, vesafb_defined.yres, vesafb_defined.bits_per_pixel,
 		vesafb_defined.xres_virtual, vesafb_defined.yres_virtual);
 	printk(KERN_INFO "matroxfb: framebuffer at 0x%lX, mapped to 0x%p, size %d\n",
-		ACCESS_FBINFO(video.base), vaddr_va(ACCESS_FBINFO(video.vbase)), ACCESS_FBINFO(video.len));
+		minfo->video.base, vaddr_va(minfo->video.vbase), minfo->video.len);
 
 /* We do not have to set currcon to 0... register_framebuffer do it for us on first console
  * and we do not want currcon == 0 for subsequent framebuffers */
 
-	ACCESS_FBINFO(fbcon).device = &ACCESS_FBINFO(pcidev)->dev;
-	if (register_framebuffer(&ACCESS_FBINFO(fbcon)) < 0) {
+	minfo->fbcon.device = &minfo->pcidev->dev;
+	if (register_framebuffer(&minfo->fbcon) < 0) {
 		goto failVideoIO;
 	}
 	printk("fb%d: %s frame buffer device\n",
-	       ACCESS_FBINFO(fbcon.node), ACCESS_FBINFO(fbcon.fix.id));
+	       minfo->fbcon.node, minfo->fbcon.fix.id);
 
 	/* there is no console on this fb... but we have to initialize hardware
 	 * until someone tells me what is proper thing to do */
-	if (!ACCESS_FBINFO(initialized)) {
+	if (!minfo->initialized) {
 		printk(KERN_INFO "fb%d: initializing hardware\n",
-		       ACCESS_FBINFO(fbcon.node));
+		       minfo->fbcon.node);
 		/* We have to use FB_ACTIVATE_FORCE, as we had to put vesafb_defined to the fbcon.var
 		 * already before, so register_framebuffer works correctly. */
 		vesafb_defined.activate |= FB_ACTIVATE_FORCE;
-		fb_set_var(&ACCESS_FBINFO(fbcon), &vesafb_defined);
+		fb_set_var(&minfo->fbcon, &vesafb_defined);
 	}
 
 	return 0;
 failVideoIO:;
 	matroxfb_g450_shutdown(PMINFO2);
-	mga_iounmap(ACCESS_FBINFO(video.vbase));
+	mga_iounmap(minfo->video.vbase);
 failCtrlIO:;
-	mga_iounmap(ACCESS_FBINFO(mmio.vbase));
+	mga_iounmap(minfo->mmio.vbase);
 failVideoMR:;
-	release_mem_region(video_base_phys, ACCESS_FBINFO(video.len_maximum));
+	release_mem_region(video_base_phys, minfo->video.len_maximum);
 failCtrlMR:;
 	release_mem_region(ctrlptr_phys, 16384);
 fail:;
@@ -1971,7 +1971,7 @@ void matroxfb_unregister_driver(struct matroxfb_driver* drv) {
 static void matroxfb_register_device(struct matrox_fb_info* minfo) {
 	struct matroxfb_driver* drv;
 	int i = 0;
-	list_add(&ACCESS_FBINFO(next_fb), &matroxfb_list);
+	list_add(&minfo->next_fb, &matroxfb_list);
 	for (drv = matroxfb_driver_l(matroxfb_driver_list.next);
 	     drv != matroxfb_driver_l(&matroxfb_driver_list);
 	     drv = matroxfb_driver_l(drv->node.next)) {
@@ -1991,7 +1991,7 @@ static void matroxfb_register_device(struct matrox_fb_info* minfo) {
 static void matroxfb_unregister_device(struct matrox_fb_info* minfo) {
 	int i;
 
-	list_del(&ACCESS_FBINFO(next_fb));
+	list_del(&minfo->next_fb);
 	for (i = 0; i < minfo->drivers_count; i++) {
 		struct matroxfb_driver* drv = minfo->drivers[i];
 
@@ -2033,51 +2033,51 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm
 	minfo = kmalloc(sizeof(*minfo), GFP_KERNEL);
 	if (!minfo)
 		return -1;
-	memset(MINFO, 0, sizeof(*MINFO));
+	memset(minfo, 0, sizeof(*minfo));
 
-	ACCESS_FBINFO(pcidev) = pdev;
-	ACCESS_FBINFO(dead) = 0;
-	ACCESS_FBINFO(usecount) = 0;
-	ACCESS_FBINFO(userusecount) = 0;
+	minfo->pcidev = pdev;
+	minfo->dead = 0;
+	minfo->usecount = 0;
+	minfo->userusecount = 0;
 
-	pci_set_drvdata(pdev, MINFO);
+	pci_set_drvdata(pdev, minfo);
 	/* DEVFLAGS */
-	ACCESS_FBINFO(devflags.memtype) = memtype;
+	minfo->devflags.memtype = memtype;
 	if (memtype != -1)
 		noinit = 0;
 	if (cmd & PCI_COMMAND_MEMORY) {
-		ACCESS_FBINFO(devflags.novga) = novga;
-		ACCESS_FBINFO(devflags.nobios) = nobios;
-		ACCESS_FBINFO(devflags.noinit) = noinit;
+		minfo->devflags.novga = novga;
+		minfo->devflags.nobios = nobios;
+		minfo->devflags.noinit = noinit;
 		/* subsequent heads always needs initialization and must not enable BIOS */
 		novga = 1;
 		nobios = 1;
 		noinit = 0;
 	} else {
-		ACCESS_FBINFO(devflags.novga) = 1;
-		ACCESS_FBINFO(devflags.nobios) = 1;
-		ACCESS_FBINFO(devflags.noinit) = 0;
-	}
-
-	ACCESS_FBINFO(devflags.nopciretry) = no_pci_retry;
-	ACCESS_FBINFO(devflags.mga_24bpp_fix) = inv24;
-	ACCESS_FBINFO(devflags.precise_width) = option_precise_width;
-	ACCESS_FBINFO(devflags.sgram) = sgram;
-	ACCESS_FBINFO(capable.cross4MB) = cross4MB;
-
-	spin_lock_init(&ACCESS_FBINFO(lock.DAC));
-	spin_lock_init(&ACCESS_FBINFO(lock.accel));
-	init_rwsem(&ACCESS_FBINFO(crtc2.lock));
-	init_rwsem(&ACCESS_FBINFO(altout.lock));
-	mutex_init(&ACCESS_FBINFO(fbcon).mm_lock);
-	ACCESS_FBINFO(irq_flags) = 0;
-	init_waitqueue_head(&ACCESS_FBINFO(crtc1.vsync.wait));
-	init_waitqueue_head(&ACCESS_FBINFO(crtc2.vsync.wait));
-	ACCESS_FBINFO(crtc1.panpos) = -1;
+		minfo->devflags.novga = 1;
+		minfo->devflags.nobios = 1;
+		minfo->devflags.noinit = 0;
+	}
+
+	minfo->devflags.nopciretry = no_pci_retry;
+	minfo->devflags.mga_24bpp_fix = inv24;
+	minfo->devflags.precise_width = option_precise_width;
+	minfo->devflags.sgram = sgram;
+	minfo->capable.cross4MB = cross4MB;
+
+	spin_lock_init(&minfo->lock.DAC);
+	spin_lock_init(&minfo->lock.accel);
+	init_rwsem(&minfo->crtc2.lock);
+	init_rwsem(&minfo->altout.lock);
+	mutex_init(&minfo->fbcon.mm_lock);
+	minfo->irq_flags = 0;
+	init_waitqueue_head(&minfo->crtc1.vsync.wait);
+	init_waitqueue_head(&minfo->crtc2.vsync.wait);
+	minfo->crtc1.panpos = -1;
 
 	err = initMatrox2(PMINFO b);
 	if (!err) {
-		matroxfb_register_device(MINFO);
+		matroxfb_register_device(minfo);
 		return 0;
 	}
 	kfree(minfo);
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index ba64f5e..bbd7c04 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -524,11 +524,6 @@ struct matrox_fb_info {
 
 #define info2minfo(info) container_of(info, struct matrox_fb_info, fbcon)
 
-#define ACCESS_FBINFO2(info, x) (info->x)
-#define ACCESS_FBINFO(x) ACCESS_FBINFO2(minfo,x)
-
-#define MINFO minfo
-
 #define WPMINFO2 struct matrox_fb_info* minfo
 #define WPMINFO  WPMINFO2 ,
 #define CPMINFO2 const struct matrox_fb_info* minfo
@@ -707,11 +702,11 @@ void matroxfb_unregister_driver(struct matroxfb_driver* drv);
 #endif
 #endif
 
-#define mga_inb(addr)		mga_readb(ACCESS_FBINFO(mmio.vbase), (addr))
-#define mga_inl(addr)		mga_readl(ACCESS_FBINFO(mmio.vbase), (addr))
-#define mga_outb(addr,val)	mga_writeb(ACCESS_FBINFO(mmio.vbase), (addr), (val))
-#define mga_outw(addr,val)	mga_writew(ACCESS_FBINFO(mmio.vbase), (addr), (val))
-#define mga_outl(addr,val)	mga_writel(ACCESS_FBINFO(mmio.vbase), (addr), (val))
+#define mga_inb(addr)		mga_readb(minfo->mmio.vbase, (addr))
+#define mga_inl(addr)		mga_readl(minfo->mmio.vbase, (addr))
+#define mga_outb(addr,val)	mga_writeb(minfo->mmio.vbase, (addr), (val))
+#define mga_outw(addr,val)	mga_writew(minfo->mmio.vbase, (addr), (val))
+#define mga_outl(addr,val)	mga_writel(minfo->mmio.vbase, (addr), (val))
 #define mga_readr(port,idx)	(mga_outb((port),(idx)), mga_inb((port)+1))
 #define mga_setr(addr,port,val)	mga_outw(addr, ((val)<<8) | (port))
 
@@ -730,10 +725,10 @@ void matroxfb_unregister_driver(struct matroxfb_driver* drv);
 #define isMilleniumII(x) (0)
 #endif
 
-#define matroxfb_DAC_lock()                   spin_lock(&ACCESS_FBINFO(lock.DAC))
-#define matroxfb_DAC_unlock()                 spin_unlock(&ACCESS_FBINFO(lock.DAC))
-#define matroxfb_DAC_lock_irqsave(flags)      spin_lock_irqsave(&ACCESS_FBINFO(lock.DAC),flags)
-#define matroxfb_DAC_unlock_irqrestore(flags) spin_unlock_irqrestore(&ACCESS_FBINFO(lock.DAC),flags)
+#define matroxfb_DAC_lock()                   spin_lock(&minfo->lock.DAC)
+#define matroxfb_DAC_unlock()                 spin_unlock(&minfo->lock.DAC)
+#define matroxfb_DAC_lock_irqsave(flags)      spin_lock_irqsave(&minfo->lock.DAC, flags)
+#define matroxfb_DAC_unlock_irqrestore(flags) spin_unlock_irqrestore(&minfo->lock.DAC, flags)
 extern void matroxfb_DAC_out(CPMINFO int reg, int val);
 extern int matroxfb_DAC_in(CPMINFO int reg);
 extern void matroxfb_var2my(struct fb_var_screeninfo* fvsi, struct my_timming* mt);
@@ -741,8 +736,8 @@ extern int matroxfb_wait_for_sync(WPMINFO u_int32_t crtc);
 extern int matroxfb_enable_irq(WPMINFO int reenable);
 
 #ifdef MATROXFB_USE_SPINLOCKS
-#define CRITBEGIN  spin_lock_irqsave(&ACCESS_FBINFO(lock.accel), critflags);
-#define CRITEND	   spin_unlock_irqrestore(&ACCESS_FBINFO(lock.accel), critflags);
+#define CRITBEGIN  spin_lock_irqsave(&minfo->lock.accel, critflags);
+#define CRITEND	   spin_unlock_irqrestore(&minfo->lock.accel, critflags);
 #define CRITFLAGS  unsigned long critflags;
 #else
 #define CRITBEGIN
diff --git a/drivers/video/matrox/matroxfb_crtc2.c b/drivers/video/matrox/matroxfb_crtc2.c
index ebcb5c6..8249959 100644
--- a/drivers/video/matrox/matroxfb_crtc2.c
+++ b/drivers/video/matrox/matroxfb_crtc2.c
@@ -81,11 +81,11 @@ static void matroxfb_dh_restore(struct matroxfb_dh_fb_info* m2info,
 	}
 	tmp |= 0x00000001;	/* enable CRTC2 */
 	datactl = 0;
-	if (ACCESS_FBINFO(outputs[1]).src == MATROXFB_SRC_CRTC2) {
-		if (ACCESS_FBINFO(devflags.g450dac)) {
+	if (minfo->outputs[1].src == MATROXFB_SRC_CRTC2) {
+		if (minfo->devflags.g450dac) {
 			tmp |= 0x00000006; /* source from secondary pixel PLL */
 			/* no vidrst when in monitor mode */
-			if (ACCESS_FBINFO(outputs[1]).mode != MATROXFB_OUTPUT_MODE_MONITOR) {
+			if (minfo->outputs[1].mode != MATROXFB_OUTPUT_MODE_MONITOR) {
 				tmp |=  0xC0001000; /* Enable H/V vidrst */
 			}
 		} else {
@@ -93,11 +93,11 @@ static void matroxfb_dh_restore(struct matroxfb_dh_fb_info* m2info,
 			tmp |= 0xC0000000; /* enable vvidrst & hvidrst */
 			/* MGA TVO is our clock source */
 		}
-	} else if (ACCESS_FBINFO(outputs[0]).src == MATROXFB_SRC_CRTC2) {
+	} else if (minfo->outputs[0].src == MATROXFB_SRC_CRTC2) {
 		tmp |= 0x00000004; /* source from pixclock */
 		/* PIXPLL is our clock source */
 	}
-	if (ACCESS_FBINFO(outputs[0]).src == MATROXFB_SRC_CRTC2) {
+	if (minfo->outputs[0].src == MATROXFB_SRC_CRTC2) {
 		tmp |= 0x00100000;	/* connect CRTC2 to DAC */
 	}
 	if (mt->interlaced) {
@@ -146,7 +146,7 @@ static void matroxfb_dh_restore(struct matroxfb_dh_fb_info* m2info,
 		}
 	}
 	mga_outl(0x3C10, tmp);
-	ACCESS_FBINFO(hw).crtc2.ctl = tmp;
+	minfo->hw.crtc2.ctl = tmp;
 
 	tmp = mt->VDisplay << 16;	/* line compare */
 	if (mt->sync & FB_SYNC_HOR_HIGH_ACT)
@@ -160,7 +160,7 @@ static void matroxfb_dh_disable(struct matroxfb_dh_fb_info* m2info) {
 	MINFO_FROM(m2info->primary_dev);
 
 	mga_outl(0x3C10, 0x00000004);	/* disable CRTC2, CRTC1->DAC1, PLL as clock source */
-	ACCESS_FBINFO(hw).crtc2.ctl = 0x00000004;
+	minfo->hw.crtc2.ctl = 0x00000004;
 }
 
 static void matroxfb_dh_pan_var(struct matroxfb_dh_fb_info* m2info,
@@ -262,13 +262,13 @@ static int matroxfb_dh_open(struct fb_info* info, int user) {
 #define m2info (container_of(info, struct matroxfb_dh_fb_info, fbcon))
 	MINFO_FROM(m2info->primary_dev);
 
-	if (MINFO) {
+	if (minfo) {
 		int err;
 
-		if (ACCESS_FBINFO(dead)) {
+		if (minfo->dead) {
 			return -ENXIO;
 		}
-		err = ACCESS_FBINFO(fbops).fb_open(&ACCESS_FBINFO(fbcon), user);
+		err = minfo->fbops.fb_open(&minfo->fbcon, user);
 		if (err) {
 			return err;
 		}
@@ -282,8 +282,8 @@ static int matroxfb_dh_release(struct fb_info* info, int user) {
 	int err = 0;
 	MINFO_FROM(m2info->primary_dev);
 
-	if (MINFO) {
-		err = ACCESS_FBINFO(fbops).fb_release(&ACCESS_FBINFO(fbcon), user);
+	if (minfo) {
+		err = minfo->fbops.fb_release(&minfo->fbcon, user);
 	}
 	return err;
 #undef m2info
@@ -352,18 +352,18 @@ static int matroxfb_dh_set_par(struct fb_info* info) {
 		pos = (m2info->fbcon.var.yoffset * m2info->fbcon.var.xres_virtual + m2info->fbcon.var.xoffset) * m2info->fbcon.var.bits_per_pixel >> 3;
 		pos += m2info->video.offbase;
 		cnt = 0;
-		down_read(&ACCESS_FBINFO(altout).lock);
+		down_read(&minfo->altout.lock);
 		for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-			if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC2) {
+			if (minfo->outputs[out].src == MATROXFB_SRC_CRTC2) {
 				cnt++;
-				if (ACCESS_FBINFO(outputs[out]).output->compute) {
-					ACCESS_FBINFO(outputs[out]).output->compute(ACCESS_FBINFO(outputs[out]).data, &mt);
+				if (minfo->outputs[out].output->compute) {
+					minfo->outputs[out].output->compute(minfo->outputs[out].data, &mt);
 				}
 			}
 		}
-		ACCESS_FBINFO(crtc2).pixclock = mt.pixclock;
-		ACCESS_FBINFO(crtc2).mnp = mt.mnp;
-		up_read(&ACCESS_FBINFO(altout).lock);
+		minfo->crtc2.pixclock = mt.pixclock;
+		minfo->crtc2.mnp = mt.mnp;
+		up_read(&minfo->altout.lock);
 		if (cnt) {
 			matroxfb_dh_restore(m2info, &mt, mode, pos);
 		} else {
@@ -371,20 +371,20 @@ static int matroxfb_dh_set_par(struct fb_info* info) {
 		}
 		DAC1064_global_init(PMINFO2);
 		DAC1064_global_restore(PMINFO2);
-		down_read(&ACCESS_FBINFO(altout).lock);
+		down_read(&minfo->altout.lock);
 		for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-			if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC2 &&
-			    ACCESS_FBINFO(outputs[out]).output->program) {
-				ACCESS_FBINFO(outputs[out]).output->program(ACCESS_FBINFO(outputs[out]).data);
+			if (minfo->outputs[out].src == MATROXFB_SRC_CRTC2 &&
+			    minfo->outputs[out].output->program) {
+				minfo->outputs[out].output->program(minfo->outputs[out].data);
 			}
 		}
 		for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-			if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC2 &&
-			    ACCESS_FBINFO(outputs[out]).output->start) {
-				ACCESS_FBINFO(outputs[out]).output->start(ACCESS_FBINFO(outputs[out]).data);
+			if (minfo->outputs[out].src == MATROXFB_SRC_CRTC2 &&
+			    minfo->outputs[out].output->start) {
+				minfo->outputs[out].output->start(minfo->outputs[out].data);
 			}
 		}
-		up_read(&ACCESS_FBINFO(altout).lock);
+		up_read(&minfo->altout.lock);
 	}
 	m2info->initialized = 1;
 	return 0;
@@ -409,11 +409,11 @@ static int matroxfb_dh_get_vblank(const struct matroxfb_dh_fb_info* m2info, stru
 	/* compatibility stuff */
 	if (vblank->vcount >= m2info->fbcon.var.yres)
 		vblank->flags |= FB_VBLANK_VBLANKING;
-        if (test_bit(0, &ACCESS_FBINFO(irq_flags))) {
+	if (test_bit(0, &minfo->irq_flags)) {
                 vblank->flags |= FB_VBLANK_HAVE_COUNT;
                 /* Only one writer, aligned int value...
                    it should work without lock and without atomic_t */
-                vblank->count = ACCESS_FBINFO(crtc2).vsync.cnt;
+		vblank->count = minfo->crtc2.vsync.cnt;
         }
 	return 0;
 }
@@ -455,7 +455,7 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 		case MATROXFB_GET_OUTPUT_MODE:
 		case MATROXFB_GET_ALL_OUTPUTS:
 			{
-				return ACCESS_FBINFO(fbcon.fbops)->fb_ioctl(&ACCESS_FBINFO(fbcon), cmd, arg);
+				return minfo->fbcon.fbops->fb_ioctl(&minfo->fbcon, cmd, arg);
 			}
 		case MATROXFB_SET_OUTPUT_CONNECTION:
 			{
@@ -469,9 +469,9 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 					if (tmp & (1 << out)) {
 						if (out >= MATROXFB_MAX_OUTPUTS)
 							return -ENXIO;
-						if (!ACCESS_FBINFO(outputs[out]).output)
+						if (!minfo->outputs[out].output)
 							return -ENXIO;
-						switch (ACCESS_FBINFO(outputs[out]).src) {
+						switch (minfo->outputs[out].src) {
 							case MATROXFB_SRC_NONE:
 							case MATROXFB_SRC_CRTC2:
 								break;
@@ -480,22 +480,22 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 						}
 					}
 				}
-				if (ACCESS_FBINFO(devflags.panellink)) {
+				if (minfo->devflags.panellink) {
 					if (tmp & MATROXFB_OUTPUT_CONN_DFP)
 						return -EINVAL;
-					if ((ACCESS_FBINFO(outputs[2]).src == MATROXFB_SRC_CRTC1) && tmp)
+					if ((minfo->outputs[2].src == MATROXFB_SRC_CRTC1) && tmp)
 						return -EBUSY;
 				}
 				changes = 0;
 				for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
 					if (tmp & (1 << out)) {
-						if (ACCESS_FBINFO(outputs[out]).src != MATROXFB_SRC_CRTC2) {
+						if (minfo->outputs[out].src != MATROXFB_SRC_CRTC2) {
 							changes = 1;
-							ACCESS_FBINFO(outputs[out]).src = MATROXFB_SRC_CRTC2;
+							minfo->outputs[out].src = MATROXFB_SRC_CRTC2;
 						}
-					} else if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC2) {
+					} else if (minfo->outputs[out].src == MATROXFB_SRC_CRTC2) {
 						changes = 1;
-						ACCESS_FBINFO(outputs[out]).src = MATROXFB_SRC_NONE;
+						minfo->outputs[out].src = MATROXFB_SRC_NONE;
 					}
 				}
 				if (!changes)
@@ -509,7 +509,7 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 				int out;
 
 				for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-					if (ACCESS_FBINFO(outputs[out]).src == MATROXFB_SRC_CRTC2) {
+					if (minfo->outputs[out].src == MATROXFB_SRC_CRTC2) {
 						conn |= 1 << out;
 					}
 				}
@@ -523,8 +523,8 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 				int out;
 
 				for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
-					if (ACCESS_FBINFO(outputs[out]).output) {
-						switch (ACCESS_FBINFO(outputs[out]).src) {
+					if (minfo->outputs[out].output) {
+						switch (minfo->outputs[out].src) {
 							case MATROXFB_SRC_NONE:
 							case MATROXFB_SRC_CRTC2:
 								tmp |= 1 << out;
@@ -532,9 +532,9 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 						}
 					}
 				}
-				if (ACCESS_FBINFO(devflags.panellink)) {
+				if (minfo->devflags.panellink) {
 					tmp &= ~MATROXFB_OUTPUT_CONN_DFP;
-					if (ACCESS_FBINFO(outputs[2]).src == MATROXFB_SRC_CRTC1) {
+					if (minfo->outputs[2].src == MATROXFB_SRC_CRTC1) {
 						tmp = 0;
 					}
 				}
@@ -611,21 +611,21 @@ static int matroxfb_dh_regit(CPMINFO struct matroxfb_dh_fb_info* m2info) {
 	if (mem < 64*1024)
 		mem *= 1024;
 	mem &= ~0x00000FFF;	/* PAGE_MASK? */
-	if (ACCESS_FBINFO(video.len_usable) + mem <= ACCESS_FBINFO(video.len))
-		m2info->video.offbase = ACCESS_FBINFO(video.len) - mem;
-	else if (ACCESS_FBINFO(video.len) < mem) {
+	if (minfo->video.len_usable + mem <= minfo->video.len)
+		m2info->video.offbase = minfo->video.len - mem;
+	else if (minfo->video.len < mem) {
 		return -ENOMEM;
 	} else { /* check yres on first head... */
 		m2info->video.borrowed = mem;
-		ACCESS_FBINFO(video.len_usable) -= mem;
-		m2info->video.offbase = ACCESS_FBINFO(video.len_usable);
+		minfo->video.len_usable -= mem;
+		m2info->video.offbase = minfo->video.len_usable;
 	}
-	m2info->video.base = ACCESS_FBINFO(video.base) + m2info->video.offbase;
+	m2info->video.base = minfo->video.base + m2info->video.offbase;
 	m2info->video.len = m2info->video.len_usable = m2info->video.len_maximum = mem;
-	m2info->video.vbase.vaddr = vaddr_va(ACCESS_FBINFO(video.vbase)) + m2info->video.offbase;
-	m2info->mmio.base = ACCESS_FBINFO(mmio.base);
-	m2info->mmio.vbase = ACCESS_FBINFO(mmio.vbase);
-	m2info->mmio.len = ACCESS_FBINFO(mmio.len);
+	m2info->video.vbase.vaddr = vaddr_va(minfo->video.vbase) + m2info->video.offbase;
+	m2info->mmio.base = minfo->mmio.base;
+	m2info->mmio.vbase = minfo->mmio.vbase;
+	m2info->mmio.len = minfo->mmio.len;
 
 	matroxfb_dh_init_fix(m2info);
 	if (register_framebuffer(&m2info->fbcon)) {
@@ -633,10 +633,10 @@ static int matroxfb_dh_regit(CPMINFO struct matroxfb_dh_fb_info* m2info) {
 	}
 	if (!m2info->initialized)
 		fb_set_var(&m2info->fbcon, &matroxfb_dh_defined);
-	down_write(&ACCESS_FBINFO(crtc2.lock));
-	oldcrtc2 = ACCESS_FBINFO(crtc2.info);
-	ACCESS_FBINFO(crtc2.info) = m2info;
-	up_write(&ACCESS_FBINFO(crtc2.lock));
+	down_write(&minfo->crtc2.lock);
+	oldcrtc2 = minfo->crtc2.info;
+	minfo->crtc2.info = m2info;
+	up_write(&minfo->crtc2.lock);
 	if (oldcrtc2) {
 		printk(KERN_ERR "matroxfb_crtc2: Internal consistency check failed: crtc2 already present: %p\n",
 			oldcrtc2);
@@ -654,7 +654,7 @@ static int matroxfb_dh_registerfb(struct matroxfb_dh_fb_info* m2info) {
 		return -1;
 	}
 	printk(KERN_INFO "matroxfb_crtc2: secondary head of fb%u was registered as fb%u\n",
-		ACCESS_FBINFO(fbcon.node), m2info->fbcon.node);
+		minfo->fbcon.node, m2info->fbcon.node);
 	m2info->fbcon_registered = 1;
 	return 0;
 #undef minfo
@@ -666,11 +666,11 @@ static void matroxfb_dh_deregisterfb(struct matroxfb_dh_fb_info* m2info) {
 		int id;
 		struct matroxfb_dh_fb_info* crtc2;
 
-		down_write(&ACCESS_FBINFO(crtc2.lock));
-		crtc2 = ACCESS_FBINFO(crtc2.info);
+		down_write(&minfo->crtc2.lock);
+		crtc2 = minfo->crtc2.info;
 		if (crtc2 == m2info)
-			ACCESS_FBINFO(crtc2.info) = NULL;
-		up_write(&ACCESS_FBINFO(crtc2.lock));
+			minfo->crtc2.info = NULL;
+		up_write(&minfo->crtc2.lock);
 		if (crtc2 != m2info) {
 			printk(KERN_ERR "matroxfb_crtc2: Internal consistency check failed: crtc2 mismatch at unload: %p != %p\n",
 				crtc2, m2info);
@@ -680,7 +680,7 @@ static void matroxfb_dh_deregisterfb(struct matroxfb_dh_fb_info* m2info) {
 		id = m2info->fbcon.node;
 		unregister_framebuffer(&m2info->fbcon);
 		/* return memory back to primary head */
-		ACCESS_FBINFO(video.len_usable) += m2info->video.borrowed;
+		minfo->video.len_usable += m2info->video.borrowed;
 		printk(KERN_INFO "matroxfb_crtc2: fb%u unregistered\n", id);
 		m2info->fbcon_registered = 0;
 	}
@@ -691,14 +691,14 @@ static void* matroxfb_crtc2_probe(struct matrox_fb_info* minfo) {
 	struct matroxfb_dh_fb_info* m2info;
 
 	/* hardware is CRTC2 incapable... */
-	if (!ACCESS_FBINFO(devflags.crtc2))
+	if (!minfo->devflags.crtc2)
 		return NULL;
 	m2info = kzalloc(sizeof(*m2info), GFP_KERNEL);
 	if (!m2info) {
 		printk(KERN_ERR "matroxfb_crtc2: Not enough memory for CRTC2 control structs\n");
 		return NULL;
 	}
-	m2info->primary_dev = MINFO;
+	m2info->primary_dev = minfo;
 	if (matroxfb_dh_registerfb(m2info)) {
 		kfree(m2info);
 		printk(KERN_ERR "matroxfb_crtc2: CRTC2 framebuffer failed to register\n");
diff --git a/drivers/video/matrox/matroxfb_g450.c b/drivers/video/matrox/matroxfb_g450.c
index 6209a76..841b439 100644
--- a/drivers/video/matrox/matroxfb_g450.c
+++ b/drivers/video/matrox/matroxfb_g450.c
@@ -81,7 +81,7 @@ static int get_ctrl_id(__u32 v4l2_id) {
 }
 
 static inline int* get_ctrl_ptr(WPMINFO unsigned int idx) {
-	return (int*)((char*)MINFO + g450_controls[idx].control);
+	return (int*)((char*)minfo + g450_controls[idx].control);
 }
 
 static void tvo_fill_defaults(WPMINFO2) {
@@ -124,8 +124,8 @@ static void cve2_set_reg10(WPMINFO int reg, int val) {
 }
 
 static void g450_compute_bwlevel(CPMINFO int *bl, int *wl) {
-	const int b = ACCESS_FBINFO(altout.tvo_params.brightness) + BLMIN;
-	const int c = ACCESS_FBINFO(altout.tvo_params.contrast);
+	const int b = minfo->altout.tvo_params.brightness + BLMIN;
+	const int c = minfo->altout.tvo_params.contrast;
 
 	*bl = max(b - c, BLMIN);
 	*wl = min(b + c, WLMAX);
@@ -509,31 +509,31 @@ static void cve2_init_TV(WPMINFO const struct mavenregs* m) {
 static int matroxfb_g450_compute(void* md, struct my_timming* mt) {
 	MINFO_FROM(md);
 
-	dprintk(KERN_DEBUG "Computing, mode=%u\n", ACCESS_FBINFO(outputs[1]).mode);
+	dprintk(KERN_DEBUG "Computing, mode=%u\n", minfo->outputs[1].mode);
 
 	if (mt->crtc == MATROXFB_SRC_CRTC2 &&
-	    ACCESS_FBINFO(outputs[1]).mode != MATROXFB_OUTPUT_MODE_MONITOR) {
+	    minfo->outputs[1].mode != MATROXFB_OUTPUT_MODE_MONITOR) {
 		const struct output_desc* outd;
 
-		cve2_init_TVdata(ACCESS_FBINFO(outputs[1]).mode, &ACCESS_FBINFO(hw).maven, &outd);
+		cve2_init_TVdata(minfo->outputs[1].mode, &minfo->hw.maven, &outd);
 		{
 			int blacklevel, whitelevel;
 			g450_compute_bwlevel(PMINFO &blacklevel, &whitelevel);
-			ACCESS_FBINFO(hw).maven.regs[0x0E] = blacklevel >> 2;
-			ACCESS_FBINFO(hw).maven.regs[0x0F] = blacklevel & 3;
-			ACCESS_FBINFO(hw).maven.regs[0x1E] = whitelevel >> 2;
-			ACCESS_FBINFO(hw).maven.regs[0x1F] = whitelevel & 3;
+			minfo->hw.maven.regs[0x0E] = blacklevel >> 2;
+			minfo->hw.maven.regs[0x0F] = blacklevel & 3;
+			minfo->hw.maven.regs[0x1E] = whitelevel >> 2;
+			minfo->hw.maven.regs[0x1F] = whitelevel & 3;
 
-			ACCESS_FBINFO(hw).maven.regs[0x20] =
-			ACCESS_FBINFO(hw).maven.regs[0x22] = ACCESS_FBINFO(altout.tvo_params.saturation);
+			minfo->hw.maven.regs[0x20] =
+			minfo->hw.maven.regs[0x22] = minfo->altout.tvo_params.saturation;
 
-			ACCESS_FBINFO(hw).maven.regs[0x25] = ACCESS_FBINFO(altout.tvo_params.hue);
+			minfo->hw.maven.regs[0x25] = minfo->altout.tvo_params.hue;
 
-			if (ACCESS_FBINFO(altout.tvo_params.testout)) {
-				ACCESS_FBINFO(hw).maven.regs[0x05] |= 0x02;
+			if (minfo->altout.tvo_params.testout) {
+				minfo->hw.maven.regs[0x05] |= 0x02;
 			}
 		}
-		computeRegs(PMINFO &ACCESS_FBINFO(hw).maven, mt, outd);
+		computeRegs(PMINFO &minfo->hw.maven, mt, outd);
 	} else if (mt->mnp < 0) {
 		/* We must program clocks before CRTC2, otherwise interlaced mode
 		   startup may fail */
@@ -547,8 +547,8 @@ static int matroxfb_g450_compute(void* md, struct my_timming* mt) {
 static int matroxfb_g450_program(void* md) {
 	MINFO_FROM(md);
 	
-	if (ACCESS_FBINFO(outputs[1]).mode != MATROXFB_OUTPUT_MODE_MONITOR) {
-		cve2_init_TV(PMINFO &ACCESS_FBINFO(hw).maven);
+	if (minfo->outputs[1].mode != MATROXFB_OUTPUT_MODE_MONITOR) {
+		cve2_init_TV(PMINFO &minfo->hw.maven);
 	}
 	return 0;
 }
@@ -589,33 +589,33 @@ static struct matrox_altout matroxfb_g450_dvi = {
 };
 
 void matroxfb_g450_connect(WPMINFO2) {
-	if (ACCESS_FBINFO(devflags.g450dac)) {
-		down_write(&ACCESS_FBINFO(altout.lock));
+	if (minfo->devflags.g450dac) {
+		down_write(&minfo->altout.lock);
 		tvo_fill_defaults(PMINFO2);
-		ACCESS_FBINFO(outputs[1]).src = ACCESS_FBINFO(outputs[1]).default_src;
-		ACCESS_FBINFO(outputs[1]).data = MINFO;
-		ACCESS_FBINFO(outputs[1]).output = &matroxfb_g450_altout;
-		ACCESS_FBINFO(outputs[1]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
-		ACCESS_FBINFO(outputs[2]).src = ACCESS_FBINFO(outputs[2]).default_src;
-		ACCESS_FBINFO(outputs[2]).data = MINFO;
-		ACCESS_FBINFO(outputs[2]).output = &matroxfb_g450_dvi;
-		ACCESS_FBINFO(outputs[2]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
-		up_write(&ACCESS_FBINFO(altout.lock));
+		minfo->outputs[1].src = minfo->outputs[1].default_src;
+		minfo->outputs[1].data = minfo;
+		minfo->outputs[1].output = &matroxfb_g450_altout;
+		minfo->outputs[1].mode = MATROXFB_OUTPUT_MODE_MONITOR;
+		minfo->outputs[2].src = minfo->outputs[2].default_src;
+		minfo->outputs[2].data = minfo;
+		minfo->outputs[2].output = &matroxfb_g450_dvi;
+		minfo->outputs[2].mode = MATROXFB_OUTPUT_MODE_MONITOR;
+		up_write(&minfo->altout.lock);
 	}
 }
 
 void matroxfb_g450_shutdown(WPMINFO2) {
-	if (ACCESS_FBINFO(devflags.g450dac)) {
-		down_write(&ACCESS_FBINFO(altout.lock));
-		ACCESS_FBINFO(outputs[1]).src = MATROXFB_SRC_NONE;
-		ACCESS_FBINFO(outputs[1]).output = NULL;
-		ACCESS_FBINFO(outputs[1]).data = NULL;
-		ACCESS_FBINFO(outputs[1]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
-		ACCESS_FBINFO(outputs[2]).src = MATROXFB_SRC_NONE;
-		ACCESS_FBINFO(outputs[2]).output = NULL;
-		ACCESS_FBINFO(outputs[2]).data = NULL;
-		ACCESS_FBINFO(outputs[2]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
-		up_write(&ACCESS_FBINFO(altout.lock));
+	if (minfo->devflags.g450dac) {
+		down_write(&minfo->altout.lock);
+		minfo->outputs[1].src = MATROXFB_SRC_NONE;
+		minfo->outputs[1].output = NULL;
+		minfo->outputs[1].data = NULL;
+		minfo->outputs[1].mode = MATROXFB_OUTPUT_MODE_MONITOR;
+		minfo->outputs[2].src = MATROXFB_SRC_NONE;
+		minfo->outputs[2].output = NULL;
+		minfo->outputs[2].data = NULL;
+		minfo->outputs[2].mode = MATROXFB_OUTPUT_MODE_MONITOR;
+		up_write(&minfo->altout.lock);
 	}
 }
 
diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index 042408a..4fbb386 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -460,7 +460,7 @@ static void maven_init_TVdata(const struct maven_data* md, struct mavenregs* dat
 	}, MATROXFB_OUTPUT_MODE_NTSC, 525, 60 };
 	MINFO_FROM(md->primary_head);
 
-	if (ACCESS_FBINFO(outputs[1]).mode == MATROXFB_OUTPUT_MODE_PAL)
+	if (minfo->outputs[1].mode == MATROXFB_OUTPUT_MODE_PAL)
 		*data = palregs;
 	else
 		*data = ntscregs;
@@ -496,11 +496,11 @@ static void maven_init_TVdata(const struct maven_data* md, struct mavenregs* dat
 	/* Set saturation */
 	{
 		data->regs[0x20] =
-		data->regs[0x22] = ACCESS_FBINFO(altout.tvo_params.saturation);
+		data->regs[0x22] = minfo->altout.tvo_params.saturation;
 	}
  
 	/* Set HUE */
-	data->regs[0x25] = ACCESS_FBINFO(altout.tvo_params.hue);
+	data->regs[0x25] = minfo->altout.tvo_params.hue;
 	return;
 }
 
@@ -743,7 +743,7 @@ static inline int maven_compute_timming(struct maven_data* md,
 	unsigned int a, bv, c;
 	MINFO_FROM(md->primary_head);
 
-	m->mode = ACCESS_FBINFO(outputs[1]).mode;
+	m->mode = minfo->outputs[1].mode;
 	if (m->mode != MATROXFB_OUTPUT_MODE_MONITOR) {
 		unsigned int lmargin;
 		unsigned int umargin;
@@ -1132,7 +1132,7 @@ static int maven_get_control (struct maven_data* md,
 static int maven_out_compute(void* md, struct my_timming* mt) {
 #define mdinfo ((struct maven_data*)md)
 #define minfo (mdinfo->primary_head)
-	return maven_compute_timming(md, mt, &ACCESS_FBINFO(hw).maven);
+	return maven_compute_timming(md, mt, &minfo->hw.maven);
 #undef minfo
 #undef mdinfo
 }
@@ -1140,7 +1140,7 @@ static int maven_out_compute(void* md, struct my_timming* mt) {
 static int maven_out_program(void* md) {
 #define mdinfo ((struct maven_data*)md)
 #define minfo (mdinfo->primary_head)
-	return maven_program_timming(md, &ACCESS_FBINFO(hw).maven);
+	return maven_program_timming(md, &minfo->hw.maven);
 #undef minfo
 #undef mdinfo
 }
@@ -1186,14 +1186,14 @@ static int maven_init_client(struct i2c_client* clnt) {
 	struct maven_data* md = i2c_get_clientdata(clnt);
 	MINFO_FROM(container_of(clnt->adapter, struct i2c_bit_adapter, adapter)->minfo);
 
-	md->primary_head = MINFO;
+	md->primary_head = minfo;
 	md->client = clnt;
-	down_write(&ACCESS_FBINFO(altout.lock));
-	ACCESS_FBINFO(outputs[1]).output = &maven_altout;
-	ACCESS_FBINFO(outputs[1]).src = ACCESS_FBINFO(outputs[1]).default_src;
-	ACCESS_FBINFO(outputs[1]).data = md;
-	ACCESS_FBINFO(outputs[1]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
-	up_write(&ACCESS_FBINFO(altout.lock));
+	down_write(&minfo->altout.lock);
+	minfo->outputs[1].output = &maven_altout;
+	minfo->outputs[1].src = minfo->outputs[1].default_src;
+	minfo->outputs[1].data = md;
+	minfo->outputs[1].mode = MATROXFB_OUTPUT_MODE_MONITOR;
+	up_write(&minfo->altout.lock);
 	if (maven_get_reg(clnt, 0xB2) < 0x14) {
 		md->version = MGATVO_B;
 		/* Tweak some things for this old chip */
@@ -1220,12 +1220,12 @@ static int maven_shutdown_client(struct i2c_client* clnt) {
 	if (md->primary_head) {
 		MINFO_FROM(md->primary_head);
 
-		down_write(&ACCESS_FBINFO(altout.lock));
-		ACCESS_FBINFO(outputs[1]).src = MATROXFB_SRC_NONE;
-		ACCESS_FBINFO(outputs[1]).output = NULL;
-		ACCESS_FBINFO(outputs[1]).data = NULL;
-		ACCESS_FBINFO(outputs[1]).mode = MATROXFB_OUTPUT_MODE_MONITOR;
-		up_write(&ACCESS_FBINFO(altout.lock));
+		down_write(&minfo->altout.lock);
+		minfo->outputs[1].src = MATROXFB_SRC_NONE;
+		minfo->outputs[1].output = NULL;
+		minfo->outputs[1].data = NULL;
+		minfo->outputs[1].mode = MATROXFB_OUTPUT_MODE_MONITOR;
+		up_write(&minfo->altout.lock);
 		md->primary_head = NULL;
 	}
 	return 0;
diff --git a/drivers/video/matrox/matroxfb_misc.c b/drivers/video/matrox/matroxfb_misc.c
index d5b9e78..fe07af8 100644
--- a/drivers/video/matrox/matroxfb_misc.c
+++ b/drivers/video/matrox/matroxfb_misc.c
@@ -190,7 +190,7 @@ int matroxfb_vgaHWinit(WPMINFO struct my_timming* m) {
 	unsigned int wd;
 	unsigned int divider;
 	int i;
-	struct matrox_hw_state * const hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state * const hw = &minfo->hw;
 
 	DBG(__func__)
 
@@ -240,7 +240,7 @@ int matroxfb_vgaHWinit(WPMINFO struct my_timming* m) {
 	/* standard timmings are in 8pixels, but for interleaved we cannot */
 	/* do it for 4bpp (because of (4bpp >> 1(interleaved))/4 == 0) */
 	/* using 16 or more pixels per unit can save us */
-	divider = ACCESS_FBINFO(curr.final_bppShift);
+	divider = minfo->curr.final_bppShift;
 	while (divider & 3) {
 		hd >>= 1;
 		hs >>= 1;
@@ -270,7 +270,7 @@ int matroxfb_vgaHWinit(WPMINFO struct my_timming* m) {
 	if (((ht & 0x07) == 0x06) || ((ht & 0x0F) == 0x04))
 		ht++;
 	hbe = ht;
-	wd = ACCESS_FBINFO(fbcon).var.xres_virtual * ACCESS_FBINFO(curr.final_bppShift) / 64;
+	wd = minfo->fbcon.var.xres_virtual * minfo->curr.final_bppShift / 64;
 
 	hw->CRTCEXT[0] = 0;
 	hw->CRTCEXT[5] = 0;
@@ -287,7 +287,7 @@ int matroxfb_vgaHWinit(WPMINFO struct my_timming* m) {
 			  ((hs      & 0x100) >> 6) | /* sync start */
 			   (hbe     & 0x040);	 /* end hor. blanking */
 	/* FIXME: Enable vidrst only on G400, and only if TV-out is used */
-	if (ACCESS_FBINFO(outputs[1]).src == MATROXFB_SRC_CRTC1)
+	if (minfo->outputs[1].src == MATROXFB_SRC_CRTC1)
 		hw->CRTCEXT[1] |= 0x88;		/* enable horizontal and vertical vidrst */
 	hw->CRTCEXT[2] =  ((vt & 0xC00) >> 10) |
 			  ((vd & 0x400) >>  8) |	/* disp end */
@@ -333,7 +333,7 @@ int matroxfb_vgaHWinit(WPMINFO struct my_timming* m) {
 
 void matroxfb_vgaHWrestore(WPMINFO2) {
 	int i;
-	struct matrox_hw_state * const hw = &ACCESS_FBINFO(hw);
+	struct matrox_hw_state * const hw = &minfo->hw;
 	CRITFLAGS
 
 	DBG(__func__)
@@ -533,98 +533,98 @@ static int parse_pins1(WPMINFO const struct matrox_bios* bd) {
 	if (get_unaligned_le16(bd->pins + 24)) {
 		maxdac = get_unaligned_le16(bd->pins + 24) * 10;
 	}
-	MINFO->limits.pixel.vcomax = maxdac;
-	MINFO->values.pll.system = get_unaligned_le16(bd->pins + 28) ?
+	minfo->limits.pixel.vcomax = maxdac;
+	minfo->values.pll.system = get_unaligned_le16(bd->pins + 28) ?
 		get_unaligned_le16(bd->pins + 28) * 10 : 50000;
 	/* ignore 4MB, 8MB, module clocks */
-	MINFO->features.pll.ref_freq = 14318;
-	MINFO->values.reg.mctlwtst	= 0x00030101;
+	minfo->features.pll.ref_freq = 14318;
+	minfo->values.reg.mctlwtst	= 0x00030101;
 	return 0;
 }
 
 static void default_pins1(WPMINFO2) {
 	/* Millennium */
-	MINFO->limits.pixel.vcomax	= 220000;
-	MINFO->values.pll.system	=  50000;
-	MINFO->features.pll.ref_freq	=  14318;
-	MINFO->values.reg.mctlwtst	= 0x00030101;
+	minfo->limits.pixel.vcomax	= 220000;
+	minfo->values.pll.system	=  50000;
+	minfo->features.pll.ref_freq	=  14318;
+	minfo->values.reg.mctlwtst	= 0x00030101;
 }
 
 static int parse_pins2(WPMINFO const struct matrox_bios* bd) {
-	MINFO->limits.pixel.vcomax	=
-	MINFO->limits.system.vcomax	= (bd->pins[41] == 0xFF) ? 230000 : ((bd->pins[41] + 100) * 1000);
-	MINFO->values.reg.mctlwtst	= ((bd->pins[51] & 0x01) ? 0x00000001 : 0) |
+	minfo->limits.pixel.vcomax	=
+	minfo->limits.system.vcomax	= (bd->pins[41] == 0xFF) ? 230000 : ((bd->pins[41] + 100) * 1000);
+	minfo->values.reg.mctlwtst	= ((bd->pins[51] & 0x01) ? 0x00000001 : 0) |
 					  ((bd->pins[51] & 0x02) ? 0x00000100 : 0) |
 					  ((bd->pins[51] & 0x04) ? 0x00010000 : 0) |
 					  ((bd->pins[51] & 0x08) ? 0x00020000 : 0);
-	MINFO->values.pll.system	= (bd->pins[43] == 0xFF) ? 50000 : ((bd->pins[43] + 100) * 1000);
-	MINFO->features.pll.ref_freq	= 14318;
+	minfo->values.pll.system	= (bd->pins[43] == 0xFF) ? 50000 : ((bd->pins[43] + 100) * 1000);
+	minfo->features.pll.ref_freq	= 14318;
 	return 0;
 }
 
 static void default_pins2(WPMINFO2) {
 	/* Millennium II, Mystique */
-	MINFO->limits.pixel.vcomax	=
-	MINFO->limits.system.vcomax	= 230000;
-	MINFO->values.reg.mctlwtst	= 0x00030101;
-	MINFO->values.pll.system	=  50000;
-	MINFO->features.pll.ref_freq	=  14318;
+	minfo->limits.pixel.vcomax	=
+	minfo->limits.system.vcomax	= 230000;
+	minfo->values.reg.mctlwtst	= 0x00030101;
+	minfo->values.pll.system	=  50000;
+	minfo->features.pll.ref_freq	=  14318;
 }
 
 static int parse_pins3(WPMINFO const struct matrox_bios* bd) {
-	MINFO->limits.pixel.vcomax	=
-	MINFO->limits.system.vcomax	= (bd->pins[36] == 0xFF) ? 230000			: ((bd->pins[36] + 100) * 1000);
-	MINFO->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 48) == 0xFFFFFFFF ?
+	minfo->limits.pixel.vcomax	=
+	minfo->limits.system.vcomax	= (bd->pins[36] == 0xFF) ? 230000			: ((bd->pins[36] + 100) * 1000);
+	minfo->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 48) == 0xFFFFFFFF ?
 		0x01250A21 : get_unaligned_le32(bd->pins + 48);
 	/* memory config */
-	MINFO->values.reg.memrdbk	= ((bd->pins[57] << 21) & 0x1E000000) |
+	minfo->values.reg.memrdbk	= ((bd->pins[57] << 21) & 0x1E000000) |
 					  ((bd->pins[57] << 22) & 0x00C00000) |
 					  ((bd->pins[56] <<  1) & 0x000001E0) |
 					  ( bd->pins[56]        & 0x0000000F);
-	MINFO->values.reg.opt		= (bd->pins[54] & 7) << 10;
-	MINFO->values.reg.opt2		= bd->pins[58] << 12;
-	MINFO->features.pll.ref_freq	= (bd->pins[52] & 0x20) ? 14318 : 27000;
+	minfo->values.reg.opt		= (bd->pins[54] & 7) << 10;
+	minfo->values.reg.opt2		= bd->pins[58] << 12;
+	minfo->features.pll.ref_freq	= (bd->pins[52] & 0x20) ? 14318 : 27000;
 	return 0;
 }
 
 static void default_pins3(WPMINFO2) {
 	/* G100, G200 */
-	MINFO->limits.pixel.vcomax	=
-	MINFO->limits.system.vcomax	= 230000;
-	MINFO->values.reg.mctlwtst	= 0x01250A21;
-	MINFO->values.reg.memrdbk	= 0x00000000;
-	MINFO->values.reg.opt		= 0x00000C00;
-	MINFO->values.reg.opt2		= 0x00000000;
-	MINFO->features.pll.ref_freq	=  27000;
+	minfo->limits.pixel.vcomax	=
+	minfo->limits.system.vcomax	= 230000;
+	minfo->values.reg.mctlwtst	= 0x01250A21;
+	minfo->values.reg.memrdbk	= 0x00000000;
+	minfo->values.reg.opt		= 0x00000C00;
+	minfo->values.reg.opt2		= 0x00000000;
+	minfo->features.pll.ref_freq	=  27000;
 }
 
 static int parse_pins4(WPMINFO const struct matrox_bios* bd) {
-	MINFO->limits.pixel.vcomax	= (bd->pins[ 39] == 0xFF) ? 230000			: bd->pins[ 39] * 4000;
-	MINFO->limits.system.vcomax	= (bd->pins[ 38] == 0xFF) ? MINFO->limits.pixel.vcomax	: bd->pins[ 38] * 4000;
-	MINFO->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 71);
-	MINFO->values.reg.memrdbk	= ((bd->pins[87] << 21) & 0x1E000000) |
+	minfo->limits.pixel.vcomax	= (bd->pins[ 39] == 0xFF) ? 230000			: bd->pins[ 39] * 4000;
+	minfo->limits.system.vcomax	= (bd->pins[ 38] == 0xFF) ? minfo->limits.pixel.vcomax	: bd->pins[ 38] * 4000;
+	minfo->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 71);
+	minfo->values.reg.memrdbk	= ((bd->pins[87] << 21) & 0x1E000000) |
 					  ((bd->pins[87] << 22) & 0x00C00000) |
 					  ((bd->pins[86] <<  1) & 0x000001E0) |
 					  ( bd->pins[86]        & 0x0000000F);
-	MINFO->values.reg.opt		= ((bd->pins[53] << 15) & 0x00400000) |
+	minfo->values.reg.opt		= ((bd->pins[53] << 15) & 0x00400000) |
 					  ((bd->pins[53] << 22) & 0x10000000) |
 					  ((bd->pins[53] <<  7) & 0x00001C00);
-	MINFO->values.reg.opt3		= get_unaligned_le32(bd->pins + 67);
-	MINFO->values.pll.system	= (bd->pins[ 65] == 0xFF) ? 200000 			: bd->pins[ 65] * 4000;
-	MINFO->features.pll.ref_freq	= (bd->pins[ 92] & 0x01) ? 14318 : 27000;
+	minfo->values.reg.opt3		= get_unaligned_le32(bd->pins + 67);
+	minfo->values.pll.system	= (bd->pins[ 65] == 0xFF) ? 200000 			: bd->pins[ 65] * 4000;
+	minfo->features.pll.ref_freq	= (bd->pins[ 92] & 0x01) ? 14318 : 27000;
 	return 0;
 }
 
 static void default_pins4(WPMINFO2) {
 	/* G400 */
-	MINFO->limits.pixel.vcomax	=
-	MINFO->limits.system.vcomax	= 252000;
-	MINFO->values.reg.mctlwtst	= 0x04A450A1;
-	MINFO->values.reg.memrdbk	= 0x000000E7;
-	MINFO->values.reg.opt		= 0x10000400;
-	MINFO->values.reg.opt3		= 0x0190A419;
-	MINFO->values.pll.system	= 200000;
-	MINFO->features.pll.ref_freq	= 27000;
+	minfo->limits.pixel.vcomax	=
+	minfo->limits.system.vcomax	= 252000;
+	minfo->values.reg.mctlwtst	= 0x04A450A1;
+	minfo->values.reg.memrdbk	= 0x000000E7;
+	minfo->values.reg.opt		= 0x10000400;
+	minfo->values.reg.opt3		= 0x0190A419;
+	minfo->values.pll.system	= 200000;
+	minfo->features.pll.ref_freq	= 27000;
 }
 
 static int parse_pins5(WPMINFO const struct matrox_bios* bd) {
@@ -632,65 +632,65 @@ static int parse_pins5(WPMINFO const struct matrox_bios* bd) {
 	
 	mult = bd->pins[4]?8000:6000;
 	
-	MINFO->limits.pixel.vcomax	= (bd->pins[ 38] == 0xFF) ? 600000			: bd->pins[ 38] * mult;
-	MINFO->limits.system.vcomax	= (bd->pins[ 36] == 0xFF) ? MINFO->limits.pixel.vcomax	: bd->pins[ 36] * mult;
-	MINFO->limits.video.vcomax	= (bd->pins[ 37] == 0xFF) ? MINFO->limits.system.vcomax	: bd->pins[ 37] * mult;
-	MINFO->limits.pixel.vcomin	= (bd->pins[123] == 0xFF) ? 256000			: bd->pins[123] * mult;
-	MINFO->limits.system.vcomin	= (bd->pins[121] == 0xFF) ? MINFO->limits.pixel.vcomin	: bd->pins[121] * mult;
-	MINFO->limits.video.vcomin	= (bd->pins[122] == 0xFF) ? MINFO->limits.system.vcomin	: bd->pins[122] * mult;
-	MINFO->values.pll.system	=
-	MINFO->values.pll.video		= (bd->pins[ 92] == 0xFF) ? 284000			: bd->pins[ 92] * 4000;
-	MINFO->values.reg.opt		= get_unaligned_le32(bd->pins + 48);
-	MINFO->values.reg.opt2		= get_unaligned_le32(bd->pins + 52);
-	MINFO->values.reg.opt3		= get_unaligned_le32(bd->pins + 94);
-	MINFO->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 98);
-	MINFO->values.reg.memmisc	= get_unaligned_le32(bd->pins + 102);
-	MINFO->values.reg.memrdbk	= get_unaligned_le32(bd->pins + 106);
-	MINFO->features.pll.ref_freq	= (bd->pins[110] & 0x01) ? 14318 : 27000;
-	MINFO->values.memory.ddr	= (bd->pins[114] & 0x60) == 0x20;
-	MINFO->values.memory.dll	= (bd->pins[115] & 0x02) != 0;
-	MINFO->values.memory.emrswen	= (bd->pins[115] & 0x01) != 0;
-	MINFO->values.reg.maccess	= MINFO->values.memory.emrswen ? 0x00004000 : 0x00000000;
+	minfo->limits.pixel.vcomax	= (bd->pins[ 38] == 0xFF) ? 600000			: bd->pins[ 38] * mult;
+	minfo->limits.system.vcomax	= (bd->pins[ 36] == 0xFF) ? minfo->limits.pixel.vcomax	: bd->pins[ 36] * mult;
+	minfo->limits.video.vcomax	= (bd->pins[ 37] == 0xFF) ? minfo->limits.system.vcomax	: bd->pins[ 37] * mult;
+	minfo->limits.pixel.vcomin	= (bd->pins[123] == 0xFF) ? 256000			: bd->pins[123] * mult;
+	minfo->limits.system.vcomin	= (bd->pins[121] == 0xFF) ? minfo->limits.pixel.vcomin	: bd->pins[121] * mult;
+	minfo->limits.video.vcomin	= (bd->pins[122] == 0xFF) ? minfo->limits.system.vcomin	: bd->pins[122] * mult;
+	minfo->values.pll.system	=
+	minfo->values.pll.video		= (bd->pins[ 92] == 0xFF) ? 284000			: bd->pins[ 92] * 4000;
+	minfo->values.reg.opt		= get_unaligned_le32(bd->pins + 48);
+	minfo->values.reg.opt2		= get_unaligned_le32(bd->pins + 52);
+	minfo->values.reg.opt3		= get_unaligned_le32(bd->pins + 94);
+	minfo->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 98);
+	minfo->values.reg.memmisc	= get_unaligned_le32(bd->pins + 102);
+	minfo->values.reg.memrdbk	= get_unaligned_le32(bd->pins + 106);
+	minfo->features.pll.ref_freq	= (bd->pins[110] & 0x01) ? 14318 : 27000;
+	minfo->values.memory.ddr	= (bd->pins[114] & 0x60) == 0x20;
+	minfo->values.memory.dll	= (bd->pins[115] & 0x02) != 0;
+	minfo->values.memory.emrswen	= (bd->pins[115] & 0x01) != 0;
+	minfo->values.reg.maccess	= minfo->values.memory.emrswen ? 0x00004000 : 0x00000000;
 	if (bd->pins[115] & 4) {
-		MINFO->values.reg.mctlwtst_core = MINFO->values.reg.mctlwtst;
+		minfo->values.reg.mctlwtst_core = minfo->values.reg.mctlwtst;
 	} else {
 		u_int32_t wtst_xlat[] = { 0, 1, 5, 6, 7, 5, 2, 3 };
-		MINFO->values.reg.mctlwtst_core = (MINFO->values.reg.mctlwtst & ~7) |
-		                                  wtst_xlat[MINFO->values.reg.mctlwtst & 7];
+		minfo->values.reg.mctlwtst_core = (minfo->values.reg.mctlwtst & ~7) |
+						  wtst_xlat[minfo->values.reg.mctlwtst & 7];
 	}
-	MINFO->max_pixel_clock_panellink = bd->pins[47] * 4000;
+	minfo->max_pixel_clock_panellink = bd->pins[47] * 4000;
 	return 0;
 }
 
 static void default_pins5(WPMINFO2) {
 	/* Mine 16MB G450 with SDRAM DDR */
-	MINFO->limits.pixel.vcomax	=
-	MINFO->limits.system.vcomax	=
-	MINFO->limits.video.vcomax	= 600000;
-	MINFO->limits.pixel.vcomin	=
-	MINFO->limits.system.vcomin	=
-	MINFO->limits.video.vcomin	= 256000;
-	MINFO->values.pll.system	=
-	MINFO->values.pll.video		= 284000;
-	MINFO->values.reg.opt		= 0x404A1160;
-	MINFO->values.reg.opt2		= 0x0000AC00;
-	MINFO->values.reg.opt3		= 0x0090A409;
-	MINFO->values.reg.mctlwtst_core	=
-	MINFO->values.reg.mctlwtst	= 0x0C81462B;
-	MINFO->values.reg.memmisc	= 0x80000004;
-	MINFO->values.reg.memrdbk	= 0x01001103;
-	MINFO->features.pll.ref_freq	= 27000;
-	MINFO->values.memory.ddr	= 1;
-	MINFO->values.memory.dll	= 1;
-	MINFO->values.memory.emrswen	= 1;
-	MINFO->values.reg.maccess	= 0x00004000;
+	minfo->limits.pixel.vcomax	=
+	minfo->limits.system.vcomax	=
+	minfo->limits.video.vcomax	= 600000;
+	minfo->limits.pixel.vcomin	=
+	minfo->limits.system.vcomin	=
+	minfo->limits.video.vcomin	= 256000;
+	minfo->values.pll.system	=
+	minfo->values.pll.video		= 284000;
+	minfo->values.reg.opt		= 0x404A1160;
+	minfo->values.reg.opt2		= 0x0000AC00;
+	minfo->values.reg.opt3		= 0x0090A409;
+	minfo->values.reg.mctlwtst_core	=
+	minfo->values.reg.mctlwtst	= 0x0C81462B;
+	minfo->values.reg.memmisc	= 0x80000004;
+	minfo->values.reg.memrdbk	= 0x01001103;
+	minfo->features.pll.ref_freq	= 27000;
+	minfo->values.memory.ddr	= 1;
+	minfo->values.memory.dll	= 1;
+	minfo->values.memory.emrswen	= 1;
+	minfo->values.reg.maccess	= 0x00004000;
 }
 
 static int matroxfb_set_limits(WPMINFO const struct matrox_bios* bd) {
 	unsigned int pins_version;
 	static const unsigned int pinslen[] = { 64, 64, 64, 128, 128 };
 
-	switch (ACCESS_FBINFO(chip)) {
+	switch (minfo->chip) {
 		case MGA_2064:	default_pins1(PMINFO2); break;
 		case MGA_2164:
 		case MGA_1064:
@@ -743,19 +743,19 @@ void matroxfb_read_pins(WPMINFO2) {
 	u32 opt;
 	u32 biosbase;
 	u32 fbbase;
-	struct pci_dev* pdev = ACCESS_FBINFO(pcidev);
+	struct pci_dev *pdev = minfo->pcidev;
 	
-	memset(&ACCESS_FBINFO(bios), 0, sizeof(ACCESS_FBINFO(bios)));
+	memset(&minfo->bios, 0, sizeof(minfo->bios));
 	pci_read_config_dword(pdev, PCI_OPTION_REG, &opt);
 	pci_write_config_dword(pdev, PCI_OPTION_REG, opt | PCI_OPTION_ENABLE_ROM);
 	pci_read_config_dword(pdev, PCI_ROM_ADDRESS, &biosbase);
-	pci_read_config_dword(pdev, ACCESS_FBINFO(devflags.fbResource), &fbbase);
+	pci_read_config_dword(pdev, minfo->devflags.fbResource, &fbbase);
 	pci_write_config_dword(pdev, PCI_ROM_ADDRESS, (fbbase & PCI_ROM_ADDRESS_MASK) | PCI_ROM_ADDRESS_ENABLE);
-	parse_bios(vaddr_va(ACCESS_FBINFO(video).vbase), &ACCESS_FBINFO(bios));
+	parse_bios(vaddr_va(minfo->video.vbase), &minfo->bios);
 	pci_write_config_dword(pdev, PCI_ROM_ADDRESS, biosbase);
 	pci_write_config_dword(pdev, PCI_OPTION_REG, opt);
 #ifdef CONFIG_X86
-	if (!ACCESS_FBINFO(bios).bios_valid) {
+	if (!minfo->bios.bios_valid) {
 		unsigned char __iomem* b;
 
 		b = ioremap(0x000C0000, 65536);
@@ -769,15 +769,15 @@ void matroxfb_read_pins(WPMINFO2) {
 				printk(KERN_INFO "matroxfb: Legacy BIOS is for %04X:%04X, while this device is %04X:%04X\n",
 					ven, dev, pdev->vendor, pdev->device);
 			} else {
-				parse_bios(b, &ACCESS_FBINFO(bios));
+				parse_bios(b, &minfo->bios);
 			}
 			iounmap(b);
 		}
 	}
 #endif
-	matroxfb_set_limits(PMINFO &ACCESS_FBINFO(bios));
+	matroxfb_set_limits(PMINFO &minfo->bios);
 	printk(KERN_INFO "PInS memtype = %u\n",
-	       (ACCESS_FBINFO(values).reg.opt & 0x1C00) >> 10);
+	       (minfo->values.reg.opt & 0x1C00) >> 10);
 }
 
 EXPORT_SYMBOL(matroxfb_DAC_in);
diff --git a/drivers/video/matrox/matroxfb_misc.h b/drivers/video/matrox/matroxfb_misc.h
index cb62cc0..b460045 100644
--- a/drivers/video/matrox/matroxfb_misc.h
+++ b/drivers/video/matrox/matroxfb_misc.h
@@ -8,7 +8,7 @@ int matroxfb_PLL_calcclock(const struct matrox_pll_features* pll, unsigned int f
 	unsigned int* in, unsigned int* feed, unsigned int* post);
 static inline int PLL_calcclock(CPMINFO unsigned int freq, unsigned int fmax,
 		unsigned int* in, unsigned int* feed, unsigned int* post) {
-	return matroxfb_PLL_calcclock(&ACCESS_FBINFO(features.pll), freq, fmax, in, feed, post);
+	return matroxfb_PLL_calcclock(&minfo->features.pll, freq, fmax, in, feed, post);
 }
 
 int matroxfb_vgaHWinit(WPMINFO struct my_timming* m);
-- 
cgit v0.10.2


From 316b4d644caceb2cf7432d8a27e45b88f57ef2a0 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 22 Sep 2009 16:47:49 -0700
Subject: matroxfb: get rid of unneeded macros WPMINFO and friends

With multihead support always enabled, these macros are no longer needed
and make the code harder to read.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Petr Vandrovec <vandrove@vc.cvut.cz>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/matrox/g450_pll.c b/drivers/video/matrox/g450_pll.c
index 8c75427..09f6e04 100644
--- a/drivers/video/matrox/g450_pll.c
+++ b/drivers/video/matrox/g450_pll.c
@@ -25,7 +25,9 @@ static inline unsigned int g450_f2vco(unsigned char p, unsigned int fin) {
 	return (p & 0x40) ? fin : fin << ((p & 3) + 1);
 }
 
-static unsigned int g450_mnp2vco(CPMINFO unsigned int mnp) {
+static unsigned int g450_mnp2vco(const struct matrox_fb_info *minfo,
+				 unsigned int mnp)
+{
 	unsigned int m, n;
 
 	m = ((mnp >> 16) & 0x0FF) + 1;
@@ -33,8 +35,9 @@ static unsigned int g450_mnp2vco(CPMINFO unsigned int mnp) {
 	return (minfo->features.pll.ref_freq * n + (m >> 1)) / m;
 }
 
-unsigned int g450_mnp2f(CPMINFO unsigned int mnp) {
-	return g450_vco2f(mnp, g450_mnp2vco(PMINFO mnp));
+unsigned int g450_mnp2f(const struct matrox_fb_info *minfo, unsigned int mnp)
+{
+	return g450_vco2f(mnp, g450_mnp2vco(minfo, mnp));
 }
 
 static inline unsigned int pll_freq_delta(unsigned int f1, unsigned int f2) {
@@ -49,7 +52,10 @@ static inline unsigned int pll_freq_delta(unsigned int f1, unsigned int f2) {
 #define NO_MORE_MNP	0x01FFFFFF
 #define G450_MNP_FREQBITS	(0xFFFFFF43)	/* do not mask high byte so we'll catch NO_MORE_MNP */
 
-static unsigned int g450_nextpll(CPMINFO const struct matrox_pll_limits* pi, unsigned int* fvco, unsigned int mnp) {
+static unsigned int g450_nextpll(const struct matrox_fb_info *minfo,
+				 const struct matrox_pll_limits *pi,
+				 unsigned int *fvco, unsigned int mnp)
+{
 	unsigned int m, n, p;
 	unsigned int tvco = *fvco;
 
@@ -95,7 +101,10 @@ static unsigned int g450_nextpll(CPMINFO const struct matrox_pll_limits* pi, uns
 	return (m << 16) | (n << 8) | p;
 }
 
-static unsigned int g450_firstpll(CPMINFO const struct matrox_pll_limits* pi, unsigned int* vco, unsigned int fout) {
+static unsigned int g450_firstpll(const struct matrox_fb_info *minfo,
+				  const struct matrox_pll_limits *pi,
+				  unsigned int *vco, unsigned int fout)
+{
 	unsigned int p;
 	unsigned int vcomax;
 
@@ -121,88 +130,94 @@ static unsigned int g450_firstpll(CPMINFO const struct matrox_pll_limits* pi, un
 		}
 		*vco = tvco;
 	}
-	return g450_nextpll(PMINFO pi, vco, 0xFF0000 | p);
+	return g450_nextpll(minfo, pi, vco, 0xFF0000 | p);
 }
 
-static inline unsigned int g450_setpll(CPMINFO unsigned int mnp, unsigned int pll) {
+static inline unsigned int g450_setpll(const struct matrox_fb_info *minfo,
+				       unsigned int mnp, unsigned int pll)
+{
 	switch (pll) {
 		case M_PIXEL_PLL_A:
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLAM, mnp >> 16);
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLAN, mnp >> 8);
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLAP, mnp);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLAM, mnp >> 16);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLAN, mnp >> 8);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLAP, mnp);
 			return M1064_XPIXPLLSTAT;
 
 		case M_PIXEL_PLL_B:
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLBM, mnp >> 16);
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLBN, mnp >> 8);
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLBP, mnp);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLBM, mnp >> 16);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLBN, mnp >> 8);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLBP, mnp);
 			return M1064_XPIXPLLSTAT;
 
 		case M_PIXEL_PLL_C:
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLCM, mnp >> 16);
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLCN, mnp >> 8);
-			matroxfb_DAC_out(PMINFO M1064_XPIXPLLCP, mnp);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLCM, mnp >> 16);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLCN, mnp >> 8);
+			matroxfb_DAC_out(minfo, M1064_XPIXPLLCP, mnp);
 			return M1064_XPIXPLLSTAT;
 
 		case M_SYSTEM_PLL:
-			matroxfb_DAC_out(PMINFO DAC1064_XSYSPLLM, mnp >> 16);
-			matroxfb_DAC_out(PMINFO DAC1064_XSYSPLLN, mnp >> 8);
-			matroxfb_DAC_out(PMINFO DAC1064_XSYSPLLP, mnp);
+			matroxfb_DAC_out(minfo, DAC1064_XSYSPLLM, mnp >> 16);
+			matroxfb_DAC_out(minfo, DAC1064_XSYSPLLN, mnp >> 8);
+			matroxfb_DAC_out(minfo, DAC1064_XSYSPLLP, mnp);
 			return DAC1064_XSYSPLLSTAT;
 
 		case M_VIDEO_PLL:
-			matroxfb_DAC_out(PMINFO M1064_XVIDPLLM, mnp >> 16);
-			matroxfb_DAC_out(PMINFO M1064_XVIDPLLN, mnp >> 8);
-			matroxfb_DAC_out(PMINFO M1064_XVIDPLLP, mnp);
+			matroxfb_DAC_out(minfo, M1064_XVIDPLLM, mnp >> 16);
+			matroxfb_DAC_out(minfo, M1064_XVIDPLLN, mnp >> 8);
+			matroxfb_DAC_out(minfo, M1064_XVIDPLLP, mnp);
 			return M1064_XVIDPLLSTAT;
 	}
 	return 0;
 }
 
-static inline unsigned int g450_cmppll(CPMINFO unsigned int mnp, unsigned int pll) {
+static inline unsigned int g450_cmppll(const struct matrox_fb_info *minfo,
+				       unsigned int mnp, unsigned int pll)
+{
 	unsigned char m = mnp >> 16;
 	unsigned char n = mnp >> 8;
 	unsigned char p = mnp;
 
 	switch (pll) {
 		case M_PIXEL_PLL_A:
-			return (matroxfb_DAC_in(PMINFO M1064_XPIXPLLAM) != m ||
-				matroxfb_DAC_in(PMINFO M1064_XPIXPLLAN) != n ||
-				matroxfb_DAC_in(PMINFO M1064_XPIXPLLAP) != p);
+			return (matroxfb_DAC_in(minfo, M1064_XPIXPLLAM) != m ||
+				matroxfb_DAC_in(minfo, M1064_XPIXPLLAN) != n ||
+				matroxfb_DAC_in(minfo, M1064_XPIXPLLAP) != p);
 
 		case M_PIXEL_PLL_B:
-			return (matroxfb_DAC_in(PMINFO M1064_XPIXPLLBM) != m ||
-				matroxfb_DAC_in(PMINFO M1064_XPIXPLLBN) != n ||
-				matroxfb_DAC_in(PMINFO M1064_XPIXPLLBP) != p);
+			return (matroxfb_DAC_in(minfo, M1064_XPIXPLLBM) != m ||
+				matroxfb_DAC_in(minfo, M1064_XPIXPLLBN) != n ||
+				matroxfb_DAC_in(minfo, M1064_XPIXPLLBP) != p);
 
 		case M_PIXEL_PLL_C:
-			return (matroxfb_DAC_in(PMINFO M1064_XPIXPLLCM) != m ||
-				matroxfb_DAC_in(PMINFO M1064_XPIXPLLCN) != n ||
-				matroxfb_DAC_in(PMINFO M1064_XPIXPLLCP) != p);
+			return (matroxfb_DAC_in(minfo, M1064_XPIXPLLCM) != m ||
+				matroxfb_DAC_in(minfo, M1064_XPIXPLLCN) != n ||
+				matroxfb_DAC_in(minfo, M1064_XPIXPLLCP) != p);
 
 		case M_SYSTEM_PLL:
-			return (matroxfb_DAC_in(PMINFO DAC1064_XSYSPLLM) != m ||
-				matroxfb_DAC_in(PMINFO DAC1064_XSYSPLLN) != n ||
-				matroxfb_DAC_in(PMINFO DAC1064_XSYSPLLP) != p);
+			return (matroxfb_DAC_in(minfo, DAC1064_XSYSPLLM) != m ||
+				matroxfb_DAC_in(minfo, DAC1064_XSYSPLLN) != n ||
+				matroxfb_DAC_in(minfo, DAC1064_XSYSPLLP) != p);
 
 		case M_VIDEO_PLL:
-			return (matroxfb_DAC_in(PMINFO M1064_XVIDPLLM) != m ||
-				matroxfb_DAC_in(PMINFO M1064_XVIDPLLN) != n ||
-				matroxfb_DAC_in(PMINFO M1064_XVIDPLLP) != p);
+			return (matroxfb_DAC_in(minfo, M1064_XVIDPLLM) != m ||
+				matroxfb_DAC_in(minfo, M1064_XVIDPLLN) != n ||
+				matroxfb_DAC_in(minfo, M1064_XVIDPLLP) != p);
 	}
 	return 1;
 }
 
-static inline int g450_isplllocked(CPMINFO unsigned int regidx) {
+static inline int g450_isplllocked(const struct matrox_fb_info *minfo,
+				   unsigned int regidx)
+{
 	unsigned int j;
 
 	for (j = 0; j < 1000; j++) {
-		if (matroxfb_DAC_in(PMINFO regidx) & 0x40) {
+		if (matroxfb_DAC_in(minfo, regidx) & 0x40) {
 			unsigned int r = 0;
 			int i;
 
 			for (i = 0; i < 100; i++) {
-				r += matroxfb_DAC_in(PMINFO regidx) & 0x40;
+				r += matroxfb_DAC_in(minfo, regidx) & 0x40;
 			}
 			return r >= (90 * 0x40);
 		}
@@ -211,8 +226,10 @@ static inline int g450_isplllocked(CPMINFO unsigned int regidx) {
 	return 0;
 }
 
-static int g450_testpll(CPMINFO unsigned int mnp, unsigned int pll) {
-	return g450_isplllocked(PMINFO g450_setpll(PMINFO mnp, pll));
+static int g450_testpll(const struct matrox_fb_info *minfo, unsigned int mnp,
+			unsigned int pll)
+{
+	return g450_isplllocked(minfo, g450_setpll(minfo, mnp, pll));
 }
 
 static void updatehwstate_clk(struct matrox_hw_state* hw, unsigned int mnp, unsigned int pll) {
@@ -225,13 +242,19 @@ static void updatehwstate_clk(struct matrox_hw_state* hw, unsigned int mnp, unsi
 	}
 }
 
-void matroxfb_g450_setpll_cond(WPMINFO unsigned int mnp, unsigned int pll) {
-	if (g450_cmppll(PMINFO mnp, pll)) {
-		g450_setpll(PMINFO mnp, pll);
+void matroxfb_g450_setpll_cond(struct matrox_fb_info *minfo, unsigned int mnp,
+			       unsigned int pll)
+{
+	if (g450_cmppll(minfo, mnp, pll)) {
+		g450_setpll(minfo, mnp, pll);
 	}
 }
 
-static inline unsigned int g450_findworkingpll(WPMINFO unsigned int pll, unsigned int* mnparray, unsigned int mnpcount) {
+static inline unsigned int g450_findworkingpll(struct matrox_fb_info *minfo,
+					       unsigned int pll,
+					       unsigned int *mnparray,
+					       unsigned int mnpcount)
+{
 	unsigned int found = 0;
 	unsigned int idx;
 	unsigned int mnpfound = mnparray[0];
@@ -255,22 +278,22 @@ static inline unsigned int g450_findworkingpll(WPMINFO unsigned int pll, unsigne
 		while (sptr >= sarray) {
 			unsigned int mnp = *sptr--;
 		
-			if (g450_testpll(PMINFO mnp - 0x0300, pll) &&
-			    g450_testpll(PMINFO mnp + 0x0300, pll) &&
-			    g450_testpll(PMINFO mnp - 0x0200, pll) &&
-			    g450_testpll(PMINFO mnp + 0x0200, pll) &&
-			    g450_testpll(PMINFO mnp - 0x0100, pll) &&
-			    g450_testpll(PMINFO mnp + 0x0100, pll)) {
-				if (g450_testpll(PMINFO mnp, pll)) {
+			if (g450_testpll(minfo, mnp - 0x0300, pll) &&
+			    g450_testpll(minfo, mnp + 0x0300, pll) &&
+			    g450_testpll(minfo, mnp - 0x0200, pll) &&
+			    g450_testpll(minfo, mnp + 0x0200, pll) &&
+			    g450_testpll(minfo, mnp - 0x0100, pll) &&
+			    g450_testpll(minfo, mnp + 0x0100, pll)) {
+				if (g450_testpll(minfo, mnp, pll)) {
 					return mnp;
 				}
-			} else if (!found && g450_testpll(PMINFO mnp, pll)) {
+			} else if (!found && g450_testpll(minfo, mnp, pll)) {
 				mnpfound = mnp;
 				found = 1;
 			}
 		}
 	}
-	g450_setpll(PMINFO mnpfound, pll);
+	g450_setpll(minfo, mnpfound, pll);
 	return mnpfound;
 }
 
@@ -283,7 +306,9 @@ static void g450_addcache(struct matrox_pll_cache* ci, unsigned int mnp_key, uns
 	ci->data[0].mnp_value = mnp_value;
 }
 
-static int g450_checkcache(WPMINFO struct matrox_pll_cache* ci, unsigned int mnp_key) {
+static int g450_checkcache(struct matrox_fb_info *minfo,
+			   struct matrox_pll_cache *ci, unsigned int mnp_key)
+{
 	unsigned int i;
 	
 	mnp_key &= G450_MNP_FREQBITS;
@@ -303,8 +328,10 @@ static int g450_checkcache(WPMINFO struct matrox_pll_cache* ci, unsigned int mnp
 	return NO_MORE_MNP;
 }
 
-static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll, 
-		unsigned int* mnparray, unsigned int* deltaarray) {
+static int __g450_setclk(struct matrox_fb_info *minfo, unsigned int fout,
+			 unsigned int pll, unsigned int *mnparray,
+			 unsigned int *deltaarray)
+{
 	unsigned int mnpcount;
 	unsigned int pixel_vco;
 	const struct matrox_pll_limits* pi;
@@ -321,16 +348,16 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 				
 				matroxfb_DAC_lock_irqsave(flags);
 
-				xpwrctrl = matroxfb_DAC_in(PMINFO M1064_XPWRCTRL);
-				matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl & ~M1064_XPWRCTRL_PANELPDN);
+				xpwrctrl = matroxfb_DAC_in(minfo, M1064_XPWRCTRL);
+				matroxfb_DAC_out(minfo, M1064_XPWRCTRL, xpwrctrl & ~M1064_XPWRCTRL_PANELPDN);
 				mga_outb(M_SEQ_INDEX, M_SEQ1);
 				mga_outb(M_SEQ_DATA, mga_inb(M_SEQ_DATA) | M_SEQ1_SCROFF);
-				tmp = matroxfb_DAC_in(PMINFO M1064_XPIXCLKCTRL);
+				tmp = matroxfb_DAC_in(minfo, M1064_XPIXCLKCTRL);
 				tmp |= M1064_XPIXCLKCTRL_DIS;
 				if (!(tmp & M1064_XPIXCLKCTRL_PLL_UP)) {
 					tmp |= M1064_XPIXCLKCTRL_PLL_UP;
 				}
-				matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp);
+				matroxfb_DAC_out(minfo, M1064_XPIXCLKCTRL, tmp);
 				/* DVI PLL preferred for frequencies up to
 				   panel link max, standard PLL otherwise */
 				if (fout >= minfo->max_pixel_clock_panellink)
@@ -341,8 +368,8 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 					M1064_XDVICLKCTRL_C1DVICLKEN |
 					M1064_XDVICLKCTRL_DVILOOPCTL |
 					M1064_XDVICLKCTRL_P1LOOPBWDTCTL;
-				matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL,tmp);
-				matroxfb_DAC_out(PMINFO M1064_XPWRCTRL,
+				matroxfb_DAC_out(minfo, M1064_XDVICLKCTRL, tmp);
+				matroxfb_DAC_out(minfo, M1064_XPWRCTRL,
 						 xpwrctrl);
 
 				matroxfb_DAC_unlock_irqrestore(flags);
@@ -385,14 +412,14 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 				unsigned long flags;
 				
 				matroxfb_DAC_lock_irqsave(flags);
-				tmp = matroxfb_DAC_in(PMINFO M1064_XPWRCTRL);
+				tmp = matroxfb_DAC_in(minfo, M1064_XPWRCTRL);
 				if (!(tmp & 2)) {
-					matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, tmp | 2);
+					matroxfb_DAC_out(minfo, M1064_XPWRCTRL, tmp | 2);
 				}
 				
-				mnp = matroxfb_DAC_in(PMINFO M1064_XPIXPLLCM) << 16;
-				mnp |= matroxfb_DAC_in(PMINFO M1064_XPIXPLLCN) << 8;
-				pixel_vco = g450_mnp2vco(PMINFO mnp);
+				mnp = matroxfb_DAC_in(minfo, M1064_XPIXPLLCM) << 16;
+				mnp |= matroxfb_DAC_in(minfo, M1064_XPIXPLLCN) << 8;
+				pixel_vco = g450_mnp2vco(minfo, mnp);
 				matroxfb_DAC_unlock_irqrestore(flags);
 			}
 			pi = &minfo->limits.video;
@@ -407,12 +434,12 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 		unsigned int mnp;
 		unsigned int xvco;
 
-		for(mnp = g450_firstpll(PMINFO pi, &xvco, fout); mnp != NO_MORE_MNP; mnp = g450_nextpll(PMINFO pi, &xvco, mnp)) {
+		for (mnp = g450_firstpll(minfo, pi, &xvco, fout); mnp != NO_MORE_MNP; mnp = g450_nextpll(minfo, pi, &xvco, mnp)) {
 			unsigned int idx;
 			unsigned int vco;
 			unsigned int delta;
 
-			vco = g450_mnp2vco(PMINFO mnp);
+			vco = g450_mnp2vco(minfo, mnp);
 #if 0			
 			if (pll == M_VIDEO_PLL) {
 				unsigned int big, small;
@@ -444,7 +471,7 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 					 * (freqs near VCOmin aren't as stable)
 					 */
 					if (delta == deltaarray[idx-1]
-					    && vco != g450_mnp2vco(PMINFO mnparray[idx-1])
+					    && vco != g450_mnp2vco(minfo, mnparray[idx-1])
 					    && vco < (pi->vcomin * 17 / 16)) {
 						break;
 					}
@@ -468,11 +495,11 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 		unsigned int mnp;
 		
 		matroxfb_DAC_lock_irqsave(flags);
-		mnp = g450_checkcache(PMINFO ci, mnparray[0]);
+		mnp = g450_checkcache(minfo, ci, mnparray[0]);
 		if (mnp != NO_MORE_MNP) {
-			matroxfb_g450_setpll_cond(PMINFO mnp, pll);
+			matroxfb_g450_setpll_cond(minfo, mnp, pll);
 		} else {
-			mnp = g450_findworkingpll(PMINFO pll, mnparray, mnpcount);
+			mnp = g450_findworkingpll(minfo, pll, mnparray, mnpcount);
 			g450_addcache(ci, mnparray[0], mnp);
 		}
 		updatehwstate_clk(&minfo->hw, mnp, pll);
@@ -485,14 +512,16 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
  * Currently there is 5(p) * 10(m) = 50 possible values. */
 #define MNP_TABLE_SIZE  64
 
-int matroxfb_g450_setclk(WPMINFO unsigned int fout, unsigned int pll) {
+int matroxfb_g450_setclk(struct matrox_fb_info *minfo, unsigned int fout,
+			 unsigned int pll)
+{
 	unsigned int* arr;
 	
 	arr = kmalloc(sizeof(*arr) * MNP_TABLE_SIZE * 2, GFP_KERNEL);
 	if (arr) {
 		int r;
 
-		r = __g450_setclk(PMINFO fout, pll, arr, arr + MNP_TABLE_SIZE);
+		r = __g450_setclk(minfo, fout, pll, arr, arr + MNP_TABLE_SIZE);
 		kfree(arr);
 		return r;
 	}
diff --git a/drivers/video/matrox/g450_pll.h b/drivers/video/matrox/g450_pll.h
index c17ed74..aac615d 100644
--- a/drivers/video/matrox/g450_pll.h
+++ b/drivers/video/matrox/g450_pll.h
@@ -3,8 +3,10 @@
 
 #include "matroxfb_base.h"
 
-int matroxfb_g450_setclk(WPMINFO unsigned int fout, unsigned int pll);
-unsigned int g450_mnp2f(CPMINFO unsigned int mnp);
-void matroxfb_g450_setpll_cond(WPMINFO unsigned int mnp, unsigned int pll);
+int matroxfb_g450_setclk(struct matrox_fb_info *minfo, unsigned int fout,
+			 unsigned int pll);
+unsigned int g450_mnp2f(const struct matrox_fb_info *minfo, unsigned int mnp);
+void matroxfb_g450_setpll_cond(struct matrox_fb_info *minfo, unsigned int mnp,
+			       unsigned int pll);
 
 #endif	/* __G450_PLL_H__ */
diff --git a/drivers/video/matrox/i2c-matroxfb.c b/drivers/video/matrox/i2c-matroxfb.c
index cefabe8..f3728ab 100644
--- a/drivers/video/matrox/i2c-matroxfb.c
+++ b/drivers/video/matrox/i2c-matroxfb.c
@@ -41,7 +41,7 @@ static int matroxfb_read_gpio(struct matrox_fb_info* minfo) {
 	int v;
 
 	matroxfb_DAC_lock_irqsave(flags);
-	v = matroxfb_DAC_in(PMINFO DAC_XGENIODATA);
+	v = matroxfb_DAC_in(minfo, DAC_XGENIODATA);
 	matroxfb_DAC_unlock_irqrestore(flags);
 	return v;
 }
@@ -51,10 +51,10 @@ static void matroxfb_set_gpio(struct matrox_fb_info* minfo, int mask, int val) {
 	int v;
 
 	matroxfb_DAC_lock_irqsave(flags);
-	v = (matroxfb_DAC_in(PMINFO DAC_XGENIOCTRL) & mask) | val;
-	matroxfb_DAC_out(PMINFO DAC_XGENIOCTRL, v);
+	v = (matroxfb_DAC_in(minfo, DAC_XGENIOCTRL) & mask) | val;
+	matroxfb_DAC_out(minfo, DAC_XGENIOCTRL, v);
 	/* We must reset GENIODATA very often... XFree plays with this register */
-	matroxfb_DAC_out(PMINFO DAC_XGENIODATA, 0x00);
+	matroxfb_DAC_out(minfo, DAC_XGENIODATA, 0x00);
 	matroxfb_DAC_unlock_irqrestore(flags);
 }
 
@@ -149,8 +149,8 @@ static void* i2c_matroxfb_probe(struct matrox_fb_info* minfo) {
 		return NULL;
 
 	matroxfb_DAC_lock_irqsave(flags);
-	matroxfb_DAC_out(PMINFO DAC_XGENIODATA, 0xFF);
-	matroxfb_DAC_out(PMINFO DAC_XGENIOCTRL, 0x00);
+	matroxfb_DAC_out(minfo, DAC_XGENIODATA, 0xFF);
+	matroxfb_DAC_out(minfo, DAC_XGENIOCTRL, 0x00);
 	matroxfb_DAC_unlock_irqrestore(flags);
 
 	switch (minfo->chip) {
diff --git a/drivers/video/matrox/matroxfb_DAC1064.c b/drivers/video/matrox/matroxfb_DAC1064.c
index 7662a28..88466a0 100644
--- a/drivers/video/matrox/matroxfb_DAC1064.c
+++ b/drivers/video/matrox/matroxfb_DAC1064.c
@@ -33,7 +33,11 @@
 #define DAC1064_OPT_MDIV2	0x00
 #define DAC1064_OPT_RESERVED	0x10
 
-static void DAC1064_calcclock(CPMINFO unsigned int freq, unsigned int fmax, unsigned int* in, unsigned int* feed, unsigned int* post) {
+static void DAC1064_calcclock(const struct matrox_fb_info *minfo,
+			      unsigned int freq, unsigned int fmax,
+			      unsigned int *in, unsigned int *feed,
+			      unsigned int *post)
+{
 	unsigned int fvco;
 	unsigned int p;
 
@@ -41,7 +45,7 @@ static void DAC1064_calcclock(CPMINFO unsigned int freq, unsigned int fmax, unsi
 	
 	/* only for devices older than G450 */
 
-	fvco = PLL_calcclock(PMINFO freq, fmax, in, feed, &p);
+	fvco = PLL_calcclock(minfo, freq, fmax, in, feed, &p);
 	
 	p = (1 << p) - 1;
 	if (fvco <= 100000)
@@ -80,18 +84,21 @@ static const unsigned char MGA1064_DAC[] = {
 		0x00,
 		0x00, 0x00, 0xFF, 0xFF};
 
-static void DAC1064_setpclk(WPMINFO unsigned long fout) {
+static void DAC1064_setpclk(struct matrox_fb_info *minfo, unsigned long fout)
+{
 	unsigned int m, n, p;
 
 	DBG(__func__)
 
-	DAC1064_calcclock(PMINFO fout, minfo->max_pixel_clock, &m, &n, &p);
+	DAC1064_calcclock(minfo, fout, minfo->max_pixel_clock, &m, &n, &p);
 	minfo->hw.DACclk[0] = m;
 	minfo->hw.DACclk[1] = n;
 	minfo->hw.DACclk[2] = p;
 }
 
-static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
+static void DAC1064_setmclk(struct matrox_fb_info *minfo, int oscinfo,
+			    unsigned long fmem)
+{
 	u_int32_t mx;
 	struct matrox_hw_state *hw = &minfo->hw;
 
@@ -99,9 +106,9 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 
 	if (minfo->devflags.noinit) {
 		/* read MCLK and give up... */
-		hw->DACclk[3] = inDAC1064(PMINFO DAC1064_XSYSPLLM);
-		hw->DACclk[4] = inDAC1064(PMINFO DAC1064_XSYSPLLN);
-		hw->DACclk[5] = inDAC1064(PMINFO DAC1064_XSYSPLLP);
+		hw->DACclk[3] = inDAC1064(minfo, DAC1064_XSYSPLLM);
+		hw->DACclk[4] = inDAC1064(minfo, DAC1064_XSYSPLLN);
+		hw->DACclk[5] = inDAC1064(minfo, DAC1064_XSYSPLLP);
 		return;
 	}
 	mx = hw->MXoptionReg | 0x00000004;
@@ -131,12 +138,12 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 		   perfect... */
 		/* (bit 2 of PCI_OPTION_REG must be 0... and bits 0,1 must not
 		   select PLL... because of PLL can be stopped at this time) */
-		DAC1064_calcclock(PMINFO fmem, minfo->max_pixel_clock, &m, &n, &p);
-		outDAC1064(PMINFO DAC1064_XSYSPLLM, hw->DACclk[3] = m);
-		outDAC1064(PMINFO DAC1064_XSYSPLLN, hw->DACclk[4] = n);
-		outDAC1064(PMINFO DAC1064_XSYSPLLP, hw->DACclk[5] = p);
+		DAC1064_calcclock(minfo, fmem, minfo->max_pixel_clock, &m, &n, &p);
+		outDAC1064(minfo, DAC1064_XSYSPLLM, hw->DACclk[3] = m);
+		outDAC1064(minfo, DAC1064_XSYSPLLN, hw->DACclk[4] = n);
+		outDAC1064(minfo, DAC1064_XSYSPLLP, hw->DACclk[5] = p);
 		for (clk = 65536; clk; --clk) {
-			if (inDAC1064(PMINFO DAC1064_XSYSPLLSTAT) & 0x40)
+			if (inDAC1064(minfo, DAC1064_XSYSPLLSTAT) & 0x40)
 				break;
 		}
 		if (!clk)
@@ -154,7 +161,8 @@ static void DAC1064_setmclk(WPMINFO int oscinfo, unsigned long fmem) {
 }
 
 #ifdef CONFIG_FB_MATROX_G
-static void g450_set_plls(WPMINFO2) {
+static void g450_set_plls(struct matrox_fb_info *minfo)
+{
 	u_int32_t c2_ctl;
 	unsigned int pxc;
 	struct matrox_hw_state *hw = &minfo->hw;
@@ -184,16 +192,16 @@ static void g450_set_plls(WPMINFO2) {
 		c2_ctl |=  0x0006;	/* Use video PLL */
 		hw->DACreg[POS1064_XPWRCTRL] |= 0x02;
 		
-		outDAC1064(PMINFO M1064_XPWRCTRL, hw->DACreg[POS1064_XPWRCTRL]);
-		matroxfb_g450_setpll_cond(PMINFO videomnp, M_VIDEO_PLL);
+		outDAC1064(minfo, M1064_XPWRCTRL, hw->DACreg[POS1064_XPWRCTRL]);
+		matroxfb_g450_setpll_cond(minfo, videomnp, M_VIDEO_PLL);
 	}
 
 	hw->DACreg[POS1064_XPIXCLKCTRL] &= ~M1064_XPIXCLKCTRL_PLL_UP;
 	if (pixelmnp >= 0) {
 		hw->DACreg[POS1064_XPIXCLKCTRL] |= M1064_XPIXCLKCTRL_PLL_UP;
 		
-		outDAC1064(PMINFO M1064_XPIXCLKCTRL, hw->DACreg[POS1064_XPIXCLKCTRL]);
-		matroxfb_g450_setpll_cond(PMINFO pixelmnp, M_PIXEL_PLL_C);
+		outDAC1064(minfo, M1064_XPIXCLKCTRL, hw->DACreg[POS1064_XPIXCLKCTRL]);
+		matroxfb_g450_setpll_cond(minfo, pixelmnp, M_PIXEL_PLL_C);
 	}
 	if (c2_ctl != hw->crtc2.ctl) {
 		hw->crtc2.ctl = c2_ctl;
@@ -245,7 +253,8 @@ static void g450_set_plls(WPMINFO2) {
 }
 #endif
 
-void DAC1064_global_init(WPMINFO2) {
+void DAC1064_global_init(struct matrox_fb_info *minfo)
+{
 	struct matrox_hw_state *hw = &minfo->hw;
 
 	hw->DACreg[POS1064_XMISCCTRL] &= M1064_XMISCCTRL_DAC_WIDTHMASK;
@@ -299,7 +308,7 @@ void DAC1064_global_init(WPMINFO2) {
 				break;
 		}
 		/* Now set timming related variables... */
-		g450_set_plls(PMINFO2);
+		g450_set_plls(minfo);
 	} else
 #endif
 	{
@@ -318,24 +327,26 @@ void DAC1064_global_init(WPMINFO2) {
 	}
 }
 
-void DAC1064_global_restore(WPMINFO2) {
+void DAC1064_global_restore(struct matrox_fb_info *minfo)
+{
 	struct matrox_hw_state *hw = &minfo->hw;
 
-	outDAC1064(PMINFO M1064_XPIXCLKCTRL, hw->DACreg[POS1064_XPIXCLKCTRL]);
-	outDAC1064(PMINFO M1064_XMISCCTRL, hw->DACreg[POS1064_XMISCCTRL]);
+	outDAC1064(minfo, M1064_XPIXCLKCTRL, hw->DACreg[POS1064_XPIXCLKCTRL]);
+	outDAC1064(minfo, M1064_XMISCCTRL, hw->DACreg[POS1064_XMISCCTRL]);
 	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400) {
-		outDAC1064(PMINFO 0x20, 0x04);
-		outDAC1064(PMINFO 0x1F, minfo->devflags.dfp_type);
+		outDAC1064(minfo, 0x20, 0x04);
+		outDAC1064(minfo, 0x1F, minfo->devflags.dfp_type);
 		if (minfo->devflags.g450dac) {
-			outDAC1064(PMINFO M1064_XSYNCCTRL, 0xCC);
-			outDAC1064(PMINFO M1064_XPWRCTRL, hw->DACreg[POS1064_XPWRCTRL]);
-			outDAC1064(PMINFO M1064_XPANMODE, hw->DACreg[POS1064_XPANMODE]);
-			outDAC1064(PMINFO M1064_XOUTPUTCONN, hw->DACreg[POS1064_XOUTPUTCONN]);
+			outDAC1064(minfo, M1064_XSYNCCTRL, 0xCC);
+			outDAC1064(minfo, M1064_XPWRCTRL, hw->DACreg[POS1064_XPWRCTRL]);
+			outDAC1064(minfo, M1064_XPANMODE, hw->DACreg[POS1064_XPANMODE]);
+			outDAC1064(minfo, M1064_XOUTPUTCONN, hw->DACreg[POS1064_XOUTPUTCONN]);
 		}
 	}
 }
 
-static int DAC1064_init_1(WPMINFO struct my_timming* m) {
+static int DAC1064_init_1(struct matrox_fb_info *minfo, struct my_timming *m)
+{
 	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
@@ -367,11 +378,12 @@ static int DAC1064_init_1(WPMINFO struct my_timming* m) {
 	hw->DACreg[POS1064_XCURADDL] = 0;
 	hw->DACreg[POS1064_XCURADDH] = 0;
 
-	DAC1064_global_init(PMINFO2);
+	DAC1064_global_init(minfo);
 	return 0;
 }
 
-static int DAC1064_init_2(WPMINFO struct my_timming* m) {
+static int DAC1064_init_2(struct matrox_fb_info *minfo, struct my_timming *m)
+{
 	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
@@ -413,7 +425,8 @@ static int DAC1064_init_2(WPMINFO struct my_timming* m) {
 	return 0;
 }
 
-static void DAC1064_restore_1(WPMINFO2) {
+static void DAC1064_restore_1(struct matrox_fb_info *minfo)
+{
 	struct matrox_hw_state *hw = &minfo->hw;
 
 	CRITFLAGS
@@ -422,28 +435,29 @@ static void DAC1064_restore_1(WPMINFO2) {
 
 	CRITBEGIN
 
-	if ((inDAC1064(PMINFO DAC1064_XSYSPLLM) != hw->DACclk[3]) ||
-	    (inDAC1064(PMINFO DAC1064_XSYSPLLN) != hw->DACclk[4]) ||
-	    (inDAC1064(PMINFO DAC1064_XSYSPLLP) != hw->DACclk[5])) {
-		outDAC1064(PMINFO DAC1064_XSYSPLLM, hw->DACclk[3]);
-		outDAC1064(PMINFO DAC1064_XSYSPLLN, hw->DACclk[4]);
-		outDAC1064(PMINFO DAC1064_XSYSPLLP, hw->DACclk[5]);
+	if ((inDAC1064(minfo, DAC1064_XSYSPLLM) != hw->DACclk[3]) ||
+	    (inDAC1064(minfo, DAC1064_XSYSPLLN) != hw->DACclk[4]) ||
+	    (inDAC1064(minfo, DAC1064_XSYSPLLP) != hw->DACclk[5])) {
+		outDAC1064(minfo, DAC1064_XSYSPLLM, hw->DACclk[3]);
+		outDAC1064(minfo, DAC1064_XSYSPLLN, hw->DACclk[4]);
+		outDAC1064(minfo, DAC1064_XSYSPLLP, hw->DACclk[5]);
 	}
 	{
 		unsigned int i;
 
 		for (i = 0; i < sizeof(MGA1064_DAC_regs); i++) {
 			if ((i != POS1064_XPIXCLKCTRL) && (i != POS1064_XMISCCTRL))
-				outDAC1064(PMINFO MGA1064_DAC_regs[i], hw->DACreg[i]);
+				outDAC1064(minfo, MGA1064_DAC_regs[i], hw->DACreg[i]);
 		}
 	}
 
-	DAC1064_global_restore(PMINFO2);
+	DAC1064_global_restore(minfo);
 
 	CRITEND
 };
 
-static void DAC1064_restore_2(WPMINFO2) {
+static void DAC1064_restore_2(struct matrox_fb_info *minfo)
+{
 #ifdef DEBUG
 	unsigned int i;
 #endif
@@ -470,14 +484,14 @@ static int m1064_compute(void* out, struct my_timming* m) {
 		int tmout;
 		CRITFLAGS
 
-		DAC1064_setpclk(PMINFO m->pixclock);
+		DAC1064_setpclk(minfo, m->pixclock);
 
 		CRITBEGIN
 
 		for (i = 0; i < 3; i++)
-			outDAC1064(PMINFO M1064_XPIXPLLCM + i, minfo->hw.DACclk[i]);
+			outDAC1064(minfo, M1064_XPIXPLLCM + i, minfo->hw.DACclk[i]);
 		for (tmout = 500000; tmout; tmout--) {
-			if (inDAC1064(PMINFO M1064_XPIXPLLSTAT) & 0x40)
+			if (inDAC1064(minfo, M1064_XPIXPLLSTAT) & 0x40)
 				break;
 			udelay(10);
 		};
@@ -500,9 +514,9 @@ static struct matrox_altout m1064 = {
 static int g450_compute(void* out, struct my_timming* m) {
 #define minfo ((struct matrox_fb_info*)out)
 	if (m->mnp < 0) {
-		m->mnp = matroxfb_g450_setclk(PMINFO m->pixclock, (m->crtc == MATROXFB_SRC_CRTC1) ? M_PIXEL_PLL_C : M_VIDEO_PLL);
+		m->mnp = matroxfb_g450_setclk(minfo, m->pixclock, (m->crtc == MATROXFB_SRC_CRTC1) ? M_PIXEL_PLL_C : M_VIDEO_PLL);
 		if (m->mnp >= 0) {
-			m->pixclock = g450_mnp2f(PMINFO m->mnp);
+			m->pixclock = g450_mnp2f(minfo, m->mnp);
 		}
 	}
 #undef minfo
@@ -518,13 +532,14 @@ static struct matrox_altout g450out = {
 #endif /* NEED_DAC1064 */
 
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
-static int MGA1064_init(WPMINFO struct my_timming* m) {
+static int MGA1064_init(struct matrox_fb_info *minfo, struct my_timming *m)
+{
 	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	if (DAC1064_init_1(PMINFO m)) return 1;
-	if (matroxfb_vgaHWinit(PMINFO m)) return 1;
+	if (DAC1064_init_1(minfo, m)) return 1;
+	if (matroxfb_vgaHWinit(minfo, m)) return 1;
 
 	hw->MiscOutReg = 0xCB;
 	if (m->sync & FB_SYNC_HOR_HIGH_ACT)
@@ -534,20 +549,21 @@ static int MGA1064_init(WPMINFO struct my_timming* m) {
 	if (m->sync & FB_SYNC_COMP_HIGH_ACT) /* should be only FB_SYNC_COMP */
 		hw->CRTCEXT[3] |= 0x40;
 
-	if (DAC1064_init_2(PMINFO m)) return 1;
+	if (DAC1064_init_2(minfo, m)) return 1;
 	return 0;
 }
 #endif
 
 #ifdef CONFIG_FB_MATROX_G
-static int MGAG100_init(WPMINFO struct my_timming* m) {
+static int MGAG100_init(struct matrox_fb_info *minfo, struct my_timming *m)
+{
 	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	if (DAC1064_init_1(PMINFO m)) return 1;
+	if (DAC1064_init_1(minfo, m)) return 1;
 	hw->MXoptionReg &= ~0x2000;
-	if (matroxfb_vgaHWinit(PMINFO m)) return 1;
+	if (matroxfb_vgaHWinit(minfo, m)) return 1;
 
 	hw->MiscOutReg = 0xEF;
 	if (m->sync & FB_SYNC_HOR_HIGH_ACT)
@@ -557,13 +573,14 @@ static int MGAG100_init(WPMINFO struct my_timming* m) {
 	if (m->sync & FB_SYNC_COMP_HIGH_ACT) /* should be only FB_SYNC_COMP */
 		hw->CRTCEXT[3] |= 0x40;
 
-	if (DAC1064_init_2(PMINFO m)) return 1;
+	if (DAC1064_init_2(minfo, m)) return 1;
 	return 0;
 }
 #endif	/* G */
 
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
-static void MGA1064_ramdac_init(WPMINFO2) {
+static void MGA1064_ramdac_init(struct matrox_fb_info *minfo)
+{
 
 	DBG(__func__)
 
@@ -577,7 +594,7 @@ static void MGA1064_ramdac_init(WPMINFO2) {
 	minfo->features.pll.post_shift_max = 3;
 	minfo->features.DAC1064.xvrefctrl = DAC1064_XVREFCTRL_EXTERNAL;
 	/* maybe cmdline MCLK= ?, doc says gclk=44MHz, mclk=66MHz... it was 55/83 with old values */
-	DAC1064_setmclk(PMINFO DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV3 | DAC1064_OPT_SCLK_PLL, 133333);
+	DAC1064_setmclk(minfo, DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV3 | DAC1064_OPT_SCLK_PLL, 133333);
 }
 #endif
 
@@ -589,23 +606,25 @@ static int x7AF4 = 0x10;	/* flags, maybe 0x10 = SDRAM, 0x00 = SGRAM??? */
 static int def50 = 0;	/* reg50, & 0x0F, & 0x3000 (only 0x0000, 0x1000, 0x2000 (0x3000 disallowed and treated as 0) */
 #endif
 
-static void MGAG100_progPixClock(CPMINFO int flags, int m, int n, int p) {
+static void MGAG100_progPixClock(const struct matrox_fb_info *minfo, int flags,
+				 int m, int n, int p)
+{
 	int reg;
 	int selClk;
 	int clk;
 
 	DBG(__func__)
 
-	outDAC1064(PMINFO M1064_XPIXCLKCTRL, inDAC1064(PMINFO M1064_XPIXCLKCTRL) | M1064_XPIXCLKCTRL_DIS |
+	outDAC1064(minfo, M1064_XPIXCLKCTRL, inDAC1064(minfo, M1064_XPIXCLKCTRL) | M1064_XPIXCLKCTRL_DIS |
 		   M1064_XPIXCLKCTRL_PLL_UP);
 	switch (flags & 3) {
 		case 0:		reg = M1064_XPIXPLLAM; break;
 		case 1:		reg = M1064_XPIXPLLBM; break;
 		default:	reg = M1064_XPIXPLLCM; break;
 	}
-	outDAC1064(PMINFO reg++, m);
-	outDAC1064(PMINFO reg++, n);
-	outDAC1064(PMINFO reg, p);
+	outDAC1064(minfo, reg++, m);
+	outDAC1064(minfo, reg++, n);
+	outDAC1064(minfo, reg, p);
 	selClk = mga_inb(M_MISC_REG_READ) & ~0xC;
 	/* there should be flags & 0x03 & case 0/1/else */
 	/* and we should first select source and after that we should wait for PLL */
@@ -617,34 +636,37 @@ static void MGAG100_progPixClock(CPMINFO int flags, int m, int n, int p) {
 	}
 	mga_outb(M_MISC_REG, selClk);
 	for (clk = 500000; clk; clk--) {
-		if (inDAC1064(PMINFO M1064_XPIXPLLSTAT) & 0x40)
+		if (inDAC1064(minfo, M1064_XPIXPLLSTAT) & 0x40)
 			break;
 		udelay(10);
 	};
 	if (!clk)
 		printk(KERN_ERR "matroxfb: Pixel PLL%c not locked after usual time\n", (reg-M1064_XPIXPLLAM-2)/4 + 'A');
-	selClk = inDAC1064(PMINFO M1064_XPIXCLKCTRL) & ~M1064_XPIXCLKCTRL_SRC_MASK;
+	selClk = inDAC1064(minfo, M1064_XPIXCLKCTRL) & ~M1064_XPIXCLKCTRL_SRC_MASK;
 	switch (flags & 0x0C) {
 		case 0x00:	selClk |= M1064_XPIXCLKCTRL_SRC_PCI; break;
 		case 0x04:	selClk |= M1064_XPIXCLKCTRL_SRC_PLL; break;
 		default:	selClk |= M1064_XPIXCLKCTRL_SRC_EXT; break;
 	}
-	outDAC1064(PMINFO M1064_XPIXCLKCTRL, selClk);
-	outDAC1064(PMINFO M1064_XPIXCLKCTRL, inDAC1064(PMINFO M1064_XPIXCLKCTRL) & ~M1064_XPIXCLKCTRL_DIS);
+	outDAC1064(minfo, M1064_XPIXCLKCTRL, selClk);
+	outDAC1064(minfo, M1064_XPIXCLKCTRL, inDAC1064(minfo, M1064_XPIXCLKCTRL) & ~M1064_XPIXCLKCTRL_DIS);
 }
 
-static void MGAG100_setPixClock(CPMINFO int flags, int freq) {
+static void MGAG100_setPixClock(const struct matrox_fb_info *minfo, int flags,
+				int freq)
+{
 	unsigned int m, n, p;
 
 	DBG(__func__)
 
-	DAC1064_calcclock(PMINFO freq, minfo->max_pixel_clock, &m, &n, &p);
-	MGAG100_progPixClock(PMINFO flags, m, n, p);
+	DAC1064_calcclock(minfo, freq, minfo->max_pixel_clock, &m, &n, &p);
+	MGAG100_progPixClock(minfo, flags, m, n, p);
 }
 #endif
 
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
-static int MGA1064_preinit(WPMINFO2) {
+static int MGA1064_preinit(struct matrox_fb_info *minfo)
+{
 	static const int vxres_mystique[] = { 512,        640, 768,  800,  832,  960,
 					     1024, 1152, 1280,      1600, 1664, 1920,
 					     2048,    0};
@@ -681,16 +703,18 @@ static int MGA1064_preinit(WPMINFO2) {
 	return 0;
 }
 
-static void MGA1064_reset(WPMINFO2) {
+static void MGA1064_reset(struct matrox_fb_info *minfo)
+{
 
 	DBG(__func__);
 
-	MGA1064_ramdac_init(PMINFO2);
+	MGA1064_ramdac_init(minfo);
 }
 #endif
 
 #ifdef CONFIG_FB_MATROX_G
-static void g450_mclk_init(WPMINFO2) {
+static void g450_mclk_init(struct matrox_fb_info *minfo)
+{
 	/* switch all clocks to PCI source */
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg | 4);
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION3_REG, minfo->values.reg.opt3 & ~0x00300C03);
@@ -699,17 +723,17 @@ static void g450_mclk_init(WPMINFO2) {
 	if (((minfo->values.reg.opt3 & 0x000003) == 0x000003) ||
 	    ((minfo->values.reg.opt3 & 0x000C00) == 0x000C00) ||
 	    ((minfo->values.reg.opt3 & 0x300000) == 0x300000)) {
-		matroxfb_g450_setclk(PMINFO minfo->values.pll.video, M_VIDEO_PLL);
+		matroxfb_g450_setclk(minfo, minfo->values.pll.video, M_VIDEO_PLL);
 	} else {
 		unsigned long flags;
 		unsigned int pwr;
 		
 		matroxfb_DAC_lock_irqsave(flags);
-		pwr = inDAC1064(PMINFO M1064_XPWRCTRL) & ~0x02;
-		outDAC1064(PMINFO M1064_XPWRCTRL, pwr);
+		pwr = inDAC1064(minfo, M1064_XPWRCTRL) & ~0x02;
+		outDAC1064(minfo, M1064_XPWRCTRL, pwr);
 		matroxfb_DAC_unlock_irqrestore(flags);
 	}
-	matroxfb_g450_setclk(PMINFO minfo->values.pll.system, M_SYSTEM_PLL);
+	matroxfb_g450_setclk(minfo, minfo->values.pll.system, M_SYSTEM_PLL);
 	
 	/* switch clocks to their real PLL source(s) */
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg | 4);
@@ -718,7 +742,8 @@ static void g450_mclk_init(WPMINFO2) {
 
 }
 
-static void g450_memory_init(WPMINFO2) {
+static void g450_memory_init(struct matrox_fb_info *minfo)
+{
 	/* disable memory refresh */
 	minfo->hw.MXoptionReg &= ~0x001F8000;
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
@@ -760,7 +785,8 @@ static void g450_memory_init(WPMINFO2) {
 	
 }
 
-static void g450_preinit(WPMINFO2) {
+static void g450_preinit(struct matrox_fb_info *minfo)
+{
 	u_int32_t c2ctl;
 	u_int8_t curctl;
 	u_int8_t c1ctl;
@@ -783,24 +809,24 @@ static void g450_preinit(WPMINFO2) {
 	c2ctl = mga_inl(M_C2CTL);
 	mga_outl(M_C2CTL, c2ctl & ~1);
 	/* stop cursor */
-	curctl = inDAC1064(PMINFO M1064_XCURCTRL);
-	outDAC1064(PMINFO M1064_XCURCTRL, 0);
+	curctl = inDAC1064(minfo, M1064_XCURCTRL);
+	outDAC1064(minfo, M1064_XCURCTRL, 0);
 	/* stop crtc1 */
 	c1ctl = mga_readr(M_SEQ_INDEX, 1);
 	mga_setr(M_SEQ_INDEX, 1, c1ctl | 0x20);
 
-	g450_mclk_init(PMINFO2);
-	g450_memory_init(PMINFO2);
+	g450_mclk_init(minfo);
+	g450_memory_init(minfo);
 	
 	/* set legacy VGA clock sources for DOSEmu or VMware... */
-	matroxfb_g450_setclk(PMINFO 25175, M_PIXEL_PLL_A);
-	matroxfb_g450_setclk(PMINFO 28322, M_PIXEL_PLL_B);
+	matroxfb_g450_setclk(minfo, 25175, M_PIXEL_PLL_A);
+	matroxfb_g450_setclk(minfo, 28322, M_PIXEL_PLL_B);
 
 	/* restore crtc1 */
 	mga_setr(M_SEQ_INDEX, 1, c1ctl);
 	
 	/* restore cursor */
-	outDAC1064(PMINFO M1064_XCURCTRL, curctl);
+	outDAC1064(minfo, M1064_XCURCTRL, curctl);
 
 	/* restore crtc2 */
 	mga_outl(M_C2CTL, c2ctl);
@@ -808,7 +834,8 @@ static void g450_preinit(WPMINFO2) {
 	return;
 }
 
-static int MGAG100_preinit(WPMINFO2) {
+static int MGAG100_preinit(struct matrox_fb_info *minfo)
+{
 	static const int vxres_g100[] = {  512,        640, 768,  800,  832,  960,
                                           1024, 1152, 1280,      1600, 1664, 1920,
                                           2048, 0};
@@ -862,7 +889,7 @@ static int MGAG100_preinit(WPMINFO2) {
 	if (minfo->devflags.noinit)
 		return 0;
 	if (minfo->devflags.g450dac) {
-		g450_preinit(PMINFO2);
+		g450_preinit(minfo);
 		return 0;
 	}
 	hw->MXoptionReg &= 0xC0000100;
@@ -874,7 +901,7 @@ static int MGAG100_preinit(WPMINFO2) {
 	if (minfo->devflags.nopciretry)
 		hw->MXoptionReg |=  0x20000000;
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
-	DAC1064_setmclk(PMINFO DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV3 | DAC1064_OPT_SCLK_PCI, 133333);
+	DAC1064_setmclk(minfo, DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV3 | DAC1064_OPT_SCLK_PCI, 133333);
 
 	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG100) {
 		pci_read_config_dword(minfo->pcidev, PCI_OPTION2_REG, &reg50);
@@ -952,7 +979,8 @@ static int MGAG100_preinit(WPMINFO2) {
 	return 0;
 }
 
-static void MGAG100_reset(WPMINFO2) {
+static void MGAG100_reset(struct matrox_fb_info *minfo)
+{
 	u_int8_t b;
 	struct matrox_hw_state *hw = &minfo->hw;
 
@@ -981,35 +1009,36 @@ static void MGAG100_reset(WPMINFO2) {
 	}
 	if (minfo->devflags.g450dac) {
 		/* either leave MCLK as is... or they were set in preinit */
-		hw->DACclk[3] = inDAC1064(PMINFO DAC1064_XSYSPLLM);
-		hw->DACclk[4] = inDAC1064(PMINFO DAC1064_XSYSPLLN);
-		hw->DACclk[5] = inDAC1064(PMINFO DAC1064_XSYSPLLP);
+		hw->DACclk[3] = inDAC1064(minfo, DAC1064_XSYSPLLM);
+		hw->DACclk[4] = inDAC1064(minfo, DAC1064_XSYSPLLN);
+		hw->DACclk[5] = inDAC1064(minfo, DAC1064_XSYSPLLP);
 	} else {
-		DAC1064_setmclk(PMINFO DAC1064_OPT_RESERVED | DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV1 | DAC1064_OPT_SCLK_PLL, 133333);
+		DAC1064_setmclk(minfo, DAC1064_OPT_RESERVED | DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV1 | DAC1064_OPT_SCLK_PLL, 133333);
 	}
 	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400) {
 		if (minfo->devflags.dfp_type == -1) {
-			minfo->devflags.dfp_type = inDAC1064(PMINFO 0x1F);
+			minfo->devflags.dfp_type = inDAC1064(minfo, 0x1F);
 		}
 	}
 	if (minfo->devflags.noinit)
 		return;
 	if (minfo->devflags.g450dac) {
 	} else {
-		MGAG100_setPixClock(PMINFO 4, 25175);
-		MGAG100_setPixClock(PMINFO 5, 28322);
+		MGAG100_setPixClock(minfo, 4, 25175);
+		MGAG100_setPixClock(minfo, 5, 28322);
 		if (x7AF4 & 0x10) {
-			b = inDAC1064(PMINFO M1064_XGENIODATA) & ~1;
-			outDAC1064(PMINFO M1064_XGENIODATA, b);
-			b = inDAC1064(PMINFO M1064_XGENIOCTRL) | 1;
-			outDAC1064(PMINFO M1064_XGENIOCTRL, b);
+			b = inDAC1064(minfo, M1064_XGENIODATA) & ~1;
+			outDAC1064(minfo, M1064_XGENIODATA, b);
+			b = inDAC1064(minfo, M1064_XGENIOCTRL) | 1;
+			outDAC1064(minfo, M1064_XGENIOCTRL, b);
 		}
 	}
 }
 #endif
 
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
-static void MGA1064_restore(WPMINFO2) {
+static void MGA1064_restore(struct matrox_fb_info *minfo)
+{
 	int i;
 	struct matrox_hw_state *hw = &minfo->hw;
 
@@ -1025,17 +1054,18 @@ static void MGA1064_restore(WPMINFO2) {
 
 	CRITEND
 
-	DAC1064_restore_1(PMINFO2);
-	matroxfb_vgaHWrestore(PMINFO2);
+	DAC1064_restore_1(minfo);
+	matroxfb_vgaHWrestore(minfo);
 	minfo->crtc1.panpos = -1;
 	for (i = 0; i < 6; i++)
 		mga_setr(M_EXTVGA_INDEX, i, hw->CRTCEXT[i]);
-	DAC1064_restore_2(PMINFO2);
+	DAC1064_restore_2(minfo);
 }
 #endif
 
 #ifdef CONFIG_FB_MATROX_G
-static void MGAG100_restore(WPMINFO2) {
+static void MGAG100_restore(struct matrox_fb_info *minfo)
+{
 	int i;
 	struct matrox_hw_state *hw = &minfo->hw;
 
@@ -1048,8 +1078,8 @@ static void MGAG100_restore(WPMINFO2) {
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 	CRITEND
 
-	DAC1064_restore_1(PMINFO2);
-	matroxfb_vgaHWrestore(PMINFO2);
+	DAC1064_restore_1(minfo);
+	matroxfb_vgaHWrestore(minfo);
 #ifdef CONFIG_FB_MATROX_32MB
 	if (minfo->devflags.support32MB)
 		mga_setr(M_EXTVGA_INDEX, 8, hw->CRTCEXT[8]);
@@ -1057,7 +1087,7 @@ static void MGAG100_restore(WPMINFO2) {
 	minfo->crtc1.panpos = -1;
 	for (i = 0; i < 6; i++)
 		mga_setr(M_EXTVGA_INDEX, i, hw->CRTCEXT[i]);
-	DAC1064_restore_2(PMINFO2);
+	DAC1064_restore_2(minfo);
 }
 #endif
 
diff --git a/drivers/video/matrox/matroxfb_DAC1064.h b/drivers/video/matrox/matroxfb_DAC1064.h
index 7a98ce8..c6ed780 100644
--- a/drivers/video/matrox/matroxfb_DAC1064.h
+++ b/drivers/video/matrox/matroxfb_DAC1064.h
@@ -11,8 +11,8 @@ extern struct matrox_switch matrox_mystique;
 extern struct matrox_switch matrox_G100;
 #endif
 #ifdef NEED_DAC1064
-void DAC1064_global_init(WPMINFO2);
-void DAC1064_global_restore(WPMINFO2);
+void DAC1064_global_init(struct matrox_fb_info *minfo);
+void DAC1064_global_restore(struct matrox_fb_info *minfo);
 #endif
 
 #define M1064_INDEX	0x00
diff --git a/drivers/video/matrox/matroxfb_Ti3026.c b/drivers/video/matrox/matroxfb_Ti3026.c
index bc9c274..835aaaa 100644
--- a/drivers/video/matrox/matroxfb_Ti3026.c
+++ b/drivers/video/matrox/matroxfb_Ti3026.c
@@ -279,27 +279,31 @@ static const unsigned char MGADACbpp32[] =
   TVP3026_XCOLKEYCTRL_ZOOM1,
   0x00, 0x00, TVP3026_XCURCTRL_DIS };
 
-static int Ti3026_calcclock(CPMINFO unsigned int freq, unsigned int fmax, int* in, int* feed, int* post) {
+static int Ti3026_calcclock(const struct matrox_fb_info *minfo,
+			    unsigned int freq, unsigned int fmax, int *in,
+			    int *feed, int *post)
+{
 	unsigned int fvco;
 	unsigned int lin, lfeed, lpost;
 
 	DBG(__func__)
 
-	fvco = PLL_calcclock(PMINFO freq, fmax, &lin, &lfeed, &lpost);
+	fvco = PLL_calcclock(minfo, freq, fmax, &lin, &lfeed, &lpost);
 	fvco >>= (*post = lpost);
 	*in = 64 - lin;
 	*feed = 64 - lfeed;
 	return fvco;
 }
 
-static int Ti3026_setpclk(WPMINFO int clk) {
+static int Ti3026_setpclk(struct matrox_fb_info *minfo, int clk)
+{
 	unsigned int f_pll;
 	unsigned int pixfeed, pixin, pixpost;
 	struct matrox_hw_state *hw = &minfo->hw;
 
 	DBG(__func__)
 
-	f_pll = Ti3026_calcclock(PMINFO clk, minfo->max_pixel_clock, &pixin, &pixfeed, &pixpost);
+	f_pll = Ti3026_calcclock(minfo, clk, minfo->max_pixel_clock, &pixin, &pixfeed, &pixpost);
 
 	hw->DACclk[0] = pixin | 0xC0;
 	hw->DACclk[1] = pixfeed;
@@ -361,7 +365,8 @@ static int Ti3026_setpclk(WPMINFO int clk) {
 	return 0;
 }
 
-static int Ti3026_init(WPMINFO struct my_timming* m) {
+static int Ti3026_init(struct matrox_fb_info *minfo, struct my_timming *m)
+{
 	u_int8_t muxctrl = isInterleave(minfo) ? TVP3026_XMUXCTRL_MEMORY_64BIT : TVP3026_XMUXCTRL_MEMORY_32BIT;
 	struct matrox_hw_state *hw = &minfo->hw;
 
@@ -400,7 +405,7 @@ static int Ti3026_init(WPMINFO struct my_timming* m) {
 		default:
 			return 1;	/* TODO: failed */
 	}
-	if (matroxfb_vgaHWinit(PMINFO m)) return 1;
+	if (matroxfb_vgaHWinit(minfo, m)) return 1;
 
 	/* set SYNC */
 	hw->MiscOutReg = 0xCB;
@@ -429,11 +434,12 @@ static int Ti3026_init(WPMINFO struct my_timming* m) {
 	if (isInterleave(minfo)) hw->MXoptionReg |= 0x00001000;
 
 	/* set DAC */
-	Ti3026_setpclk(PMINFO m->pixclock);
+	Ti3026_setpclk(minfo, m->pixclock);
 	return 0;
 }
 
-static void ti3026_setMCLK(WPMINFO int fout){
+static void ti3026_setMCLK(struct matrox_fb_info *minfo, int fout)
+{
 	unsigned int f_pll;
 	unsigned int pclk_m, pclk_n, pclk_p;
 	unsigned int mclk_m, mclk_n, mclk_p;
@@ -442,29 +448,29 @@ static void ti3026_setMCLK(WPMINFO int fout){
 
 	DBG(__func__)
 
-	f_pll = Ti3026_calcclock(PMINFO fout, minfo->max_pixel_clock, &mclk_n, &mclk_m, &mclk_p);
+	f_pll = Ti3026_calcclock(minfo, fout, minfo->max_pixel_clock, &mclk_n, &mclk_m, &mclk_p);
 
 	/* save pclk */
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFC);
-	pclk_n = inTi3026(PMINFO TVP3026_XPIXPLLDATA);
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFD);
-	pclk_m = inTi3026(PMINFO TVP3026_XPIXPLLDATA);
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFE);
-	pclk_p = inTi3026(PMINFO TVP3026_XPIXPLLDATA);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFC);
+	pclk_n = inTi3026(minfo, TVP3026_XPIXPLLDATA);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFD);
+	pclk_m = inTi3026(minfo, TVP3026_XPIXPLLDATA);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFE);
+	pclk_p = inTi3026(minfo, TVP3026_XPIXPLLDATA);
 
 	/* stop pclk */
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFE);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, 0x00);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFE);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, 0x00);
 
 	/* set pclk to new mclk */
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFC);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, mclk_n | 0xC0);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, mclk_m);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, mclk_p | 0xB0);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFC);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, mclk_n | 0xC0);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, mclk_m);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, mclk_p | 0xB0);
 
 	/* wait for PLL to lock */
 	for (tmout = 500000; tmout; tmout--) {
-		if (inTi3026(PMINFO TVP3026_XPIXPLLDATA) & 0x40)
+		if (inTi3026(minfo, TVP3026_XPIXPLLDATA) & 0x40)
 			break;
 		udelay(10);
 	};
@@ -472,23 +478,23 @@ static void ti3026_setMCLK(WPMINFO int fout){
 		printk(KERN_ERR "matroxfb: Temporary pixel PLL not locked after 5 secs\n");
 
 	/* output pclk on mclk pin */
-	mclk_ctl = inTi3026(PMINFO TVP3026_XMEMPLLCTRL);
-	outTi3026(PMINFO TVP3026_XMEMPLLCTRL, mclk_ctl & 0xE7);
-	outTi3026(PMINFO TVP3026_XMEMPLLCTRL, (mclk_ctl & 0xE7) | TVP3026_XMEMPLLCTRL_STROBEMKC4);
+	mclk_ctl = inTi3026(minfo, TVP3026_XMEMPLLCTRL);
+	outTi3026(minfo, TVP3026_XMEMPLLCTRL, mclk_ctl & 0xE7);
+	outTi3026(minfo, TVP3026_XMEMPLLCTRL, (mclk_ctl & 0xE7) | TVP3026_XMEMPLLCTRL_STROBEMKC4);
 
 	/* stop MCLK */
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFB);
-	outTi3026(PMINFO TVP3026_XMEMPLLDATA, 0x00);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFB);
+	outTi3026(minfo, TVP3026_XMEMPLLDATA, 0x00);
 
 	/* set mclk to new freq */
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xF3);
-	outTi3026(PMINFO TVP3026_XMEMPLLDATA, mclk_n | 0xC0);
-	outTi3026(PMINFO TVP3026_XMEMPLLDATA, mclk_m);
-	outTi3026(PMINFO TVP3026_XMEMPLLDATA, mclk_p | 0xB0);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xF3);
+	outTi3026(minfo, TVP3026_XMEMPLLDATA, mclk_n | 0xC0);
+	outTi3026(minfo, TVP3026_XMEMPLLDATA, mclk_m);
+	outTi3026(minfo, TVP3026_XMEMPLLDATA, mclk_p | 0xB0);
 
 	/* wait for PLL to lock */
 	for (tmout = 500000; tmout; tmout--) {
-		if (inTi3026(PMINFO TVP3026_XMEMPLLDATA) & 0x40)
+		if (inTi3026(minfo, TVP3026_XMEMPLLDATA) & 0x40)
 			break;
 		udelay(10);
 	}
@@ -509,22 +515,22 @@ static void ti3026_setMCLK(WPMINFO int fout){
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, minfo->hw.MXoptionReg);
 
 	/* output MCLK to MCLK pin */
-	outTi3026(PMINFO TVP3026_XMEMPLLCTRL, (mclk_ctl & 0xE7) | TVP3026_XMEMPLLCTRL_MCLK_MCLKPLL);
-	outTi3026(PMINFO TVP3026_XMEMPLLCTRL, (mclk_ctl       ) | TVP3026_XMEMPLLCTRL_MCLK_MCLKPLL | TVP3026_XMEMPLLCTRL_STROBEMKC4);
+	outTi3026(minfo, TVP3026_XMEMPLLCTRL, (mclk_ctl & 0xE7) | TVP3026_XMEMPLLCTRL_MCLK_MCLKPLL);
+	outTi3026(minfo, TVP3026_XMEMPLLCTRL, (mclk_ctl       ) | TVP3026_XMEMPLLCTRL_MCLK_MCLKPLL | TVP3026_XMEMPLLCTRL_STROBEMKC4);
 
 	/* stop PCLK */
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFE);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, 0x00);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFE);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, 0x00);
 
 	/* restore pclk */
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0xFC);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, pclk_n);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, pclk_m);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, pclk_p);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0xFC);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, pclk_n);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, pclk_m);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, pclk_p);
 
 	/* wait for PLL to lock */
 	for (tmout = 500000; tmout; tmout--) {
-		if (inTi3026(PMINFO TVP3026_XPIXPLLDATA) & 0x40)
+		if (inTi3026(minfo, TVP3026_XPIXPLLDATA) & 0x40)
 			break;
 		udelay(10);
 	}
@@ -532,8 +538,8 @@ static void ti3026_setMCLK(WPMINFO int fout){
 		printk(KERN_ERR "matroxfb: Pixel PLL not locked after 5 secs\n");
 }
 
-static void ti3026_ramdac_init(WPMINFO2) {
-
+static void ti3026_ramdac_init(struct matrox_fb_info *minfo)
+{
 	DBG(__func__)
 
 	minfo->features.pll.vco_freq_min = 110000;
@@ -545,10 +551,11 @@ static void ti3026_ramdac_init(WPMINFO2) {
 	minfo->features.pll.post_shift_max = 3;
 	if (minfo->devflags.noinit)
 		return;
-	ti3026_setMCLK(PMINFO 60000);
+	ti3026_setMCLK(minfo, 60000);
 }
 
-static void Ti3026_restore(WPMINFO2) {
+static void Ti3026_restore(struct matrox_fb_info *minfo)
+{
 	int i;
 	unsigned char progdac[6];
 	struct matrox_hw_state *hw = &minfo->hw;
@@ -569,7 +576,7 @@ static void Ti3026_restore(WPMINFO2) {
 
 	CRITEND
 
-	matroxfb_vgaHWrestore(PMINFO2);
+	matroxfb_vgaHWrestore(minfo);
 
 	CRITBEGIN
 
@@ -578,18 +585,18 @@ static void Ti3026_restore(WPMINFO2) {
 		mga_setr(M_EXTVGA_INDEX, i, hw->CRTCEXT[i]);
 
 	for (i = 0; i < 21; i++) {
-		outTi3026(PMINFO DACseq[i], hw->DACreg[i]);
+		outTi3026(minfo, DACseq[i], hw->DACreg[i]);
 	}
 
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0x00);
-	progdac[0] = inTi3026(PMINFO TVP3026_XPIXPLLDATA);
-	progdac[3] = inTi3026(PMINFO TVP3026_XLOOPPLLDATA);
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0x15);
-	progdac[1] = inTi3026(PMINFO TVP3026_XPIXPLLDATA);
-	progdac[4] = inTi3026(PMINFO TVP3026_XLOOPPLLDATA);
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0x2A);
-	progdac[2] = inTi3026(PMINFO TVP3026_XPIXPLLDATA);
-	progdac[5] = inTi3026(PMINFO TVP3026_XLOOPPLLDATA);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0x00);
+	progdac[0] = inTi3026(minfo, TVP3026_XPIXPLLDATA);
+	progdac[3] = inTi3026(minfo, TVP3026_XLOOPPLLDATA);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0x15);
+	progdac[1] = inTi3026(minfo, TVP3026_XPIXPLLDATA);
+	progdac[4] = inTi3026(minfo, TVP3026_XLOOPPLLDATA);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0x2A);
+	progdac[2] = inTi3026(minfo, TVP3026_XPIXPLLDATA);
+	progdac[5] = inTi3026(minfo, TVP3026_XLOOPPLLDATA);
 
 	CRITEND
 	if (memcmp(hw->DACclk, progdac, 6)) {
@@ -598,20 +605,20 @@ static void Ti3026_restore(WPMINFO2) {
 		/* Maybe even we should call schedule() ? */
 
 		CRITBEGIN
-		outTi3026(PMINFO TVP3026_XCLKCTRL, hw->DACreg[POS3026_XCLKCTRL]);
-		outTi3026(PMINFO TVP3026_XPLLADDR, 0x2A);
-		outTi3026(PMINFO TVP3026_XLOOPPLLDATA, 0);
-		outTi3026(PMINFO TVP3026_XPIXPLLDATA, 0);
+		outTi3026(minfo, TVP3026_XCLKCTRL, hw->DACreg[POS3026_XCLKCTRL]);
+		outTi3026(minfo, TVP3026_XPLLADDR, 0x2A);
+		outTi3026(minfo, TVP3026_XLOOPPLLDATA, 0);
+		outTi3026(minfo, TVP3026_XPIXPLLDATA, 0);
 
-		outTi3026(PMINFO TVP3026_XPLLADDR, 0x00);
+		outTi3026(minfo, TVP3026_XPLLADDR, 0x00);
 		for (i = 0; i < 3; i++)
-			outTi3026(PMINFO TVP3026_XPIXPLLDATA, hw->DACclk[i]);
+			outTi3026(minfo, TVP3026_XPIXPLLDATA, hw->DACclk[i]);
 		/* wait for PLL only if PLL clock requested (always for PowerMode, never for VGA) */
 		if (hw->MiscOutReg & 0x08) {
 			int tmout;
-			outTi3026(PMINFO TVP3026_XPLLADDR, 0x3F);
+			outTi3026(minfo, TVP3026_XPLLADDR, 0x3F);
 			for (tmout = 500000; tmout; --tmout) {
-				if (inTi3026(PMINFO TVP3026_XPIXPLLDATA) & 0x40)
+				if (inTi3026(minfo, TVP3026_XPIXPLLDATA) & 0x40)
 					break;
 				udelay(10);
 			}
@@ -624,18 +631,18 @@ static void Ti3026_restore(WPMINFO2) {
 				dprintk(KERN_INFO "PixelPLL: %d\n", 500000-tmout);
 			CRITBEGIN
 		}
-		outTi3026(PMINFO TVP3026_XMEMPLLCTRL, hw->DACreg[POS3026_XMEMPLLCTRL]);
-		outTi3026(PMINFO TVP3026_XPLLADDR, 0x00);
+		outTi3026(minfo, TVP3026_XMEMPLLCTRL, hw->DACreg[POS3026_XMEMPLLCTRL]);
+		outTi3026(minfo, TVP3026_XPLLADDR, 0x00);
 		for (i = 3; i < 6; i++)
-			outTi3026(PMINFO TVP3026_XLOOPPLLDATA, hw->DACclk[i]);
+			outTi3026(minfo, TVP3026_XLOOPPLLDATA, hw->DACclk[i]);
 		CRITEND
 		if ((hw->MiscOutReg & 0x08) && ((hw->DACclk[5] & 0x80) == 0x80)) {
 			int tmout;
 
 			CRITBEGIN
-			outTi3026(PMINFO TVP3026_XPLLADDR, 0x3F);
+			outTi3026(minfo, TVP3026_XPLLADDR, 0x3F);
 			for (tmout = 500000; tmout; --tmout) {
-				if (inTi3026(PMINFO TVP3026_XLOOPPLLDATA) & 0x40)
+				if (inTi3026(minfo, TVP3026_XLOOPPLLDATA) & 0x40)
 					break;
 				udelay(10);
 			}
@@ -660,18 +667,19 @@ static void Ti3026_restore(WPMINFO2) {
 #endif
 }
 
-static void Ti3026_reset(WPMINFO2) {
-
+static void Ti3026_reset(struct matrox_fb_info *minfo)
+{
 	DBG(__func__)
 
-	ti3026_ramdac_init(PMINFO2);
+	ti3026_ramdac_init(minfo);
 }
 
 static struct matrox_altout ti3026_output = {
 	.name	 = "Primary output",
 };
 
-static int Ti3026_preinit(WPMINFO2) {
+static int Ti3026_preinit(struct matrox_fb_info *minfo)
+{
 	static const int vxres_mill2[] = { 512,        640, 768,  800,  832,  960,
 					  1024, 1152, 1280,      1600, 1664, 1920,
 					  2048, 0};
@@ -706,19 +714,19 @@ static int Ti3026_preinit(WPMINFO2) {
 		hw->MXoptionReg |=  0x20000000;
 	pci_write_config_dword(minfo->pcidev, PCI_OPTION_REG, hw->MXoptionReg);
 
-	minfo->accel.ramdac_rev = inTi3026(PMINFO TVP3026_XSILICONREV);
+	minfo->accel.ramdac_rev = inTi3026(minfo, TVP3026_XSILICONREV);
 
-	outTi3026(PMINFO TVP3026_XCLKCTRL, TVP3026_XCLKCTRL_SRC_CLK0VGA | TVP3026_XCLKCTRL_CLKSTOPPED);
-	outTi3026(PMINFO TVP3026_XTRUECOLORCTRL, TVP3026_XTRUECOLORCTRL_PSEUDOCOLOR);
-	outTi3026(PMINFO TVP3026_XMUXCTRL, TVP3026_XMUXCTRL_VGA);
+	outTi3026(minfo, TVP3026_XCLKCTRL, TVP3026_XCLKCTRL_SRC_CLK0VGA | TVP3026_XCLKCTRL_CLKSTOPPED);
+	outTi3026(minfo, TVP3026_XTRUECOLORCTRL, TVP3026_XTRUECOLORCTRL_PSEUDOCOLOR);
+	outTi3026(minfo, TVP3026_XMUXCTRL, TVP3026_XMUXCTRL_VGA);
 
-	outTi3026(PMINFO TVP3026_XPLLADDR, 0x2A);
-	outTi3026(PMINFO TVP3026_XLOOPPLLDATA, 0x00);
-	outTi3026(PMINFO TVP3026_XPIXPLLDATA, 0x00);
+	outTi3026(minfo, TVP3026_XPLLADDR, 0x2A);
+	outTi3026(minfo, TVP3026_XLOOPPLLDATA, 0x00);
+	outTi3026(minfo, TVP3026_XPIXPLLDATA, 0x00);
 
 	mga_outb(M_MISC_REG, 0x67);
 
-	outTi3026(PMINFO TVP3026_XMEMPLLCTRL, TVP3026_XMEMPLLCTRL_STROBEMKC4 | TVP3026_XMEMPLLCTRL_MCLK_MCLKPLL);
+	outTi3026(minfo, TVP3026_XMEMPLLCTRL, TVP3026_XMEMPLLCTRL_STROBEMKC4 | TVP3026_XMEMPLLCTRL_MCLK_MCLKPLL);
 
 	mga_outl(M_RESET, 1);
 	udelay(250);
diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c
index ed42bf6..3577ec1 100644
--- a/drivers/video/matrox/matroxfb_accel.c
+++ b/drivers/video/matrox/matroxfb_accel.c
@@ -107,7 +107,8 @@ static void matroxfb_imageblit(struct fb_info* info, const struct fb_image* imag
 static void matroxfb_cfb4_fillrect(struct fb_info* info, const struct fb_fillrect* rect);
 static void matroxfb_cfb4_copyarea(struct fb_info* info, const struct fb_copyarea* area);
 
-void matrox_cfbX_init(WPMINFO2) {
+void matrox_cfbX_init(struct matrox_fb_info *minfo)
+{
 	u_int32_t maccess;
 	u_int32_t mpitch;
 	u_int32_t mopmode;
@@ -195,7 +196,9 @@ void matrox_cfbX_init(WPMINFO2) {
 
 EXPORT_SYMBOL(matrox_cfbX_init);
 
-static void matrox_accel_bmove(WPMINFO int vxres, int sy, int sx, int dy, int dx, int height, int width) {
+static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
+			       int sx, int dy, int dx, int height, int width)
+{
 	int start, end;
 	CRITFLAGS
 
@@ -231,7 +234,10 @@ static void matrox_accel_bmove(WPMINFO int vxres, int sy, int sx, int dy, int dx
 	CRITEND
 }
 
-static void matrox_accel_bmove_lin(WPMINFO int vxres, int sy, int sx, int dy, int dx, int height, int width) {
+static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres,
+				   int sy, int sx, int dy, int dx, int height,
+				   int width)
+{
 	int start, end;
 	CRITFLAGS
 
@@ -274,17 +280,18 @@ static void matroxfb_cfb4_copyarea(struct fb_info* info, const struct fb_copyare
 	if ((area->sx | area->dx | area->width) & 1)
 		cfb_copyarea(info, area);
 	else
-		matrox_accel_bmove_lin(PMINFO minfo->fbcon.var.xres_virtual >> 1, area->sy, area->sx >> 1, area->dy, area->dx >> 1, area->height, area->width >> 1);
+		matrox_accel_bmove_lin(minfo, minfo->fbcon.var.xres_virtual >> 1, area->sy, area->sx >> 1, area->dy, area->dx >> 1, area->height, area->width >> 1);
 }
 
 static void matroxfb_copyarea(struct fb_info* info, const struct fb_copyarea* area) {
 	MINFO_FROM_INFO(info);
 
-	matrox_accel_bmove(PMINFO minfo->fbcon.var.xres_virtual, area->sy, area->sx, area->dy, area->dx, area->height, area->width);
+	matrox_accel_bmove(minfo, minfo->fbcon.var.xres_virtual, area->sy, area->sx, area->dy, area->dx, area->height, area->width);
 }
 
-static void matroxfb_accel_clear(WPMINFO u_int32_t color, int sy, int sx, int height,
-		int width) {
+static void matroxfb_accel_clear(struct matrox_fb_info *minfo, u_int32_t color,
+				 int sy, int sx, int height, int width)
+{
 	CRITFLAGS
 
 	DBG(__func__)
@@ -306,12 +313,14 @@ static void matroxfb_fillrect(struct fb_info* info, const struct fb_fillrect* re
 
 	switch (rect->rop) {
 		case ROP_COPY:
-			matroxfb_accel_clear(PMINFO ((u_int32_t*)info->pseudo_palette)[rect->color], rect->dy, rect->dx, rect->height, rect->width);
+			matroxfb_accel_clear(minfo, ((u_int32_t *)info->pseudo_palette)[rect->color], rect->dy, rect->dx, rect->height, rect->width);
 			break;
 	}
 }
 
-static void matroxfb_cfb4_clear(WPMINFO u_int32_t bgx, int sy, int sx, int height, int width) {
+static void matroxfb_cfb4_clear(struct matrox_fb_info *minfo, u_int32_t bgx,
+				int sy, int sx, int height, int width)
+{
 	int whattodo;
 	CRITFLAGS
 
@@ -371,13 +380,15 @@ static void matroxfb_cfb4_fillrect(struct fb_info* info, const struct fb_fillrec
 
 	switch (rect->rop) {
 		case ROP_COPY:
-			matroxfb_cfb4_clear(PMINFO ((u_int32_t*)info->pseudo_palette)[rect->color], rect->dy, rect->dx, rect->height, rect->width);
+			matroxfb_cfb4_clear(minfo, ((u_int32_t *)info->pseudo_palette)[rect->color], rect->dy, rect->dx, rect->height, rect->width);
 			break;
 	}
 }
 
-static void matroxfb_1bpp_imageblit(WPMINFO u_int32_t fgx, u_int32_t bgx,
-		const u_int8_t* chardata, int width, int height, int yy, int xx) {
+static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx,
+				    u_int32_t bgx, const u_int8_t *chardata,
+				    int width, int height, int yy, int xx)
+{
 	u_int32_t step;
 	u_int32_t ydstlen;
 	u_int32_t xlen;
@@ -476,7 +487,7 @@ static void matroxfb_imageblit(struct fb_info* info, const struct fb_image* imag
 
 		fgx = ((u_int32_t*)info->pseudo_palette)[image->fg_color];
 		bgx = ((u_int32_t*)info->pseudo_palette)[image->bg_color];
-		matroxfb_1bpp_imageblit(PMINFO fgx, bgx, image->data, image->width, image->height, image->dy, image->dx);
+		matroxfb_1bpp_imageblit(minfo, fgx, bgx, image->data, image->width, image->height, image->dy, image->dx);
 	} else {
 		/* Danger! image->depth is useless: logo painting code always
 		   passes framebuffer color depth here, although logo data are
diff --git a/drivers/video/matrox/matroxfb_accel.h b/drivers/video/matrox/matroxfb_accel.h
index f40c314..1e418e62 100644
--- a/drivers/video/matrox/matroxfb_accel.h
+++ b/drivers/video/matrox/matroxfb_accel.h
@@ -3,6 +3,6 @@
 
 #include "matroxfb_base.h"
 
-void matrox_cfbX_init(WPMINFO2);
+void matrox_cfbX_init(struct matrox_fb_info *minfo);
 
 #endif
diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c
index 867a4d9..73ad598 100644
--- a/drivers/video/matrox/matroxfb_base.c
+++ b/drivers/video/matrox/matroxfb_base.c
@@ -154,7 +154,8 @@ static struct fb_var_screeninfo vesafb_defined = {
 
 
 /* --------------------------------------------------------------------- */
-static void update_crtc2(WPMINFO unsigned int pos) {
+static void update_crtc2(struct matrox_fb_info *minfo, unsigned int pos)
+{
 	struct matroxfb_dh_fb_info *info = minfo->crtc2.info;
 
 	/* Make sure that displays are compatible */
@@ -177,7 +178,8 @@ static void update_crtc2(WPMINFO unsigned int pos) {
 	}
 }
 
-static void matroxfb_crtc1_panpos(WPMINFO2) {
+static void matroxfb_crtc1_panpos(struct matrox_fb_info *minfo)
+{
 	if (minfo->crtc1.panpos >= 0) {
 		unsigned long flags;
 		int panpos;
@@ -210,7 +212,7 @@ static irqreturn_t matrox_irq(int irq, void *dev_id)
 	if (status & 0x20) {
 		mga_outl(M_ICLEAR, 0x20);
 		minfo->crtc1.vsync.cnt++;
-		matroxfb_crtc1_panpos(PMINFO2);
+		matroxfb_crtc1_panpos(minfo);
 		wake_up_interruptible(&minfo->crtc1.vsync.wait);
 		handled = 1;
 	}
@@ -223,7 +225,8 @@ static irqreturn_t matrox_irq(int irq, void *dev_id)
 	return IRQ_RETVAL(handled);
 }
 
-int matroxfb_enable_irq(WPMINFO int reenable) {
+int matroxfb_enable_irq(struct matrox_fb_info *minfo, int reenable)
+{
 	u_int32_t bm;
 
 	if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400)
@@ -252,10 +255,11 @@ int matroxfb_enable_irq(WPMINFO int reenable) {
 	return 0;
 }
 
-static void matroxfb_disable_irq(WPMINFO2) {
+static void matroxfb_disable_irq(struct matrox_fb_info *minfo)
+{
 	if (test_and_clear_bit(0, &minfo->irq_flags)) {
 		/* Flush pending pan-at-vbl request... */
-		matroxfb_crtc1_panpos(PMINFO2);
+		matroxfb_crtc1_panpos(minfo);
 		if (minfo->devflags.accelerator == FB_ACCEL_MATROX_MGAG400)
 			mga_outl(M_IEN, mga_inl(M_IEN) & ~0x220);
 		else
@@ -264,7 +268,8 @@ static void matroxfb_disable_irq(WPMINFO2) {
 	}
 }
 
-int matroxfb_wait_for_sync(WPMINFO u_int32_t crtc) {
+int matroxfb_wait_for_sync(struct matrox_fb_info *minfo, u_int32_t crtc)
+{
 	struct matrox_vsync *vs;
 	unsigned int cnt;
 	int ret;
@@ -282,7 +287,7 @@ int matroxfb_wait_for_sync(WPMINFO u_int32_t crtc) {
 		default:
 			return -ENODEV;
 	}
-	ret = matroxfb_enable_irq(PMINFO 0);
+	ret = matroxfb_enable_irq(minfo, 0);
 	if (ret) {
 		return ret;
 	}
@@ -293,7 +298,7 @@ int matroxfb_wait_for_sync(WPMINFO u_int32_t crtc) {
 		return ret;
 	}
 	if (ret == 0) {
-		matroxfb_enable_irq(PMINFO 1);
+		matroxfb_enable_irq(minfo, 1);
 		return -ETIMEDOUT;
 	}
 	return 0;
@@ -301,7 +306,9 @@ int matroxfb_wait_for_sync(WPMINFO u_int32_t crtc) {
 
 /* --------------------------------------------------------------------- */
 
-static void matrox_pan_var(WPMINFO struct fb_var_screeninfo *var) {
+static void matrox_pan_var(struct matrox_fb_info *minfo,
+			   struct fb_var_screeninfo *var)
+{
 	unsigned int pos;
 	unsigned short p0, p1, p2;
 #ifdef CONFIG_FB_MATROX_32MB
@@ -329,7 +336,7 @@ static void matrox_pan_var(WPMINFO struct fb_var_screeninfo *var) {
 #endif
 
 	/* FB_ACTIVATE_VBL and we can acquire interrupts? Honor FB_ACTIVATE_VBL then... */
-	vbl = (var->activate & FB_ACTIVATE_VBL) && (matroxfb_enable_irq(PMINFO 0) == 0);
+	vbl = (var->activate & FB_ACTIVATE_VBL) && (matroxfb_enable_irq(minfo, 0) == 0);
 
 	CRITBEGIN
 
@@ -349,12 +356,13 @@ static void matrox_pan_var(WPMINFO struct fb_var_screeninfo *var) {
 	}
 	matroxfb_DAC_unlock_irqrestore(flags);
 
-	update_crtc2(PMINFO pos);
+	update_crtc2(minfo, pos);
 
 	CRITEND
 }
 
-static void matroxfb_remove(WPMINFO int dummy) {
+static void matroxfb_remove(struct matrox_fb_info *minfo, int dummy)
+{
 	/* Currently we are holding big kernel lock on all dead & usecount updates.
 	 * Destroy everything after all users release it. Especially do not unregister
 	 * framebuffer and iounmap memory, neither fbmem nor fbcon-cfb* does not check
@@ -370,7 +378,7 @@ static void matroxfb_remove(WPMINFO int dummy) {
 	}
 	matroxfb_unregister_device(minfo);
 	unregister_framebuffer(&minfo->fbcon);
-	matroxfb_g450_shutdown(PMINFO2);
+	matroxfb_g450_shutdown(minfo);
 #ifdef CONFIG_MTRR
 	if (minfo->mtrr.vram_valid)
 		mtrr_del(minfo->mtrr.vram, minfo->video.base, minfo->video.len);
@@ -410,11 +418,11 @@ static int matroxfb_release(struct fb_info *info, int user)
 
 	if (user) {
 		if (0 == --minfo->userusecount) {
-			matroxfb_disable_irq(PMINFO2);
+			matroxfb_disable_irq(minfo);
 		}
 	}
 	if (!(--minfo->usecount) && minfo->dead) {
-		matroxfb_remove(PMINFO 0);
+		matroxfb_remove(minfo, 0);
 	}
 	return(0);
 }
@@ -425,11 +433,13 @@ static int matroxfb_pan_display(struct fb_var_screeninfo *var,
 
 	DBG(__func__)
 
-	matrox_pan_var(PMINFO var);
+	matrox_pan_var(minfo, var);
 	return 0;
 }
 
-static int matroxfb_get_final_bppShift(CPMINFO int bpp) {
+static int matroxfb_get_final_bppShift(const struct matrox_fb_info *minfo,
+				       int bpp)
+{
 	int bppshft2;
 
 	DBG(__func__)
@@ -445,7 +455,9 @@ static int matroxfb_get_final_bppShift(CPMINFO int bpp) {
 	return bppshft2;
 }
 
-static int matroxfb_test_and_set_rounding(CPMINFO int xres, int bpp) {
+static int matroxfb_test_and_set_rounding(const struct matrox_fb_info *minfo,
+					  int xres, int bpp)
+{
 	int over;
 	int rounding;
 
@@ -476,7 +488,9 @@ static int matroxfb_test_and_set_rounding(CPMINFO int xres, int bpp) {
 	return xres;
 }
 
-static int matroxfb_pitch_adjust(CPMINFO int xres, int bpp) {
+static int matroxfb_pitch_adjust(const struct matrox_fb_info *minfo, int xres,
+				 int bpp)
+{
 	const int* width;
 	int xres_new;
 
@@ -488,14 +502,14 @@ static int matroxfb_pitch_adjust(CPMINFO int xres, int bpp) {
 
 	if (minfo->devflags.precise_width) {
 		while (*width) {
-			if ((*width >= xres) && (matroxfb_test_and_set_rounding(PMINFO *width, bpp) == *width)) {
+			if ((*width >= xres) && (matroxfb_test_and_set_rounding(minfo, *width, bpp) == *width)) {
 				break;
 			}
 			width++;
 		}
 		xres_new = *width;
 	} else {
-		xres_new = matroxfb_test_and_set_rounding(PMINFO xres, bpp);
+		xres_new = matroxfb_test_and_set_rounding(minfo, xres, bpp);
 	}
 	return xres_new;
 }
@@ -522,7 +536,10 @@ static int matroxfb_get_cmap_len(struct fb_var_screeninfo *var) {
 	return 16;	/* return something reasonable... or panic()? */
 }
 
-static int matroxfb_decode_var(CPMINFO struct fb_var_screeninfo *var, int *visual, int *video_cmap_len, unsigned int* ydstorg) {
+static int matroxfb_decode_var(const struct matrox_fb_info *minfo,
+			       struct fb_var_screeninfo *var, int *visual,
+			       int *video_cmap_len, unsigned int* ydstorg)
+{
 	struct RGBT {
 		unsigned char bpp;
 		struct {
@@ -564,7 +581,7 @@ static int matroxfb_decode_var(CPMINFO struct fb_var_screeninfo *var, int *visua
 	if (var->xres_virtual < var->xres)
 		var->xres_virtual = var->xres;
 
-	var->xres_virtual = matroxfb_pitch_adjust(PMINFO var->xres_virtual, bpp);
+	var->xres_virtual = matroxfb_pitch_adjust(minfo, var->xres_virtual, bpp);
 	memlen = var->xres_virtual * bpp * var->yres_virtual / 8;
 	if (memlen > vramlen) {
 		var->yres_virtual = vramlen * 8 / (var->xres_virtual * bpp);
@@ -700,7 +717,7 @@ static int matroxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	return 0;
 }
 
-static void matroxfb_init_fix(WPMINFO2)
+static void matroxfb_init_fix(struct matrox_fb_info *minfo)
 {
 	struct fb_fix_screeninfo *fix = &minfo->fbcon.fix;
 	DBG(__func__)
@@ -715,7 +732,7 @@ static void matroxfb_init_fix(WPMINFO2)
 	fix->accel = minfo->devflags.accelerator;
 }
 
-static void matroxfb_update_fix(WPMINFO2)
+static void matroxfb_update_fix(struct matrox_fb_info *minfo)
 {
 	struct fb_fix_screeninfo *fix = &minfo->fbcon.fix;
 	DBG(__func__)
@@ -737,7 +754,7 @@ static int matroxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *inf
 	if (minfo->dead) {
 		return -ENXIO;
 	}
-	if ((err = matroxfb_decode_var(PMINFO var, &visual, &cmap_len, &ydstorg)) != 0)
+	if ((err = matroxfb_decode_var(minfo, var, &visual, &cmap_len, &ydstorg)) != 0)
 		return err;
 	return 0;
 }
@@ -758,10 +775,10 @@ static int matroxfb_set_par(struct fb_info *info)
 	}
 
 	var = &info->var;
-	if ((err = matroxfb_decode_var(PMINFO var, &visual, &cmap_len, &ydstorg)) != 0)
+	if ((err = matroxfb_decode_var(minfo, var, &visual, &cmap_len, &ydstorg)) != 0)
 		return err;
 	minfo->fbcon.screen_base = vaddr_va(minfo->video.vbase) + ydstorg;
-	matroxfb_update_fix(PMINFO2);
+	matroxfb_update_fix(minfo);
 	minfo->fbcon.fix.visual = visual;
 	minfo->fbcon.fix.type = FB_TYPE_PACKED_PIXELS;
 	minfo->fbcon.fix.type_aux = 0;
@@ -777,7 +794,7 @@ static int matroxfb_set_par(struct fb_info *info)
 			minfo->curr.ydstorg.pixels = ydstorg;
 		else
 			minfo->curr.ydstorg.pixels = (ydstorg * 8) / var->bits_per_pixel;
-		minfo->curr.final_bppShift = matroxfb_get_final_bppShift(PMINFO var->bits_per_pixel);
+		minfo->curr.final_bppShift = matroxfb_get_final_bppShift(minfo, var->bits_per_pixel);
 		{	struct my_timming mt;
 			struct matrox_hw_state* hw;
 			int out;
@@ -805,7 +822,7 @@ static int matroxfb_set_par(struct fb_info *info)
 			up_read(&minfo->altout.lock);
 			minfo->crtc1.pixclock = mt.pixclock;
 			minfo->crtc1.mnp = mt.mnp;
-			minfo->hw_switch->init(PMINFO &mt);
+			minfo->hw_switch->init(minfo, &mt);
 			pos = (var->yoffset * var->xres_virtual + var->xoffset) * minfo->curr.final_bppShift / 32;
 			pos += minfo->curr.ydstorg.chunks;
 
@@ -813,8 +830,8 @@ static int matroxfb_set_par(struct fb_info *info)
 			hw->CRTC[0x0C] = (pos & 0xFF00) >> 8;
 			hw->CRTCEXT[0] = (hw->CRTCEXT[0] & 0xF0) | ((pos >> 16) & 0x0F) | ((pos >> 14) & 0x40);
 			hw->CRTCEXT[8] = pos >> 21;
-			minfo->hw_switch->restore(PMINFO2);
-			update_crtc2(PMINFO pos);
+			minfo->hw_switch->restore(minfo);
+			update_crtc2(minfo, pos);
 			down_read(&minfo->altout.lock);
 			for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
 				if (minfo->outputs[out].src == MATROXFB_SRC_CRTC1 &&
@@ -829,18 +846,19 @@ static int matroxfb_set_par(struct fb_info *info)
 				}
 			}
 			up_read(&minfo->altout.lock);
-			matrox_cfbX_init(PMINFO2);
+			matrox_cfbX_init(minfo);
 		}
 	}
 	minfo->initialized = 1;
 	return 0;
 }
 
-static int matroxfb_get_vblank(WPMINFO struct fb_vblank *vblank)
+static int matroxfb_get_vblank(struct matrox_fb_info *minfo,
+			       struct fb_vblank *vblank)
 {
 	unsigned int sts1;
 
-	matroxfb_enable_irq(PMINFO 0);
+	matroxfb_enable_irq(minfo, 0);
 	memset(vblank, 0, sizeof(*vblank));
 	vblank->flags = FB_VBLANK_HAVE_VCOUNT | FB_VBLANK_HAVE_VSYNC |
 			FB_VBLANK_HAVE_VBLANK | FB_VBLANK_HAVE_HBLANK;
@@ -886,7 +904,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 				struct fb_vblank vblank;
 				int err;
 
-				err = matroxfb_get_vblank(PMINFO &vblank);
+				err = matroxfb_get_vblank(minfo, &vblank);
 				if (err)
 					return err;
 				if (copy_to_user(argp, &vblank, sizeof(vblank)))
@@ -900,7 +918,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 				if (get_user(crt, (u_int32_t __user *)arg))
 					return -EFAULT;
 
-				return matroxfb_wait_for_sync(PMINFO crt);
+				return matroxfb_wait_for_sync(minfo, crt);
 			}
 		case MATROXFB_SET_OUTPUT_MODE:
 			{
@@ -1277,7 +1295,9 @@ static char outputs[8];			/* "matrox:outputs:xxx" */
 static char videomode[64];		/* "matrox:mode:xxxxx" or "matrox:xxxxx" */
 #endif
 
-static int matroxfb_getmemory(WPMINFO unsigned int maxSize, unsigned int *realSize){
+static int matroxfb_getmemory(struct matrox_fb_info *minfo,
+			      unsigned int maxSize, unsigned int *realSize)
+{
 	vaddr_t vm;
 	unsigned int offs;
 	unsigned int offs2;
@@ -1554,7 +1574,8 @@ static struct fb_videomode defaultmode = {
 
 static int hotplug = 0;
 
-static void setDefaultOutputs(WPMINFO2) {
+static void setDefaultOutputs(struct matrox_fb_info *minfo)
+{
 	unsigned int i;
 	const char* ptr;
 
@@ -1587,7 +1608,8 @@ static void setDefaultOutputs(WPMINFO2) {
 	outputs[0] = 0;
 }
 
-static int initMatrox2(WPMINFO struct board* b){
+static int initMatrox2(struct matrox_fb_info *minfo, struct board *b)
+{
 	unsigned long ctrlptr_phys = 0;
 	unsigned long video_base_phys = 0;
 	unsigned int memsize;
@@ -1636,7 +1658,7 @@ static int initMatrox2(WPMINFO struct board* b){
 	minfo->devflags.g450dac = (b->flags & DEVF_G450DAC) != 0;
 	minfo->devflags.textstep = minfo->devflags.vgastep * minfo->devflags.textmode;
 	minfo->devflags.textvram = 65536 / minfo->devflags.textmode;
-	setDefaultOutputs(PMINFO2);
+	setDefaultOutputs(minfo);
 	if (b->flags & DEVF_PANELLINK_CAPABLE) {
 		minfo->outputs[2].data = minfo;
 		minfo->outputs[2].output = &panellink_output;
@@ -1719,13 +1741,13 @@ static int initMatrox2(WPMINFO struct board* b){
 	}
 
 	err = -ENXIO;
-	matroxfb_read_pins(PMINFO2);
-	if (minfo->hw_switch->preinit(PMINFO2)) {
+	matroxfb_read_pins(minfo);
+	if (minfo->hw_switch->preinit(minfo)) {
 		goto failVideoIO;
 	}
 
 	err = -ENOMEM;
-	if (!matroxfb_getmemory(PMINFO memsize, &minfo->video.len) || !minfo->video.len) {
+	if (!matroxfb_getmemory(minfo, memsize, &minfo->video.len) || !minfo->video.len) {
 		printk(KERN_ERR "matroxfb: cannot determine memory size\n");
 		goto failVideoIO;
 	}
@@ -1745,8 +1767,8 @@ static int initMatrox2(WPMINFO struct board* b){
 
 	if (!minfo->devflags.novga)
 		request_region(0x3C0, 32, "matrox");
-	matroxfb_g450_connect(PMINFO2);
-	minfo->hw_switch->reset(PMINFO2);
+	matroxfb_g450_connect(minfo);
+	minfo->hw_switch->reset(minfo);
 
 	minfo->fbcon.monspecs.hfmin = 0;
 	minfo->fbcon.monspecs.hfmax = fh;
@@ -1870,7 +1892,7 @@ static int initMatrox2(WPMINFO struct board* b){
 		vesafb_defined.yres_virtual = 65536; /* large enough to be INF, but small enough
 							to yres_virtual * xres_virtual < 2^32 */
 	}
-	matroxfb_init_fix(PMINFO2);
+	matroxfb_init_fix(minfo);
 	minfo->fbcon.screen_base = vaddr_va(minfo->video.vbase);
 	/* Normalize values (namely yres_virtual) */
 	matroxfb_check_var(&vesafb_defined, &minfo->fbcon);
@@ -1910,7 +1932,7 @@ static int initMatrox2(WPMINFO struct board* b){
 
 	return 0;
 failVideoIO:;
-	matroxfb_g450_shutdown(PMINFO2);
+	matroxfb_g450_shutdown(minfo);
 	mga_iounmap(minfo->video.vbase);
 failCtrlIO:;
 	mga_iounmap(minfo->mmio.vbase);
@@ -2075,7 +2097,7 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm
 	init_waitqueue_head(&minfo->crtc2.vsync.wait);
 	minfo->crtc1.panpos = -1;
 
-	err = initMatrox2(PMINFO b);
+	err = initMatrox2(minfo, b);
 	if (!err) {
 		matroxfb_register_device(minfo);
 		return 0;
@@ -2088,7 +2110,7 @@ static void pci_remove_matrox(struct pci_dev* pdev) {
 	struct matrox_fb_info* minfo;
 
 	minfo = pci_get_drvdata(pdev);
-	matroxfb_remove(PMINFO 1);
+	matroxfb_remove(minfo, 1);
 }
 
 static struct pci_device_id matroxfb_devices[] = {
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index bbd7c04..cd90cd2 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -524,22 +524,15 @@ struct matrox_fb_info {
 
 #define info2minfo(info) container_of(info, struct matrox_fb_info, fbcon)
 
-#define WPMINFO2 struct matrox_fb_info* minfo
-#define WPMINFO  WPMINFO2 ,
-#define CPMINFO2 const struct matrox_fb_info* minfo
-#define CPMINFO	 CPMINFO2 ,
-#define PMINFO2  minfo
-#define PMINFO   PMINFO2 ,
-
 #define MINFO_FROM(x)	   struct matrox_fb_info* minfo = x
 
 #define MINFO_FROM_INFO(x) MINFO_FROM(info2minfo(x))
 
 struct matrox_switch {
-	int	(*preinit)(WPMINFO2);
-	void	(*reset)(WPMINFO2);
-	int	(*init)(WPMINFO struct my_timming*);
-	void	(*restore)(WPMINFO2);
+	int	(*preinit)(struct matrox_fb_info *minfo);
+	void	(*reset)(struct matrox_fb_info *minfo);
+	int	(*init)(struct matrox_fb_info *minfo, struct my_timming*);
+	void	(*restore)(struct matrox_fb_info *minfo);
 };
 
 struct matroxfb_driver {
@@ -729,11 +722,12 @@ void matroxfb_unregister_driver(struct matroxfb_driver* drv);
 #define matroxfb_DAC_unlock()                 spin_unlock(&minfo->lock.DAC)
 #define matroxfb_DAC_lock_irqsave(flags)      spin_lock_irqsave(&minfo->lock.DAC, flags)
 #define matroxfb_DAC_unlock_irqrestore(flags) spin_unlock_irqrestore(&minfo->lock.DAC, flags)
-extern void matroxfb_DAC_out(CPMINFO int reg, int val);
-extern int matroxfb_DAC_in(CPMINFO int reg);
+extern void matroxfb_DAC_out(const struct matrox_fb_info *minfo, int reg,
+			     int val);
+extern int matroxfb_DAC_in(const struct matrox_fb_info *minfo, int reg);
 extern void matroxfb_var2my(struct fb_var_screeninfo* fvsi, struct my_timming* mt);
-extern int matroxfb_wait_for_sync(WPMINFO u_int32_t crtc);
-extern int matroxfb_enable_irq(WPMINFO int reenable);
+extern int matroxfb_wait_for_sync(struct matrox_fb_info *minfo, u_int32_t crtc);
+extern int matroxfb_enable_irq(struct matrox_fb_info *minfo, int reenable);
 
 #ifdef MATROXFB_USE_SPINLOCKS
 #define CRITBEGIN  spin_lock_irqsave(&minfo->lock.accel, critflags);
diff --git a/drivers/video/matrox/matroxfb_crtc2.c b/drivers/video/matrox/matroxfb_crtc2.c
index 8249959..f41d4b4 100644
--- a/drivers/video/matrox/matroxfb_crtc2.c
+++ b/drivers/video/matrox/matroxfb_crtc2.c
@@ -369,8 +369,8 @@ static int matroxfb_dh_set_par(struct fb_info* info) {
 		} else {
 			matroxfb_dh_disable(m2info);
 		}
-		DAC1064_global_init(PMINFO2);
-		DAC1064_global_restore(PMINFO2);
+		DAC1064_global_init(minfo);
+		DAC1064_global_restore(minfo);
 		down_read(&minfo->altout.lock);
 		for (out = 0; out < MATROXFB_MAX_OUTPUTS; out++) {
 			if (minfo->outputs[out].src == MATROXFB_SRC_CRTC2 &&
@@ -401,7 +401,7 @@ static int matroxfb_dh_pan_display(struct fb_var_screeninfo* var, struct fb_info
 static int matroxfb_dh_get_vblank(const struct matroxfb_dh_fb_info* m2info, struct fb_vblank* vblank) {
 	MINFO_FROM(m2info->primary_dev);
 
-	matroxfb_enable_irq(PMINFO 0);
+	matroxfb_enable_irq(minfo, 0);
 	memset(vblank, 0, sizeof(*vblank));
 	vblank->flags = FB_VBLANK_HAVE_VCOUNT | FB_VBLANK_HAVE_VBLANK;
 	/* mask out reserved bits + field number (odd/even) */
@@ -449,7 +449,7 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 
 				if (crt != 0)
 					return -ENODEV;
-				return matroxfb_wait_for_sync(PMINFO 1);
+				return matroxfb_wait_for_sync(minfo, 1);
 			}
 		case MATROXFB_SET_OUTPUT_MODE:
 		case MATROXFB_GET_OUTPUT_MODE:
@@ -595,7 +595,9 @@ static struct fb_var_screeninfo matroxfb_dh_defined = {
 		0, {0,0,0,0,0}
 };
 
-static int matroxfb_dh_regit(CPMINFO struct matroxfb_dh_fb_info* m2info) {
+static int matroxfb_dh_regit(const struct matrox_fb_info *minfo,
+			     struct matroxfb_dh_fb_info *m2info)
+{
 #define minfo (m2info->primary_dev)
 	void* oldcrtc2;
 
@@ -649,7 +651,7 @@ static int matroxfb_dh_regit(CPMINFO struct matroxfb_dh_fb_info* m2info) {
 
 static int matroxfb_dh_registerfb(struct matroxfb_dh_fb_info* m2info) {
 #define minfo (m2info->primary_dev)
-	if (matroxfb_dh_regit(PMINFO m2info)) {
+	if (matroxfb_dh_regit(minfo, m2info)) {
 		printk(KERN_ERR "matroxfb_crtc2: secondary head failed to register\n");
 		return -1;
 	}
diff --git a/drivers/video/matrox/matroxfb_g450.c b/drivers/video/matrox/matroxfb_g450.c
index 841b439..f7f1343 100644
--- a/drivers/video/matrox/matroxfb_g450.c
+++ b/drivers/video/matrox/matroxfb_g450.c
@@ -80,50 +80,57 @@ static int get_ctrl_id(__u32 v4l2_id) {
 	return -EINVAL;
 }
 
-static inline int* get_ctrl_ptr(WPMINFO unsigned int idx) {
+static inline int *get_ctrl_ptr(struct matrox_fb_info *minfo, unsigned int idx)
+{
 	return (int*)((char*)minfo + g450_controls[idx].control);
 }
 
-static void tvo_fill_defaults(WPMINFO2) {
+static void tvo_fill_defaults(struct matrox_fb_info *minfo)
+{
 	unsigned int i;
 	
 	for (i = 0; i < G450CTRLS; i++) {
-		*get_ctrl_ptr(PMINFO i) = g450_controls[i].desc.default_value;
+		*get_ctrl_ptr(minfo, i) = g450_controls[i].desc.default_value;
 	}
 }
 
-static int cve2_get_reg(WPMINFO int reg) {
+static int cve2_get_reg(struct matrox_fb_info *minfo, int reg)
+{
 	unsigned long flags;
 	int val;
 	
 	matroxfb_DAC_lock_irqsave(flags);
-	matroxfb_DAC_out(PMINFO 0x87, reg);
-	val = matroxfb_DAC_in(PMINFO 0x88);
+	matroxfb_DAC_out(minfo, 0x87, reg);
+	val = matroxfb_DAC_in(minfo, 0x88);
 	matroxfb_DAC_unlock_irqrestore(flags);
 	return val;
 }
 
-static void cve2_set_reg(WPMINFO int reg, int val) {
+static void cve2_set_reg(struct matrox_fb_info *minfo, int reg, int val)
+{
 	unsigned long flags;
 
 	matroxfb_DAC_lock_irqsave(flags);
-	matroxfb_DAC_out(PMINFO 0x87, reg);
-	matroxfb_DAC_out(PMINFO 0x88, val);
+	matroxfb_DAC_out(minfo, 0x87, reg);
+	matroxfb_DAC_out(minfo, 0x88, val);
 	matroxfb_DAC_unlock_irqrestore(flags);
 }
 
-static void cve2_set_reg10(WPMINFO int reg, int val) {
+static void cve2_set_reg10(struct matrox_fb_info *minfo, int reg, int val)
+{
 	unsigned long flags;
 
 	matroxfb_DAC_lock_irqsave(flags);
-	matroxfb_DAC_out(PMINFO 0x87, reg);
-	matroxfb_DAC_out(PMINFO 0x88, val >> 2);
-	matroxfb_DAC_out(PMINFO 0x87, reg + 1);
-	matroxfb_DAC_out(PMINFO 0x88, val & 3);
+	matroxfb_DAC_out(minfo, 0x87, reg);
+	matroxfb_DAC_out(minfo, 0x88, val >> 2);
+	matroxfb_DAC_out(minfo, 0x87, reg + 1);
+	matroxfb_DAC_out(minfo, 0x88, val & 3);
 	matroxfb_DAC_unlock_irqrestore(flags);
 }
 
-static void g450_compute_bwlevel(CPMINFO int *bl, int *wl) {
+static void g450_compute_bwlevel(const struct matrox_fb_info *minfo, int *bl,
+				 int *wl)
+{
 	const int b = minfo->altout.tvo_params.brightness + BLMIN;
 	const int c = minfo->altout.tvo_params.contrast;
 
@@ -162,7 +169,7 @@ static int g450_set_ctrl(void* md, struct v4l2_control *p) {
 	/*
 	 * Check if changed.
 	 */
-	if (p->value == *get_ctrl_ptr(PMINFO i)) return 0;
+	if (p->value == *get_ctrl_ptr(minfo, i)) return 0;
 
 	/*
 	 * Check limits.
@@ -173,31 +180,31 @@ static int g450_set_ctrl(void* md, struct v4l2_control *p) {
 	/*
 	 * Store new value.
 	 */
-	*get_ctrl_ptr(PMINFO i) = p->value;
+	*get_ctrl_ptr(minfo, i) = p->value;
 
 	switch (p->id) {
 		case V4L2_CID_BRIGHTNESS:
 		case V4L2_CID_CONTRAST:
 			{
 				int blacklevel, whitelevel;
-				g450_compute_bwlevel(PMINFO &blacklevel, &whitelevel);
-				cve2_set_reg10(PMINFO 0x0e, blacklevel);
-				cve2_set_reg10(PMINFO 0x1e, whitelevel);
+				g450_compute_bwlevel(minfo, &blacklevel, &whitelevel);
+				cve2_set_reg10(minfo, 0x0e, blacklevel);
+				cve2_set_reg10(minfo, 0x1e, whitelevel);
 			}
 			break;
 		case V4L2_CID_SATURATION:
-			cve2_set_reg(PMINFO 0x20, p->value);
-			cve2_set_reg(PMINFO 0x22, p->value);
+			cve2_set_reg(minfo, 0x20, p->value);
+			cve2_set_reg(minfo, 0x22, p->value);
 			break;
 		case V4L2_CID_HUE:
-			cve2_set_reg(PMINFO 0x25, p->value);
+			cve2_set_reg(minfo, 0x25, p->value);
 			break;
 		case MATROXFB_CID_TESTOUT:
 			{
-				unsigned char val = cve2_get_reg (PMINFO 0x05);
+				unsigned char val = cve2_get_reg(minfo, 0x05);
 				if (p->value) val |=  0x02;
 				else          val &= ~0x02;
-				cve2_set_reg(PMINFO 0x05, val);
+				cve2_set_reg(minfo, 0x05, val);
 			}
 			break;
 	}
@@ -212,7 +219,7 @@ static int g450_get_ctrl(void* md, struct v4l2_control *p) {
 	
 	i = get_ctrl_id(p->id);
 	if (i < 0) return -EINVAL;
-	p->value = *get_ctrl_ptr(PMINFO i);
+	p->value = *get_ctrl_ptr(minfo, i);
 	return 0;
 }
 
@@ -226,7 +233,9 @@ struct output_desc {
 	unsigned int	v_total;
 };
 
-static void computeRegs(WPMINFO struct mavenregs* r, struct my_timming* mt, const struct output_desc* outd) {
+static void computeRegs(struct matrox_fb_info *minfo, struct mavenregs *r,
+			struct my_timming *mt, const struct output_desc *outd)
+{
 	u_int32_t chromasc;
 	u_int32_t hlen;
 	u_int32_t hsl;
@@ -251,10 +260,10 @@ static void computeRegs(WPMINFO struct mavenregs* r, struct my_timming* mt, cons
 
 	dprintk(KERN_DEBUG "Want %u kHz pixclock\n", (unsigned int)piic);
 	
-	mnp = matroxfb_g450_setclk(PMINFO piic, M_VIDEO_PLL);
+	mnp = matroxfb_g450_setclk(minfo, piic, M_VIDEO_PLL);
 	
 	mt->mnp = mnp;
-	mt->pixclock = g450_mnp2f(PMINFO mnp);
+	mt->pixclock = g450_mnp2f(minfo, mnp);
 
 	dprintk(KERN_DEBUG "MNP=%08X\n", mnp);
 
@@ -490,20 +499,22 @@ static void cve2_init_TVdata(int norm, struct mavenregs* data, const struct outp
  	return;
 }
 
-#define LR(x) cve2_set_reg(PMINFO (x), m->regs[(x)])
-static void cve2_init_TV(WPMINFO const struct mavenregs* m) {
+#define LR(x) cve2_set_reg(minfo, (x), m->regs[(x)])
+static void cve2_init_TV(struct matrox_fb_info *minfo,
+			 const struct mavenregs *m)
+{
 	int i;
 
 	LR(0x80);
 	LR(0x82); LR(0x83);
 	LR(0x84); LR(0x85);
 	
-	cve2_set_reg(PMINFO 0x3E, 0x01);
+	cve2_set_reg(minfo, 0x3E, 0x01);
 	
 	for (i = 0; i < 0x3E; i++) {
 		LR(i);
 	}
-	cve2_set_reg(PMINFO 0x3E, 0x00);
+	cve2_set_reg(minfo, 0x3E, 0x00);
 }
 
 static int matroxfb_g450_compute(void* md, struct my_timming* mt) {
@@ -518,7 +529,7 @@ static int matroxfb_g450_compute(void* md, struct my_timming* mt) {
 		cve2_init_TVdata(minfo->outputs[1].mode, &minfo->hw.maven, &outd);
 		{
 			int blacklevel, whitelevel;
-			g450_compute_bwlevel(PMINFO &blacklevel, &whitelevel);
+			g450_compute_bwlevel(minfo, &blacklevel, &whitelevel);
 			minfo->hw.maven.regs[0x0E] = blacklevel >> 2;
 			minfo->hw.maven.regs[0x0F] = blacklevel & 3;
 			minfo->hw.maven.regs[0x1E] = whitelevel >> 2;
@@ -533,12 +544,12 @@ static int matroxfb_g450_compute(void* md, struct my_timming* mt) {
 				minfo->hw.maven.regs[0x05] |= 0x02;
 			}
 		}
-		computeRegs(PMINFO &minfo->hw.maven, mt, outd);
+		computeRegs(minfo, &minfo->hw.maven, mt, outd);
 	} else if (mt->mnp < 0) {
 		/* We must program clocks before CRTC2, otherwise interlaced mode
 		   startup may fail */
-		mt->mnp = matroxfb_g450_setclk(PMINFO mt->pixclock, (mt->crtc == MATROXFB_SRC_CRTC1) ? M_PIXEL_PLL_C : M_VIDEO_PLL);
-		mt->pixclock = g450_mnp2f(PMINFO mt->mnp);
+		mt->mnp = matroxfb_g450_setclk(minfo, mt->pixclock, (mt->crtc == MATROXFB_SRC_CRTC1) ? M_PIXEL_PLL_C : M_VIDEO_PLL);
+		mt->pixclock = g450_mnp2f(minfo, mt->mnp);
 	}
 	dprintk(KERN_DEBUG "Pixclock = %u\n", mt->pixclock);
 	return 0;
@@ -548,7 +559,7 @@ static int matroxfb_g450_program(void* md) {
 	MINFO_FROM(md);
 	
 	if (minfo->outputs[1].mode != MATROXFB_OUTPUT_MODE_MONITOR) {
-		cve2_init_TV(PMINFO &minfo->hw.maven);
+		cve2_init_TV(minfo, &minfo->hw.maven);
 	}
 	return 0;
 }
@@ -567,8 +578,8 @@ static int g450_dvi_compute(void* md, struct my_timming* mt) {
 	MINFO_FROM(md);
 
 	if (mt->mnp < 0) {
-		mt->mnp = matroxfb_g450_setclk(PMINFO mt->pixclock, (mt->crtc == MATROXFB_SRC_CRTC1) ? M_PIXEL_PLL_C : M_VIDEO_PLL);
-		mt->pixclock = g450_mnp2f(PMINFO mt->mnp);
+		mt->mnp = matroxfb_g450_setclk(minfo, mt->pixclock, (mt->crtc == MATROXFB_SRC_CRTC1) ? M_PIXEL_PLL_C : M_VIDEO_PLL);
+		mt->pixclock = g450_mnp2f(minfo, mt->mnp);
 	}
 	return 0;
 }
@@ -588,10 +599,11 @@ static struct matrox_altout matroxfb_g450_dvi = {
 	.compute	= g450_dvi_compute,
 };
 
-void matroxfb_g450_connect(WPMINFO2) {
+void matroxfb_g450_connect(struct matrox_fb_info *minfo)
+{
 	if (minfo->devflags.g450dac) {
 		down_write(&minfo->altout.lock);
-		tvo_fill_defaults(PMINFO2);
+		tvo_fill_defaults(minfo);
 		minfo->outputs[1].src = minfo->outputs[1].default_src;
 		minfo->outputs[1].data = minfo;
 		minfo->outputs[1].output = &matroxfb_g450_altout;
@@ -604,7 +616,8 @@ void matroxfb_g450_connect(WPMINFO2) {
 	}
 }
 
-void matroxfb_g450_shutdown(WPMINFO2) {
+void matroxfb_g450_shutdown(struct matrox_fb_info *minfo)
+{
 	if (minfo->devflags.g450dac) {
 		down_write(&minfo->altout.lock);
 		minfo->outputs[1].src = MATROXFB_SRC_NONE;
diff --git a/drivers/video/matrox/matroxfb_g450.h b/drivers/video/matrox/matroxfb_g450.h
index a0822a6..3a3e654 100644
--- a/drivers/video/matrox/matroxfb_g450.h
+++ b/drivers/video/matrox/matroxfb_g450.h
@@ -4,11 +4,11 @@
 #include "matroxfb_base.h"
 
 #ifdef CONFIG_FB_MATROX_G
-void matroxfb_g450_connect(WPMINFO2);
-void matroxfb_g450_shutdown(WPMINFO2);
+void matroxfb_g450_connect(struct matrox_fb_info *minfo);
+void matroxfb_g450_shutdown(struct matrox_fb_info *minfo);
 #else
-static inline void matroxfb_g450_connect(WPMINFO2) { };
-static inline void matroxfb_g450_shutdown(WPMINFO2) { };
+static inline void matroxfb_g450_connect(struct matrox_fb_info *minfo) { };
+static inline void matroxfb_g450_shutdown(struct matrox_fb_info *minfo) { };
 #endif
 
 #endif /* __MATROXFB_G450_H__ */
diff --git a/drivers/video/matrox/matroxfb_misc.c b/drivers/video/matrox/matroxfb_misc.c
index fe07af8..9948ca2 100644
--- a/drivers/video/matrox/matroxfb_misc.c
+++ b/drivers/video/matrox/matroxfb_misc.c
@@ -89,13 +89,15 @@
 #include <linux/interrupt.h>
 #include <linux/matroxfb.h>
 
-void matroxfb_DAC_out(CPMINFO int reg, int val) {
+void matroxfb_DAC_out(const struct matrox_fb_info *minfo, int reg, int val)
+{
 	DBG_REG(__func__)
 	mga_outb(M_RAMDAC_BASE+M_X_INDEX, reg);
 	mga_outb(M_RAMDAC_BASE+M_X_DATAREG, val);
 }
 
-int matroxfb_DAC_in(CPMINFO int reg) {
+int matroxfb_DAC_in(const struct matrox_fb_info *minfo, int reg)
+{
 	DBG_REG(__func__)
 	mga_outb(M_RAMDAC_BASE+M_X_INDEX, reg);
 	return mga_inb(M_RAMDAC_BASE+M_X_DATAREG);
@@ -184,7 +186,8 @@ int matroxfb_PLL_calcclock(const struct matrox_pll_features* pll, unsigned int f
 	return bestvco;
 }
 
-int matroxfb_vgaHWinit(WPMINFO struct my_timming* m) {
+int matroxfb_vgaHWinit(struct matrox_fb_info *minfo, struct my_timming *m)
+{
 	unsigned int hd, hs, he, hbe, ht;
 	unsigned int vd, vs, ve, vt, lc;
 	unsigned int wd;
@@ -331,7 +334,8 @@ int matroxfb_vgaHWinit(WPMINFO struct my_timming* m) {
 	return 0;
 };
 
-void matroxfb_vgaHWrestore(WPMINFO2) {
+void matroxfb_vgaHWrestore(struct matrox_fb_info *minfo)
+{
 	int i;
 	struct matrox_hw_state * const hw = &minfo->hw;
 	CRITFLAGS
@@ -522,7 +526,9 @@ static void parse_bios(unsigned char __iomem* vbios, struct matrox_bios* bd) {
 #endif
 }
 
-static int parse_pins1(WPMINFO const struct matrox_bios* bd) {
+static int parse_pins1(struct matrox_fb_info *minfo,
+		       const struct matrox_bios *bd)
+{
 	unsigned int maxdac;
 
 	switch (bd->pins[22]) {
@@ -542,7 +548,8 @@ static int parse_pins1(WPMINFO const struct matrox_bios* bd) {
 	return 0;
 }
 
-static void default_pins1(WPMINFO2) {
+static void default_pins1(struct matrox_fb_info *minfo)
+{
 	/* Millennium */
 	minfo->limits.pixel.vcomax	= 220000;
 	minfo->values.pll.system	=  50000;
@@ -550,7 +557,9 @@ static void default_pins1(WPMINFO2) {
 	minfo->values.reg.mctlwtst	= 0x00030101;
 }
 
-static int parse_pins2(WPMINFO const struct matrox_bios* bd) {
+static int parse_pins2(struct matrox_fb_info *minfo,
+		       const struct matrox_bios *bd)
+{
 	minfo->limits.pixel.vcomax	=
 	minfo->limits.system.vcomax	= (bd->pins[41] == 0xFF) ? 230000 : ((bd->pins[41] + 100) * 1000);
 	minfo->values.reg.mctlwtst	= ((bd->pins[51] & 0x01) ? 0x00000001 : 0) |
@@ -562,7 +571,8 @@ static int parse_pins2(WPMINFO const struct matrox_bios* bd) {
 	return 0;
 }
 
-static void default_pins2(WPMINFO2) {
+static void default_pins2(struct matrox_fb_info *minfo)
+{
 	/* Millennium II, Mystique */
 	minfo->limits.pixel.vcomax	=
 	minfo->limits.system.vcomax	= 230000;
@@ -571,7 +581,9 @@ static void default_pins2(WPMINFO2) {
 	minfo->features.pll.ref_freq	=  14318;
 }
 
-static int parse_pins3(WPMINFO const struct matrox_bios* bd) {
+static int parse_pins3(struct matrox_fb_info *minfo,
+		       const struct matrox_bios *bd)
+{
 	minfo->limits.pixel.vcomax	=
 	minfo->limits.system.vcomax	= (bd->pins[36] == 0xFF) ? 230000			: ((bd->pins[36] + 100) * 1000);
 	minfo->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 48) == 0xFFFFFFFF ?
@@ -587,7 +599,8 @@ static int parse_pins3(WPMINFO const struct matrox_bios* bd) {
 	return 0;
 }
 
-static void default_pins3(WPMINFO2) {
+static void default_pins3(struct matrox_fb_info *minfo)
+{
 	/* G100, G200 */
 	minfo->limits.pixel.vcomax	=
 	minfo->limits.system.vcomax	= 230000;
@@ -598,7 +611,9 @@ static void default_pins3(WPMINFO2) {
 	minfo->features.pll.ref_freq	=  27000;
 }
 
-static int parse_pins4(WPMINFO const struct matrox_bios* bd) {
+static int parse_pins4(struct matrox_fb_info *minfo,
+		       const struct matrox_bios *bd)
+{
 	minfo->limits.pixel.vcomax	= (bd->pins[ 39] == 0xFF) ? 230000			: bd->pins[ 39] * 4000;
 	minfo->limits.system.vcomax	= (bd->pins[ 38] == 0xFF) ? minfo->limits.pixel.vcomax	: bd->pins[ 38] * 4000;
 	minfo->values.reg.mctlwtst	= get_unaligned_le32(bd->pins + 71);
@@ -615,7 +630,8 @@ static int parse_pins4(WPMINFO const struct matrox_bios* bd) {
 	return 0;
 }
 
-static void default_pins4(WPMINFO2) {
+static void default_pins4(struct matrox_fb_info *minfo)
+{
 	/* G400 */
 	minfo->limits.pixel.vcomax	=
 	minfo->limits.system.vcomax	= 252000;
@@ -627,7 +643,9 @@ static void default_pins4(WPMINFO2) {
 	minfo->features.pll.ref_freq	= 27000;
 }
 
-static int parse_pins5(WPMINFO const struct matrox_bios* bd) {
+static int parse_pins5(struct matrox_fb_info *minfo,
+		       const struct matrox_bios *bd)
+{
 	unsigned int mult;
 	
 	mult = bd->pins[4]?8000:6000;
@@ -662,7 +680,8 @@ static int parse_pins5(WPMINFO const struct matrox_bios* bd) {
 	return 0;
 }
 
-static void default_pins5(WPMINFO2) {
+static void default_pins5(struct matrox_fb_info *minfo)
+{
 	/* Mine 16MB G450 with SDRAM DDR */
 	minfo->limits.pixel.vcomax	=
 	minfo->limits.system.vcomax	=
@@ -686,20 +705,22 @@ static void default_pins5(WPMINFO2) {
 	minfo->values.reg.maccess	= 0x00004000;
 }
 
-static int matroxfb_set_limits(WPMINFO const struct matrox_bios* bd) {
+static int matroxfb_set_limits(struct matrox_fb_info *minfo,
+			       const struct matrox_bios *bd)
+{
 	unsigned int pins_version;
 	static const unsigned int pinslen[] = { 64, 64, 64, 128, 128 };
 
 	switch (minfo->chip) {
-		case MGA_2064:	default_pins1(PMINFO2); break;
+		case MGA_2064:	default_pins1(minfo); break;
 		case MGA_2164:
 		case MGA_1064:
-		case MGA_1164:	default_pins2(PMINFO2); break;
+		case MGA_1164:	default_pins2(minfo); break;
 		case MGA_G100:
-		case MGA_G200:	default_pins3(PMINFO2); break;
-		case MGA_G400:	default_pins4(PMINFO2); break;
+		case MGA_G200:	default_pins3(minfo); break;
+		case MGA_G400:	default_pins4(minfo); break;
 		case MGA_G450:
-		case MGA_G550:	default_pins5(PMINFO2); break;
+		case MGA_G550:	default_pins5(minfo); break;
 	}
 	if (!bd->bios_valid) {
 		printk(KERN_INFO "matroxfb: Your Matrox device does not have BIOS\n");
@@ -724,22 +745,23 @@ static int matroxfb_set_limits(WPMINFO const struct matrox_bios* bd) {
 	}
 	switch (pins_version) {
 		case 1:
-			return parse_pins1(PMINFO bd);
+			return parse_pins1(minfo, bd);
 		case 2:
-			return parse_pins2(PMINFO bd);
+			return parse_pins2(minfo, bd);
 		case 3:
-			return parse_pins3(PMINFO bd);
+			return parse_pins3(minfo, bd);
 		case 4:
-			return parse_pins4(PMINFO bd);
+			return parse_pins4(minfo, bd);
 		case 5:
-			return parse_pins5(PMINFO bd);
+			return parse_pins5(minfo, bd);
 		default:
 			printk(KERN_DEBUG "matroxfb: Powerup info version %u is not yet supported\n", pins_version);
 			return -1;
 	}
 }
 
-void matroxfb_read_pins(WPMINFO2) {
+void matroxfb_read_pins(struct matrox_fb_info *minfo)
+{
 	u32 opt;
 	u32 biosbase;
 	u32 fbbase;
@@ -775,7 +797,7 @@ void matroxfb_read_pins(WPMINFO2) {
 		}
 	}
 #endif
-	matroxfb_set_limits(PMINFO &minfo->bios);
+	matroxfb_set_limits(minfo, &minfo->bios);
 	printk(KERN_INFO "PInS memtype = %u\n",
 	       (minfo->values.reg.opt & 0x1C00) >> 10);
 }
diff --git a/drivers/video/matrox/matroxfb_misc.h b/drivers/video/matrox/matroxfb_misc.h
index b460045..351c823 100644
--- a/drivers/video/matrox/matroxfb_misc.h
+++ b/drivers/video/matrox/matroxfb_misc.h
@@ -6,13 +6,16 @@
 /* also for modules */
 int matroxfb_PLL_calcclock(const struct matrox_pll_features* pll, unsigned int freq, unsigned int fmax,
 	unsigned int* in, unsigned int* feed, unsigned int* post);
-static inline int PLL_calcclock(CPMINFO unsigned int freq, unsigned int fmax,
-		unsigned int* in, unsigned int* feed, unsigned int* post) {
+static inline int PLL_calcclock(const struct matrox_fb_info *minfo,
+				unsigned int freq, unsigned int fmax,
+				unsigned int *in, unsigned int *feed,
+				unsigned int *post)
+{
 	return matroxfb_PLL_calcclock(&minfo->features.pll, freq, fmax, in, feed, post);
 }
 
-int matroxfb_vgaHWinit(WPMINFO struct my_timming* m);
-void matroxfb_vgaHWrestore(WPMINFO2);
-void matroxfb_read_pins(WPMINFO2);
+int matroxfb_vgaHWinit(struct matrox_fb_info *minfo, struct my_timming* m);
+void matroxfb_vgaHWrestore(struct matrox_fb_info *minfo);
+void matroxfb_read_pins(struct matrox_fb_info *minfo);
 
 #endif	/* __MATROXFB_MISC_H__ */
-- 
cgit v0.10.2


From ee5a27497957e55a520788f88536401e7b12bf41 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 22 Sep 2009 16:47:50 -0700
Subject: matroxfb: get rid of unneeded macro MINFO_FROM

With multihead support always enabled, macros MINFO_FROM and
MINFO_FROM_INFO are no longer needed and make the code harder to read.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Petr Vandrovec <vandrove@vc.cvut.cz>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c
index 3577ec1..8335a6f 100644
--- a/drivers/video/matrox/matroxfb_accel.c
+++ b/drivers/video/matrox/matroxfb_accel.c
@@ -275,7 +275,7 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres,
 }
 
 static void matroxfb_cfb4_copyarea(struct fb_info* info, const struct fb_copyarea* area) {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	if ((area->sx | area->dx | area->width) & 1)
 		cfb_copyarea(info, area);
@@ -284,7 +284,7 @@ static void matroxfb_cfb4_copyarea(struct fb_info* info, const struct fb_copyare
 }
 
 static void matroxfb_copyarea(struct fb_info* info, const struct fb_copyarea* area) {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	matrox_accel_bmove(minfo, minfo->fbcon.var.xres_virtual, area->sy, area->sx, area->dy, area->dx, area->height, area->width);
 }
@@ -309,7 +309,7 @@ static void matroxfb_accel_clear(struct matrox_fb_info *minfo, u_int32_t color,
 }
 
 static void matroxfb_fillrect(struct fb_info* info, const struct fb_fillrect* rect) {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	switch (rect->rop) {
 		case ROP_COPY:
@@ -376,7 +376,7 @@ static void matroxfb_cfb4_clear(struct matrox_fb_info *minfo, u_int32_t bgx,
 }
 
 static void matroxfb_cfb4_fillrect(struct fb_info* info, const struct fb_fillrect* rect) {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	switch (rect->rop) {
 		case ROP_COPY:
@@ -478,7 +478,7 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx,
 
 
 static void matroxfb_imageblit(struct fb_info* info, const struct fb_image* image) {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	DBG_HEAVY(__func__);
 
diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c
index 73ad598..c6b122c 100644
--- a/drivers/video/matrox/matroxfb_base.c
+++ b/drivers/video/matrox/matroxfb_base.c
@@ -204,8 +204,7 @@ static irqreturn_t matrox_irq(int irq, void *dev_id)
 {
 	u_int32_t status;
 	int handled = 0;
-
-	MINFO_FROM(dev_id);
+	struct matrox_fb_info *minfo = dev_id;
 
 	status = mga_inl(M_STATUS);
 
@@ -396,7 +395,7 @@ static void matroxfb_remove(struct matrox_fb_info *minfo, int dummy)
 
 static int matroxfb_open(struct fb_info *info, int user)
 {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	DBG_LOOP(__func__)
 
@@ -412,7 +411,7 @@ static int matroxfb_open(struct fb_info *info, int user)
 
 static int matroxfb_release(struct fb_info *info, int user)
 {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	DBG_LOOP(__func__)
 
@@ -429,7 +428,7 @@ static int matroxfb_release(struct fb_info *info, int user)
 
 static int matroxfb_pan_display(struct fb_var_screeninfo *var,
 		struct fb_info* info) {
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	DBG(__func__)
 
@@ -749,7 +748,7 @@ static int matroxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *inf
 	int visual;
 	int cmap_len;
 	unsigned int ydstorg;
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	if (minfo->dead) {
 		return -ENXIO;
@@ -766,7 +765,7 @@ static int matroxfb_set_par(struct fb_info *info)
 	int cmap_len;
 	unsigned int ydstorg;
 	struct fb_var_screeninfo *var;
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	DBG(__func__)
 
@@ -890,7 +889,7 @@ static int matroxfb_ioctl(struct fb_info *info,
 			  unsigned int cmd, unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	DBG(__func__)
 
@@ -1189,7 +1188,7 @@ static int matroxfb_blank(int blank, struct fb_info *info)
 	int seq;
 	int crtc;
 	CRITFLAGS
-	MINFO_FROM_INFO(info);
+	struct matrox_fb_info *minfo = info2minfo(info);
 
 	DBG(__func__)
 
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index cd90cd2..06158ab 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -524,10 +524,6 @@ struct matrox_fb_info {
 
 #define info2minfo(info) container_of(info, struct matrox_fb_info, fbcon)
 
-#define MINFO_FROM(x)	   struct matrox_fb_info* minfo = x
-
-#define MINFO_FROM_INFO(x) MINFO_FROM(info2minfo(x))
-
 struct matrox_switch {
 	int	(*preinit)(struct matrox_fb_info *minfo);
 	void	(*reset)(struct matrox_fb_info *minfo);
diff --git a/drivers/video/matrox/matroxfb_crtc2.c b/drivers/video/matrox/matroxfb_crtc2.c
index f41d4b4..78414ba 100644
--- a/drivers/video/matrox/matroxfb_crtc2.c
+++ b/drivers/video/matrox/matroxfb_crtc2.c
@@ -65,7 +65,7 @@ static void matroxfb_dh_restore(struct matroxfb_dh_fb_info* m2info,
 		unsigned int pos) {
 	u_int32_t tmp;
 	u_int32_t datactl;
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	switch (mode) {
 		case 15:
@@ -157,7 +157,7 @@ static void matroxfb_dh_restore(struct matroxfb_dh_fb_info* m2info,
 }
 
 static void matroxfb_dh_disable(struct matroxfb_dh_fb_info* m2info) {
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	mga_outl(0x3C10, 0x00000004);	/* disable CRTC2, CRTC1->DAC1, PLL as clock source */
 	minfo->hw.crtc2.ctl = 0x00000004;
@@ -168,7 +168,7 @@ static void matroxfb_dh_pan_var(struct matroxfb_dh_fb_info* m2info,
 	unsigned int pos;
 	unsigned int linelen;
 	unsigned int pixelsize;
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	m2info->fbcon.var.xoffset = var->xoffset;
 	m2info->fbcon.var.yoffset = var->yoffset;
@@ -260,7 +260,7 @@ static int matroxfb_dh_decode_var(struct matroxfb_dh_fb_info* m2info,
 
 static int matroxfb_dh_open(struct fb_info* info, int user) {
 #define m2info (container_of(info, struct matroxfb_dh_fb_info, fbcon))
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	if (minfo) {
 		int err;
@@ -280,7 +280,7 @@ static int matroxfb_dh_open(struct fb_info* info, int user) {
 static int matroxfb_dh_release(struct fb_info* info, int user) {
 #define m2info (container_of(info, struct matroxfb_dh_fb_info, fbcon))
 	int err = 0;
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	if (minfo) {
 		err = minfo->fbops.fb_release(&minfo->fbcon, user);
@@ -326,7 +326,7 @@ static int matroxfb_dh_set_par(struct fb_info* info) {
 	int mode;
 	int err;
 	struct fb_var_screeninfo* var = &info->var;
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	if ((err = matroxfb_dh_decode_var(m2info, var, &visual, &cmap_len, &mode)) != 0)
 		return err;
@@ -399,7 +399,7 @@ static int matroxfb_dh_pan_display(struct fb_var_screeninfo* var, struct fb_info
 }
 
 static int matroxfb_dh_get_vblank(const struct matroxfb_dh_fb_info* m2info, struct fb_vblank* vblank) {
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	matroxfb_enable_irq(minfo, 0);
 	memset(vblank, 0, sizeof(*vblank));
@@ -423,7 +423,7 @@ static int matroxfb_dh_ioctl(struct fb_info *info,
 		unsigned long arg)
 {
 #define m2info (container_of(info, struct matroxfb_dh_fb_info, fbcon))
-	MINFO_FROM(m2info->primary_dev);
+	struct matrox_fb_info *minfo = m2info->primary_dev;
 
 	DBG(__func__)
 
diff --git a/drivers/video/matrox/matroxfb_g450.c b/drivers/video/matrox/matroxfb_g450.c
index f7f1343..cff0546 100644
--- a/drivers/video/matrox/matroxfb_g450.c
+++ b/drivers/video/matrox/matroxfb_g450.c
@@ -161,7 +161,7 @@ static int g450_query_ctrl(void* md, struct v4l2_queryctrl *p) {
 
 static int g450_set_ctrl(void* md, struct v4l2_control *p) {
 	int i;
-	MINFO_FROM(md);
+	struct matrox_fb_info *minfo = md;
 	
 	i = get_ctrl_id(p->id);
 	if (i < 0) return -EINVAL;
@@ -215,7 +215,7 @@ static int g450_set_ctrl(void* md, struct v4l2_control *p) {
 
 static int g450_get_ctrl(void* md, struct v4l2_control *p) {
 	int i;
-	MINFO_FROM(md);
+	struct matrox_fb_info *minfo = md;
 	
 	i = get_ctrl_id(p->id);
 	if (i < 0) return -EINVAL;
@@ -518,7 +518,7 @@ static void cve2_init_TV(struct matrox_fb_info *minfo,
 }
 
 static int matroxfb_g450_compute(void* md, struct my_timming* mt) {
-	MINFO_FROM(md);
+	struct matrox_fb_info *minfo = md;
 
 	dprintk(KERN_DEBUG "Computing, mode=%u\n", minfo->outputs[1].mode);
 
@@ -556,7 +556,7 @@ static int matroxfb_g450_compute(void* md, struct my_timming* mt) {
 }
 
 static int matroxfb_g450_program(void* md) {
-	MINFO_FROM(md);
+	struct matrox_fb_info *minfo = md;
 	
 	if (minfo->outputs[1].mode != MATROXFB_OUTPUT_MODE_MONITOR) {
 		cve2_init_TV(minfo, &minfo->hw.maven);
@@ -575,7 +575,7 @@ static int matroxfb_g450_verify_mode(void* md, u_int32_t arg) {
 }
 
 static int g450_dvi_compute(void* md, struct my_timming* mt) {
-	MINFO_FROM(md);
+	struct matrox_fb_info *minfo = md;
 
 	if (mt->mnp < 0) {
 		mt->mnp = matroxfb_g450_setclk(minfo, mt->pixclock, (mt->crtc == MATROXFB_SRC_CRTC1) ? M_PIXEL_PLL_C : M_VIDEO_PLL);
diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index 4fbb386..91af915 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -458,7 +458,7 @@ static void maven_init_TVdata(const struct maven_data* md, struct mavenregs* dat
 		0x00,	/* 3E written multiple times */
 		0x00,	/* never written */
 	}, MATROXFB_OUTPUT_MODE_NTSC, 525, 60 };
-	MINFO_FROM(md->primary_head);
+	struct matrox_fb_info *minfo = md->primary_head;
 
 	if (minfo->outputs[1].mode == MATROXFB_OUTPUT_MODE_PAL)
 		*data = palregs;
@@ -741,7 +741,7 @@ static inline int maven_compute_timming(struct maven_data* md,
 		struct mavenregs* m) {
 	unsigned int tmpi;
 	unsigned int a, bv, c;
-	MINFO_FROM(md->primary_head);
+	struct matrox_fb_info *minfo = md->primary_head;
 
 	m->mode = minfo->outputs[1].mode;
 	if (m->mode != MATROXFB_OUTPUT_MODE_MONITOR) {
@@ -1184,7 +1184,9 @@ static struct matrox_altout maven_altout = {
 
 static int maven_init_client(struct i2c_client* clnt) {
 	struct maven_data* md = i2c_get_clientdata(clnt);
-	MINFO_FROM(container_of(clnt->adapter, struct i2c_bit_adapter, adapter)->minfo);
+	struct matrox_fb_info *minfo = container_of(clnt->adapter,
+						    struct i2c_bit_adapter,
+						    adapter)->minfo;
 
 	md->primary_head = minfo;
 	md->client = clnt;
@@ -1218,7 +1220,7 @@ static int maven_shutdown_client(struct i2c_client* clnt) {
 	struct maven_data* md = i2c_get_clientdata(clnt);
 
 	if (md->primary_head) {
-		MINFO_FROM(md->primary_head);
+		struct matrox_fb_info *minfo = md->primary_head;
 
 		down_write(&minfo->altout.lock);
 		minfo->outputs[1].src = MATROXFB_SRC_NONE;
-- 
cgit v0.10.2


From a690606d1f54845b018d033ac32e91df25cb2680 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 22 Sep 2009 16:47:50 -0700
Subject: matroxfb: get rid of CONFIG_FB_MATROX_32MB

CONFIG_FB_MATROX_32MB is always enabled, so there is no point in having
ifdefs all around.  And it is bad practice to use CONFIG_* as a name for
something which is not a Kconfig option.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Petr Vandrovec <vandrove@vc.cvut.cz>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/matrox/matroxfb_DAC1064.c b/drivers/video/matrox/matroxfb_DAC1064.c
index 88466a0..f9fa0fd 100644
--- a/drivers/video/matrox/matroxfb_DAC1064.c
+++ b/drivers/video/matrox/matroxfb_DAC1064.c
@@ -1080,10 +1080,8 @@ static void MGAG100_restore(struct matrox_fb_info *minfo)
 
 	DAC1064_restore_1(minfo);
 	matroxfb_vgaHWrestore(minfo);
-#ifdef CONFIG_FB_MATROX_32MB
 	if (minfo->devflags.support32MB)
 		mga_setr(M_EXTVGA_INDEX, 8, hw->CRTCEXT[8]);
-#endif
 	minfo->crtc1.panpos = -1;
 	for (i = 0; i < 6; i++)
 		mga_setr(M_EXTVGA_INDEX, i, hw->CRTCEXT[i]);
diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c
index c6b122c..7064fb4 100644
--- a/drivers/video/matrox/matroxfb_base.c
+++ b/drivers/video/matrox/matroxfb_base.c
@@ -310,9 +310,7 @@ static void matrox_pan_var(struct matrox_fb_info *minfo,
 {
 	unsigned int pos;
 	unsigned short p0, p1, p2;
-#ifdef CONFIG_FB_MATROX_32MB
 	unsigned int p3;
-#endif
 	int vbl;
 	unsigned long flags;
 
@@ -330,9 +328,7 @@ static void matrox_pan_var(struct matrox_fb_info *minfo,
 	p0 = minfo->hw.CRTC[0x0D] = pos & 0xFF;
 	p1 = minfo->hw.CRTC[0x0C] = (pos & 0xFF00) >> 8;
 	p2 = minfo->hw.CRTCEXT[0] = (minfo->hw.CRTCEXT[0] & 0xB0) | ((pos >> 16) & 0x0F) | ((pos >> 14) & 0x40);
-#ifdef CONFIG_FB_MATROX_32MB
 	p3 = minfo->hw.CRTCEXT[8] = pos >> 21;
-#endif
 
 	/* FB_ACTIVATE_VBL and we can acquire interrupts? Honor FB_ACTIVATE_VBL then... */
 	vbl = (var->activate & FB_ACTIVATE_VBL) && (matroxfb_enable_irq(minfo, 0) == 0);
@@ -342,10 +338,8 @@ static void matrox_pan_var(struct matrox_fb_info *minfo,
 	matroxfb_DAC_lock_irqsave(flags);
 	mga_setr(M_CRTC_INDEX, 0x0D, p0);
 	mga_setr(M_CRTC_INDEX, 0x0C, p1);
-#ifdef CONFIG_FB_MATROX_32MB
 	if (minfo->devflags.support32MB)
 		mga_setr(M_EXTVGA_INDEX, 0x08, p3);
-#endif
 	if (vbl) {
 		minfo->crtc1.panpos = p2;
 	} else {
@@ -1360,13 +1354,9 @@ static struct video_board vbMystique		= {0x0800000, 0x0800000, FB_ACCEL_MATROX_M
 #ifdef CONFIG_FB_MATROX_G
 static struct video_board vbG100		= {0x0800000, 0x0800000, FB_ACCEL_MATROX_MGAG100,	&matrox_G100};
 static struct video_board vbG200		= {0x1000000, 0x1000000, FB_ACCEL_MATROX_MGAG200,	&matrox_G100};
-#ifdef CONFIG_FB_MATROX_32MB
 /* from doc it looks like that accelerator can draw only to low 16MB :-( Direct accesses & displaying are OK for
    whole 32MB */
 static struct video_board vbG400		= {0x2000000, 0x1000000, FB_ACCEL_MATROX_MGAG400,	&matrox_G100};
-#else
-static struct video_board vbG400		= {0x2000000, 0x1000000, FB_ACCEL_MATROX_MGAG400,	&matrox_G100};
-#endif
 #endif
 
 #define DEVF_VIDEO64BIT		0x0001
@@ -1646,9 +1636,7 @@ static int initMatrox2(struct matrox_fb_info *minfo, struct board *b)
 		minfo->devflags.textmode = 1;
 		minfo->devflags.text_type_aux = FB_AUX_TEXT_MGA_STEP8;
 	}
-#ifdef CONFIG_FB_MATROX_32MB
 	minfo->devflags.support32MB = (b->flags & DEVF_SUPPORT32MB) != 0;
-#endif
 	minfo->devflags.precise_width = !(b->flags & DEVF_ANY_VXRES);
 	minfo->devflags.crtc2 = (b->flags & DEVF_CRTC2) != 0;
 	minfo->devflags.maven_capable = (b->flags & DEVF_MAVEN_CAPABLE) != 0;
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index 06158ab..f3a4e15 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -54,9 +54,6 @@
 #include "../macmodes.h"
 #endif
 
-/* always compile support for 32MB... It cost almost nothing */
-#define CONFIG_FB_MATROX_32MB
-
 #ifdef MATROXFB_DEBUG
 
 #define DEBUG
@@ -464,9 +461,7 @@ struct matrox_fb_info {
 		int		nopciretry;
 		int		noinit;
 		int		sgram;
-#ifdef CONFIG_FB_MATROX_32MB
 		int		support32MB;
-#endif
 
 		int		accelerator;
 		int		text_type_aux;
-- 
cgit v0.10.2


From 2ddce3fd0acbdc1be684fb5f919ae3d2e9518aac Mon Sep 17 00:00:00 2001
From: Ian Armstrong <ian@iarmst.demon.co.uk>
Date: Tue, 22 Sep 2009 16:47:52 -0700
Subject: fbcon: only unbind from console if successfully registered

Attempting to unload a framebuffer module calls unregister_framebuffer()
which in turn gets fbcon to release it.  If fbcon has no framebuffers
linked to a console, it will also unbind itself from the console driver.
However, if fbcon never registered itself as a console driver, the unbind
will fail causing the framebuffer device entry to persist.  In most cases
this failure will result in an oops when attempting to access the now
non-existent device.

This patch ensures that the fbcon unbind request will succeed even if a
bind was never done.  It tracks if a successful bind ever occurred & will
only attempt to unbind if needed.  If there never was a bind, it simply
returns with no error.

Signed-off-by: Ian Armstrong <ian@iarmst.demon.co.uk>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 4bee7c2..5a686ce 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -114,6 +114,7 @@ static int last_fb_vc = MAX_NR_CONSOLES - 1;
 static int fbcon_is_default = 1; 
 static int fbcon_has_exited;
 static int primary_device = -1;
+static int fbcon_has_console_bind;
 
 #ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
 static int map_override;
@@ -544,6 +545,8 @@ static int fbcon_takeover(int show_logo)
 			con2fb_map[i] = -1;
 		}
 		info_idx = -1;
+	} else {
+		fbcon_has_console_bind = 1;
 	}
 
 	return err;
@@ -2949,6 +2952,10 @@ static int fbcon_unbind(void)
 
 	ret = unbind_con_driver(&fb_con, first_fb_vc, last_fb_vc,
 				fbcon_is_default);
+
+	if (!ret)
+		fbcon_has_console_bind = 0;
+
 	return ret;
 }
 #else
@@ -2962,6 +2969,9 @@ static int fbcon_fb_unbind(int idx)
 {
 	int i, new_idx = -1, ret = 0;
 
+	if (!fbcon_has_console_bind)
+		return 0;
+
 	for (i = first_fb_vc; i <= last_fb_vc; i++) {
 		if (con2fb_map[i] != idx &&
 		    con2fb_map[i] != -1) {
-- 
cgit v0.10.2


From f0c7d2b72ad1a694c0c886a14cc708841181e9bd Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Tue, 22 Sep 2009 16:47:53 -0700
Subject: vgacon: prevent vgacon_deinit from touching the hardware for inactive
 consoles.

fbcon makes the (reasonable) assumption that it only needs to program the
hardware once, when fbcon_init() is called for the foreground console.

This doesn't always play well with vgacon because vgacon_deinit() is only
doing its job when the last console it owns is closed (when switching from
vgacon to fbcon, that's usually *after* fbcon_init() has set the new
mode).

Depending on the hardware this can cause the wrong framebuffer location to
be scanned out (e.g.  reproduced on nv05 with the nouveau framebuffer
driver).

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 74e96cf..da55cca 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -589,12 +589,14 @@ static void vgacon_init(struct vc_data *c, int init)
 
 static void vgacon_deinit(struct vc_data *c)
 {
-	/* When closing the last console, reset video origin */
-	if (!--vgacon_uni_pagedir[1]) {
+	/* When closing the active console, reset video origin */
+	if (CON_IS_VISIBLE(c)) {
 		c->vc_visible_origin = vga_vram_base;
 		vga_set_mem_top(c);
-		con_free_unimap(c);
 	}
+
+	if (!--vgacon_uni_pagedir[1])
+		con_free_unimap(c);
 	c->vc_uni_pagedir_loc = &c->vc_uni_pagedir;
 	con_set_default_unimap(c);
 }
-- 
cgit v0.10.2


From a9672c4b861afcf547bfe2cafeec4ca85a63da67 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 22 Sep 2009 16:47:54 -0700
Subject: drivers/video/tmiofb.c: fix uninitialised return value

drivers/video/tmiofb.c: In function 'tmiofb_resume':
drivers/video/tmiofb.c:977: warning: 'retval' may be used uninitialized in this function

Acked-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/tmiofb.c b/drivers/video/tmiofb.c
index a1eb086..6913fe1 100644
--- a/drivers/video/tmiofb.c
+++ b/drivers/video/tmiofb.c
@@ -974,7 +974,7 @@ static int tmiofb_resume(struct platform_device *dev)
 {
 	struct fb_info *info = platform_get_drvdata(dev);
 	struct mfd_cell *cell = dev->dev.platform_data;
-	int retval;
+	int retval = 0;
 
 	acquire_console_sem();
 
-- 
cgit v0.10.2


From 3163eaba34943967aebb1eefa0d4bdc4e5dc197c Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <jacmet@sunsite.dk>
Date: Tue, 22 Sep 2009 16:47:55 -0700
Subject: video: s3c_fb.c: fix build with CONFIG_HOTPLUG=n

Fixes `s3c_fb_remove' referenced in section `.data' of
drivers/built-in.o: defined in discarded section `.devexit.text' of
drivers/built-in.o

With CONFIG_HOTPLUG=n, functions marked with __devexit gets removed,
so make sure we use __devexit_p when referencing pointers to them.

Signed-off-by: Peter Korsgaard <jacmet@sunsite.dk>
Acked-by: Ben Dooks <ben-linux@fluff.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c
index 5a72083..adf9632 100644
--- a/drivers/video/s3c-fb.c
+++ b/drivers/video/s3c-fb.c
@@ -1036,7 +1036,7 @@ static int s3c_fb_resume(struct platform_device *pdev)
 
 static struct platform_driver s3c_fb_driver = {
 	.probe		= s3c_fb_probe,
-	.remove		= s3c_fb_remove,
+	.remove		= __devexit_p(s3c_fb_remove),
 	.suspend	= s3c_fb_suspend,
 	.resume		= s3c_fb_resume,
 	.driver		= {
-- 
cgit v0.10.2


From 532f649f148bf70e6a5816d95fe55e6a065e8754 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Tue, 22 Sep 2009 16:47:58 -0700
Subject: sisfb: change SiS_DDC_Port type to SISIOADDRESS

The patch enables the driver to be used on platforms such as ARM where an
I/O address is a 32-bit memory address.

The patch avoids the following kernel oops:

debian:~# modprobe sisfb
[   73.070000] sisfb: Video ROM found
[   73.080000] sisfb: Video RAM at 0x80000000, mapped to 0xe0a00000, size 1024k
[   73.090000] sisfb: MMIO at 0x84080000, mapped to 0xe0b80000, size 256k
[   73.090000] sisfb: Memory heap starting at 800K, size 32K
[   73.360000] Unable to handle kernel paging request at virtual address 6e000844
[   73.380000] pgd = df230000
[   73.380000] [6e000844] *pgd=00000000
[   73.380000] Internal error: Oops: 8f5 [#1]
[   73.380000] Modules linked in: sisfb(+) fb cfbcopyarea cfbimgblt cfbfillrect
[   73.380000] CPU: 0    Not tainted  (2.6.31-iop32x #1)
[   73.380000] PC is at SiS_SetRegANDOR+0x10/0x38 [sisfb]
[   73.380000] LR is at SiS_SetSCLKHigh+0x38/0x94 [sisfb]
[   73.380000] pc : [<bf01dc00>]    lr : [<bf0238f8>]    psr: 60000013
[   73.380000] sp : df38fd00  ip : 6e000000  fp : 00000002
[   73.380000] r10: 00000108  r9 : 00000000  r8 : 00000108
[   73.380000] r7 : df064258  r6 : 00000110  r5 : 6e000844  r4 : 0000010a
[   73.380000] r3 : 00000001  r2 : 0000000e  r1 : 00000011  r0 : 00000844
[   73.380000] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
[   73.380000] Control: 0000397f  Table: bf230000  DAC: 00000015
[   73.380000] Process modprobe (pid: 1849, stack limit = 0xdf38e270)
[   73.380000] Stack: (0xdf38fd00 to 0xdf390000)
[   73.380000] fd00: 0000010a 00000108 df064258 df064258 df064258 00000000 00000000 bf02c4e0
[   73.380000] fd20: 00000114 bf02c50c 00000013 00000114 0000010a df064258 00000000 bf02c980
[   73.380000] fd40: 00009c66 00000004 00000001 df064250 a0010000 a6a2a0a0 df064250 00000003
[   73.380000] fd60: df064250 00000000 df064258 0000fffd 00000000 00000000 00000000 bf033948
[   73.380000] fd80: 00000000 00000000 00000000 bf019e2c 00000000 df064a70 bf03b470 00010000
[   73.380000] fda0: 00000000 df064250 00000000 df831c00 00000012 bf039f70 00000000 c00abed8
[   73.380000] fdc0: 000008a6 000008a4 df0649b0 df064878 df064258 df064000 00000000 00000000
[   73.380000] fde0: 00000001 00008000 00000001 00030000 df81c930 bf049f88 df831c00 00000000
[   73.380000] fe00: bf049f58 df3952a0 c0447708 bf049f88 bf049fe0 c0191980 df831c00 c0191b10
[   73.380000] fe20: df831c58 bf049f58 df831c00 bf04aca8 df3952a0 df831c58 df831c58 bf049f88
[   73.380000] fe40: c01ba1b4 c01ba0a0 df831c58 df831c8c bf049f88 c01ba1b4 df3952a0 00000000
[   73.380000] fe60: c03e265c c01ba240 00000000 df38fe78 bf049f88 c01b990c df812938 df81b8d0
[   73.380000] fe80: df3952a0 df807780 00000000 00000060 bf049f88 c01b9224 bf0429c8 00000000
[   73.380000] fea0: bf049f58 00000000 bf049f88 00000000 00000000 bf04aea8 00000000 c01ba4e4
[   73.380000] fec0: e09861a0 bf049f58 00000000 bf049f88 00000000 c0191f20 00000000 00000000
[   73.380000] fee0: c03f7bac bf04d418 0000fff2 0000fff1 bf04ad08 0002f260 0002f260 e0986038
[   73.380000] ff00: e0986150 e098568b df143340 e0990280 00000036 c03d8b00 fffffffd 00000000
[   73.380000] ff20: bf04acfc 00000000 fffffffc 0003cf4b 00018098 c03f7bac 00000000 bf04d000
[   73.380000] ff40: df38e000 00000000 bedc0984 c00272a4 ffffffff c005bc88 00000000 00000000
[   73.380000] ff60: 0003cf4b 0003cf4b 00018098 bf04acfc 00000000 c0027fe8 df38e000 00000000
[   73.380000] ff80: bedc0984 c006882c 00001000 00000003 00000000 00009064 00000000 00008edc
[   73.380000] ffa0: 00000080 c0027e20 00009064 00000000 4014e000 0003cf4b 00018098 0003cf4b
[   73.380000] ffc0: 00009064 00000000 00008edc 00000080 00000000 00000000 40025000 bedc0984
[   73.380000] ffe0: 00000000 bedc08fc 0000b6b0 400e8f34 60000010 4014e000 00000000 00000000
[   73.380000] [<bf01dc00>] (SiS_SetRegANDOR+0x10/0x38 [sisfb]) from [<df064258>] (0xdf064258)
[   73.380000] Code: e92d0030 e20110ff e280546e e3a0c46e (e5c51000)
[   73.680000] ---[ end trace 62a93e01df37a5f2 ]---

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/sis/vstruct.h b/drivers/video/sis/vstruct.h
index 705c853..bef4aae 100644
--- a/drivers/video/sis/vstruct.h
+++ b/drivers/video/sis/vstruct.h
@@ -342,7 +342,7 @@ struct SiS_Private
 	unsigned short			SiS_RY4COE;
 	unsigned short			SiS_LCDHDES;
 	unsigned short			SiS_LCDVDES;
-	unsigned short			SiS_DDC_Port;
+	SISIOADDRESS			SiS_DDC_Port;
 	unsigned short			SiS_DDC_Index;
 	unsigned short			SiS_DDC_Data;
 	unsigned short			SiS_DDC_NData;
-- 
cgit v0.10.2


From 9f6c1333938c5d93da8be8f29f3b5469a3c00f95 Mon Sep 17 00:00:00 2001
From: Doug Graham <dgraham@nortel.com>
Date: Tue, 22 Sep 2009 16:48:00 -0700
Subject: V3 minixfs: add missing directory type checking

There are a few places in the Minix FS code where the "inode" field of a
minix_dir_entry is used without checking first to see if the dirent is
really a minix3_dir_entry.  The inode number in a V1/V2 dirent is 16 bits,
whereas that in a V3 dirent is 32 bits.

Accessing it as a 16 bit field when it really should be accessed as a 32
bit field probably kinda sorta works on a little-endian machine, but leads
to some rather odd behaviour on big-endian machines.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Doug Graham <dgraham@nortel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index d407e7a..6198731 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -308,14 +308,18 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 	struct inode *inode = (struct inode*)mapping->host;
 	char *kaddr = page_address(page);
 	loff_t pos = page_offset(page) + (char*)de - kaddr;
-	unsigned len = minix_sb(inode->i_sb)->s_dirsize;
+	struct minix_sb_info *sbi = minix_sb(inode->i_sb);
+	unsigned len = sbi->s_dirsize;
 	int err;
 
 	lock_page(page);
 	err = __minix_write_begin(NULL, mapping, pos, len,
 					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err == 0) {
-		de->inode = 0;
+		if (sbi->s_version == MINIX_V3)
+			((minix3_dirent *) de)->inode = 0;
+		else
+			de->inode = 0;
 		err = dir_commit_chunk(page, pos, len);
 	} else {
 		unlock_page(page);
@@ -440,7 +444,10 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page,
 	err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize,
 					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err == 0) {
-		de->inode = inode->i_ino;
+		if (sbi->s_version == MINIX_V3)
+			((minix3_dirent *) de)->inode = inode->i_ino;
+		else
+			de->inode = inode->i_ino;
 		err = dir_commit_chunk(page, pos, sbi->s_dirsize);
 	} else {
 		unlock_page(page);
@@ -470,7 +477,14 @@ ino_t minix_inode_by_name(struct dentry *dentry)
 	ino_t res = 0;
 
 	if (de) {
-		res = de->inode;
+		struct address_space *mapping = page->mapping;
+		struct inode *inode = mapping->host;
+		struct minix_sb_info *sbi = minix_sb(inode->i_sb);
+
+		if (sbi->s_version == MINIX_V3)
+			res = ((minix3_dirent *) de)->inode;
+		else
+			res = de->inode;
 		dir_put_page(page);
 	}
 	return res;
-- 
cgit v0.10.2


From a4255e4c1c41f906488ae4d3fa328a66695cf200 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 22 Sep 2009 16:48:01 -0700
Subject: ext2: fix format string compile warning (ino_t)

Unlike on most other architectures ino_t is an unsigned int on s390.  So
add an explicit cast to avoid this compile warning:

fs/ext2/namei.c: In function 'ext2_lookup':
fs/ext2/namei.c:73: warning: format '%lu' expects type 'long unsigned int', but argument 4 has type 'ino_t'

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 23701f2..dd7175c 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -70,7 +70,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
 			if (PTR_ERR(inode) == -ESTALE) {
 				ext2_error(dir->i_sb, __func__,
 						"deleted inode referenced: %lu",
-						ino);
+						(unsigned long) ino);
 				return ERR_PTR(-EIO);
 			} else {
 				return ERR_CAST(inode);
-- 
cgit v0.10.2


From 4fefce9abaeef0d6ec45e06a882db23a65135272 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Tue, 22 Sep 2009 16:48:02 -0700
Subject: jbd.h: bitfields should be unsigned

bitfields should be unsigned.

This fixes sparse noise:
  error: dubious one-bit signed bitfield

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Jan Kara <jack@ucw.cz>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index a1187a0..331530c 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -556,7 +556,7 @@ struct transaction_s
 	 * This transaction is being forced and some process is
 	 * waiting for it to finish.
 	 */
-	int t_synchronous_commit:1;
+	unsigned int t_synchronous_commit:1;
 };
 
 /**
-- 
cgit v0.10.2


From 4fd8da8d62416d0dae05603ab5990a498d9aeb12 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 23 Sep 2009 17:49:55 +0200
Subject: fs: change sys_truncate length parameter type

For this system call user space passes a signed long length parameter,
while the kernel side takes an unsigned long parameter and converts it
later to signed long again.

This has led to bugs in compat wrappers see e.g.  dd90bbd5 "powerpc: Add
compat_sys_truncate".  The s390 compat wrapper for this functions is
broken as well since it also performs zero extension instead of sign
extension for the length parameter.

In addition if hpa comes up with an automated way of generating
compat wrappers it would generate a wrong one here.

So change the length parameter from unsigned long to long.

Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/open.c b/fs/open.c
index 31191bf..4f01e06 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -290,10 +290,9 @@ out:
 	return error;
 }
 
-SYSCALL_DEFINE2(truncate, const char __user *, path, unsigned long, length)
+SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 {
-	/* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
-	return do_sys_truncate(path, (long)length);
+	return do_sys_truncate(path, length);
 }
 
 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 8d8285a..a990ace 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -460,8 +460,7 @@ asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
 				void __user *data);
 asmlinkage long sys_umount(char __user *name, int flags);
 asmlinkage long sys_oldumount(char __user *name);
-asmlinkage long sys_truncate(const char __user *path,
-				unsigned long length);
+asmlinkage long sys_truncate(const char __user *path, long length);
 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
 asmlinkage long sys_stat(char __user *filename,
 			struct __old_kernel_stat __user *statbuf);
-- 
cgit v0.10.2


From 4266c97a3ef4604561a22212eb0eab8a3c338971 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Wed, 23 Sep 2009 17:05:53 +0100
Subject: nommu: fix two build breakages

My 58fa879e1e640a1856f736b418984ebeccee1c95 "mm: FOLL flags for GUP flags"
broke CONFIG_NOMMU build by forgetting to update nommu.c foll_flags type:

  mm/nommu.c:171: error: conflicting types for `__get_user_pages'
  mm/internal.h:254: error: previous declaration of `__get_user_pages' was here
  make[1]: *** [mm/nommu.o] Error 1

My 03f6462a3ae78f36eb1f0ee8b4d5ae2f7859c1d5 "mm: move highest_memmap_pfn"
broke CONFIG_NOMMU build by forgetting to add a nommu.c highest_memmap_pfn:

  mm/built-in.o: In function `memmap_init_zone':
  (.meminit.text+0x326): undefined reference to `highest_memmap_pfn'
  mm/built-in.o: In function `memmap_init_zone':
  (.meminit.text+0x32d): undefined reference to `highest_memmap_pfn'

Fix both breakages, and give myself 30 lashes (ouch!)

Reported-by: Michal Simek <michal.simek@petalogix.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/nommu.c b/mm/nommu.c
index 1a4473fa..8d48424 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -61,6 +61,7 @@ void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
+unsigned long highest_memmap_pfn;
 struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
@@ -169,7 +170,7 @@ unsigned int kobjsize(const void *objp)
 }
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long start, int nr_pages, int foll_flags,
+		     unsigned long start, int nr_pages, unsigned int foll_flags,
 		     struct page **pages, struct vm_area_struct **vmas)
 {
 	struct vm_area_struct *vma;
-- 
cgit v0.10.2


From cf63ff5fa4399e215cc5ef322ccd8bddfff9afa6 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 10:33:54 +0100
Subject: misc: remove redundant start_kernel prototypes

Impact: cleanup

No need for redeclaration.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/include/asm/gdb-stub.h b/arch/frv/include/asm/gdb-stub.h
index 24f9738..2da7164 100644
--- a/arch/frv/include/asm/gdb-stub.h
+++ b/arch/frv/include/asm/gdb-stub.h
@@ -90,7 +90,6 @@ extern void gdbstub_do_rx(void);
 extern asmlinkage void __debug_stub_init_break(void);
 extern asmlinkage void __break_hijack_kernel_event(void);
 extern asmlinkage void __break_hijack_kernel_event_breaks_here(void);
-extern asmlinkage void start_kernel(void);
 
 extern asmlinkage void gdbstub_rx_handler(void);
 extern asmlinkage void gdbstub_rx_irq(void);
diff --git a/arch/frv/kernel/debug-stub.c b/arch/frv/kernel/debug-stub.c
index 2f6c60c..2845139 100644
--- a/arch/frv/kernel/debug-stub.c
+++ b/arch/frv/kernel/debug-stub.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/serial_reg.h>
+#include <linux/start_kernel.h>
 
 #include <asm/system.h>
 #include <asm/serial-regs.h>
diff --git a/arch/mn10300/include/asm/gdb-stub.h b/arch/mn10300/include/asm/gdb-stub.h
index e5a6368..556cce9 100644
--- a/arch/mn10300/include/asm/gdb-stub.h
+++ b/arch/mn10300/include/asm/gdb-stub.h
@@ -109,7 +109,6 @@ extern asmlinkage int  gdbstub_intercept(struct pt_regs *, enum exception_code);
 extern asmlinkage void gdbstub_exception(struct pt_regs *, enum exception_code);
 extern asmlinkage void __gdbstub_bug_trap(void);
 extern asmlinkage void __gdbstub_pause(void);
-extern asmlinkage void start_kernel(void);
 
 #ifndef CONFIG_MN10300_CACHE_DISABLED
 extern asmlinkage void gdbstub_purge_cache(void);
-- 
cgit v0.10.2


From a70770998c1d99e86eea97294ae3cce059a18d7e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 23 Sep 2009 10:04:02 +0100
Subject: FRV: Use asm/generic-hardirq.h

Use asm/generic-hardirq.h to build asm/hardirq.h and also remove the unused
idle_timestamp field in irq_cpustat whilst we're at it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/include/asm/hardirq.h b/arch/frv/include/asm/hardirq.h
index fc47515..5fc8b6f 100644
--- a/arch/frv/include/asm/hardirq.h
+++ b/arch/frv/include/asm/hardirq.h
@@ -12,24 +12,15 @@
 #ifndef __ASM_HARDIRQ_H
 #define __ASM_HARDIRQ_H
 
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-	unsigned long idle_timestamp;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
-#ifdef CONFIG_SMP
-#error SMP not available on FR-V
-#endif /* CONFIG_SMP */
+#include <asm/atomic.h>
 
 extern atomic_t irq_err_count;
 static inline void ack_bad_irq(int irq)
 {
 	atomic_inc(&irq_err_count);
 }
+#define ack_bad_irq ack_bad_irq
+
+#include <asm-generic/hardirq.h>
 
 #endif
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 0de50df..9042559 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -83,13 +83,9 @@ void (*idle)(void) = core_sleep_idle;
  */
 void cpu_idle(void)
 {
-	int cpu = smp_processor_id();
-
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched()) {
-			irq_stat[cpu].idle_timestamp = jiffies;
-
 			check_pgt_cache();
 
 			if (!frv_dma_inprogress && idle)
-- 
cgit v0.10.2


From 7043078630bbc9eb908a56e08077c5abe7f2d311 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 23 Sep 2009 10:40:24 +0100
Subject: MN10300: Handle removal of struct uart_info

Commit ebd2c8f6d2ec4012c267ecb95e72a57b8355a705 removed struct uart_info and
commit bdc04e3174e18f475289fa8f4144f66686326b7e further moved delta_msr_wait.
Fix up the MN10300 on-chip serial port drivers to comply with this.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/mn10300/kernel/asm-offsets.c b/arch/mn10300/kernel/asm-offsets.c
index 82b4007..02dc7e4 100644
--- a/arch/mn10300/kernel/asm-offsets.c
+++ b/arch/mn10300/kernel/asm-offsets.c
@@ -85,7 +85,7 @@ void foo(void)
 	OFFSET(__rx_buffer,		mn10300_serial_port, rx_buffer);
 	OFFSET(__rx_inp,		mn10300_serial_port, rx_inp);
 	OFFSET(__rx_outp,		mn10300_serial_port, rx_outp);
-	OFFSET(__tx_info_buffer,	mn10300_serial_port, uart.info);
+	OFFSET(__uart_state,		mn10300_serial_port, uart.state);
 	OFFSET(__tx_xchar,		mn10300_serial_port, tx_xchar);
 	OFFSET(__tx_break,		mn10300_serial_port, tx_break);
 	OFFSET(__intr_flags,		mn10300_serial_port, intr_flags);
diff --git a/arch/mn10300/kernel/mn10300-serial-low.S b/arch/mn10300/kernel/mn10300-serial-low.S
index 2244853..66702d2 100644
--- a/arch/mn10300/kernel/mn10300-serial-low.S
+++ b/arch/mn10300/kernel/mn10300-serial-low.S
@@ -130,7 +130,7 @@ ENTRY(mn10300_serial_vdma_tx_handler)
 	or	d2,d2
 	bne	mnsc_vdma_tx_xchar
 
-	mov	(__tx_info_buffer,a3),a2 # get the uart_info struct for Tx
+	mov	(__uart_state,a3),a2	# see if the TTY Tx queue has anything in it
 	mov	(__xmit_tail,a2),d3
 	mov	(__xmit_head,a2),d2
 	cmp	d3,d2
diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index 2fd5966..229b710 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -391,7 +391,7 @@ static int mask_test_and_clear(volatile u8 *ptr, u8 mask)
 static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
 {
 	struct uart_icount *icount = &port->uart.icount;
-	struct tty_struct *tty = port->uart.info->port.tty;
+	struct tty_struct *tty = port->uart.state->port.tty;
 	unsigned ix;
 	int count;
 	u8 st, ch, push, status, overrun;
@@ -566,16 +566,16 @@ static void mn10300_serial_transmit_interrupt(struct mn10300_serial_port *port)
 {
 	_enter("%s", port->name);
 
-	if (!port->uart.info || !port->uart.info->port.tty) {
+	if (!port->uart.state || !port->uart.state->port.tty) {
 		mn10300_serial_dis_tx_intr(port);
 		return;
 	}
 
 	if (uart_tx_stopped(&port->uart) ||
-	    uart_circ_empty(&port->uart.info->xmit))
+	    uart_circ_empty(&port->uart.state->xmit))
 		mn10300_serial_dis_tx_intr(port);
 
-	if (uart_circ_chars_pending(&port->uart.info->xmit) < WAKEUP_CHARS)
+	if (uart_circ_chars_pending(&port->uart.state->xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&port->uart);
 }
 
@@ -596,7 +596,7 @@ static void mn10300_serial_cts_changed(struct mn10300_serial_port *port, u8 st)
 	*port->_control = ctr;
 
 	uart_handle_cts_change(&port->uart, st & SC2STR_CTS);
-	wake_up_interruptible(&port->uart.info->delta_msr_wait);
+	wake_up_interruptible(&port->uart.state->port.delta_msr_wait);
 }
 
 /*
@@ -705,8 +705,8 @@ static void mn10300_serial_start_tx(struct uart_port *_port)
 
 	_enter("%s{%lu}",
 	       port->name,
-	       CIRC_CNT(&port->uart.info->xmit.head,
-			&port->uart.info->xmit.tail,
+	       CIRC_CNT(&port->uart.state->xmit.head,
+			&port->uart.state->xmit.tail,
 			UART_XMIT_SIZE));
 
 	/* kick the virtual DMA controller */
-- 
cgit v0.10.2


From 1b9894f342a39601bb0420b7b8c7e445670c1b51 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 21 Sep 2009 11:12:03 -0700
Subject: serial core: fix new kernel-doc warnings

Fix new kernel-doc warnings in serial_core.[hc] files.

  Warning(include/linux/serial_core.h:485): No description found for parameter 'uport'
  Warning(include/linux/serial_core.h:485): Excess function parameter 'port' description in 'uart_handle_dcd_change'
  Warning(include/linux/serial_core.h:511): No description found for parameter 'uport'
  Warning(include/linux/serial_core.h:511): Excess function parameter 'port' description in 'uart_handle_cts_change'
  Warning(drivers/serial/serial_core.c:2437): No description found for parameter 'uport'
  Warning(drivers/serial/serial_core.c:2437): Excess function parameter 'port' description in 'uart_add_one_port'
  Warning(drivers/serial/serial_core.c:2509): No description found for parameter 'uport'
  Warning(drivers/serial/serial_core.c:2509): Excess function parameter 'port' description in 'uart_remove_one_port'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 2514d00..1689bda 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -2426,7 +2426,7 @@ struct tty_driver *uart_console_device(struct console *co, int *index)
 /**
  *	uart_add_one_port - attach a driver-defined port structure
  *	@drv: pointer to the uart low level driver structure for this port
- *	@port: uart port structure to use for this port.
+ *	@uport: uart port structure to use for this port.
  *
  *	This allows the driver to register its own uart_port structure
  *	with the core driver.  The main purpose is to allow the low
@@ -2499,7 +2499,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 /**
  *	uart_remove_one_port - detach a driver defined port structure
  *	@drv: pointer to the uart low level driver structure for this port
- *	@port: uart port structure for this port
+ *	@uport: uart port structure for this port
  *
  *	This unhooks (and hangs up) the specified port structure from the
  *	core driver.  No further calls will be made to the low-level code
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index d58e460..fe661af 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -477,7 +477,7 @@ static inline int uart_handle_break(struct uart_port *port)
 
 /**
  *	uart_handle_dcd_change - handle a change of carrier detect state
- *	@port: uart_port structure for the open port
+ *	@uport: uart_port structure for the open port
  *	@status: new carrier detect status, nonzero if active
  */
 static inline void
@@ -503,7 +503,7 @@ uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 
 /**
  *	uart_handle_cts_change - handle a change of clear-to-send state
- *	@port: uart_port structure for the open port
+ *	@uport: uart_port structure for the open port
  *	@status: new clear to send status, nonzero if active
  */
 static inline void
-- 
cgit v0.10.2